In [1]:
import os
import uuid

import pandas as pd
import pinecone
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer




In [None]:
# Read the API key from the environment so the secret never lives in the
# notebook itself (notebooks are routinely committed and shared).
api_key = os.environ.get("PINECONE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the PINECONE_API_KEY environment variable before running this notebook"
    )

# Client used by the following cells to list, create, and open indexes.
pc = Pinecone(api_key=api_key)

In [3]:
# Name of the Pinecone index this notebook creates (if it does not exist yet)
# and then opens for upserts and queries.
index_name = "learning-buddy-faq"

In [4]:
# Create the index only on first run; re-running this cell is a no-op once
# the index exists.
existing_index_names = pc.list_indexes().names()

if index_name not in existing_index_names:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    # NOTE(review): dimension must equal the length of the vectors upserted
    # later in the notebook — confirm it matches the embedding model's output.
    pc.create_index(
        name=index_name,
        dimension=384,
        metric="cosine",
        spec=serverless_spec,
    )

In [5]:
# Handle to the index named above; later cells call upsert() and query() on it.
index = pc.Index(index_name)

In [6]:
# Embedding model used for both the FAQ questions and the search query, so
# stored vectors and query vectors live in the same space.
# NOTE(review): the index is created with dimension=384 — confirm this
# checkpoint's embedding size matches.
embedding_model_name = 'sentence-transformers/all-MiniLM-L6-v2'
model = SentenceTransformer(embedding_model_name)

In [7]:
# Load the FAQ dataset. Later cells require "question" and "answer" columns
# and use an "id" column when one is present.
csv_file = "faq_dataset.csv"
df = pd.read_csv(csv_file)

In [9]:
# Fail early, naming the first required column the CSV lacks, rather than
# hitting a KeyError halfway through the embedding loop.
required_columns = ["question", "answer"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    col = missing_columns[0]
    raise ValueError(f"Column '{col}' not found in CSV")

In [10]:
# Build the (id, vector, metadata) tuples passed to index.upsert().

# Skip rows with a missing question or answer: str(NaN) would otherwise be
# embedded and stored as the literal text "nan".
faq_rows = df.dropna(subset=["question", "answer"])

# Cast to str so non-string cells (e.g. numeric answers) are still usable.
questions = faq_rows["question"].astype(str).tolist()
answers = faq_rows["answer"].astype(str).tolist()

# Use the CSV's id where one is present and non-null; otherwise generate a
# unique id. A null id must not become the shared vector id "nan", which
# would make such rows overwrite each other on upsert.
if "id" in faq_rows.columns:
    item_ids = [
        str(uuid.uuid4()) if pd.isna(raw_id) else str(raw_id)
        for raw_id in faq_rows["id"]
    ]
else:
    item_ids = [str(uuid.uuid4()) for _ in questions]

# Encode all questions in one batched call instead of one call per row;
# .tolist() turns the numpy result into plain lists of floats.
embeddings = model.encode(questions).tolist() if questions else []

items_to_upsert = [
    (item_id, embedding, {"question": question, "answer": answer})
    for item_id, embedding, question, answer in zip(
        item_ids, embeddings, questions, answers
    )
]

In [11]:
# Upsert in fixed-size batches to keep each request small.
batch_size = 100

# Ceiling division: the previous `len // batch_size + 1` reported one batch
# too many whenever the item count was an exact multiple of batch_size.
total_batches = (len(items_to_upsert) + batch_size - 1) // batch_size

for batch_number, start in enumerate(
    range(0, len(items_to_upsert), batch_size), start=1
):
    batch = items_to_upsert[start:start + batch_size]
    index.upsert(vectors=batch)
    print(f"Upserted batch {batch_number} of {total_batches}")

Upserted batch 1 of 1


In [None]:
# Sanity-check the index: embed a sample question with the same model used
# for the stored FAQ entries and fetch the closest matches with metadata.
query_text = "Is there a mobile app available?"
query_embedding = model.encode(query_text).tolist()

top_k = 2
results = index.query(
    vector=query_embedding,
    top_k=top_k,
    include_metadata=True,
)

In [15]:
# Print one line per match: vector id, similarity score, and the FAQ
# question/answer stored in the match's metadata.
print("\nQuery Results:")
matches = results["matches"]
for match in matches:
    print(f"ID: {match['id']}, Score: {match['score']}, Question: {match['metadata']['question']}, Answer: {match['metadata']['answer']}")


Query Results:
ID: 4, Score: 0.96848917, Question: Is there a mobile app available?, Answer: Yes, you can download our mobile app from the App Store or Google Play Store.
ID: 50, Score: 0.407608449, Question: Is there a loyalty program?, Answer: Yes, you earn points for learning activity that can be redeemed later.
