In [5]:
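# One-time setup: uncomment and run these lines to install the notebook's
# dependencies into the active kernel environment.
# %pip install python-dotenv
# %pip install openai
# %pip install "pinecone-client[grpc]"
# %pip install -q -U google-generativeai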

In [24]:
from dotenv import load_dotenv
# Load variables from a local .env file (if one is present) into the process
# environment. This runs before anything else so that the API keys read
# later through os.getenv / os.environ are already available.
load_dotenv()
import os
# Pinecone is the client class; ServerlessSpec describes the cloud/region of a
# serverless index when one is created.
from pinecone import Pinecone, ServerlessSpec

In [25]:
# Build the Pinecone client from the key stored in the PINECONE_KEY
# environment variable. os.getenv returns None when the variable is unset,
# in which case None is what gets handed to the client.
pinecone_api_key = os.getenv("PINECONE_KEY")
pc = Pinecone(api_key=pinecone_api_key)

In [26]:
# Create the "rag-alg" serverless index only when it does not exist yet, so
# this cell can be re-run (e.g. Restart Kernel -> Run All) without failing
# because the index was already created by a previous run.
# dimension=768: every vector upserted into the index must have exactly this
# many components. metric="cosine": similarity is measured by cosine distance.
if "rag-alg" not in pc.list_indexes().names():
    pc.create_index(
        name="rag-alg",
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [27]:
import json

# Load the question dataset. The context manager closes the file handle as
# soon as parsing finishes (the bare open() call left it open), and the
# explicit UTF-8 encoding keeps the result independent of the OS locale.
with open("linear_equations_dataset.json", encoding="utf-8") as dataset_file:
    data = json.load(dataset_file)
# display(data["questions"])

In [28]:
import google.generativeai as genai

# Read the Gemini key with os.environ[...] so a missing variable raises
# KeyError right here instead of surfacing later during an API call.
gemini_api_key = os.environ["GEMINI_API_KEY"]
genai.configure(api_key=gemini_api_key)

In [29]:
# Build one upsert record per question.
# An embedding is a numeric vector that captures the semantic meaning of a
# piece of text, so questions with related meaning get similar vectors.
process_data = []

for item in data["questions"]:
    # One embedding request per question text.
    embed_result = genai.embed_content(
        model="models/text-embedding-004",
        content=item["question"],
    )
    vector = embed_result["embedding"]

    # Record layout: the vector itself, its id, and the original fields kept
    # as metadata alongside it.
    record = {
        "values": vector,
        "id": item["id"],
        "metadata": {
            "question": item["question"],
            "solution": item["solution"],
            "topic": item["topic"],
            "difficulty": item["difficulty"],
            # NOTE(review): stored under "steps" but sourced from the
            # dataset's "tags" field - confirm this naming is intended.
            "steps": item["tags"],
        },
    }
    process_data.append(record)



In [30]:
# Open a handle to the "rag-alg" index and write every prepared record into
# the "ns1" namespace in a single upsert call.
index = pc.Index("rag-alg")
upsert_response = index.upsert(vectors=process_data, namespace="ns1")
# Leave the response as the cell's last expression so it is displayed.
upsert_response

{'upserted_count': 50}

In [31]:
# Fetch the index statistics and display them as the cell output, as a check
# on the vector counts after the upsert.
index_stats = index.describe_index_stats()
index_stats

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 50}},
 'total_vector_count': 50}