In [2]:
from dotenv import load_dotenv
load_dotenv()
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI
import os
import json

# Initialize Pinecone
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Create a Pinecone index
pc.create_index(
    name="rag",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

# Load the review data
data = json.load(open("reviews.json"))

processed_data = []
client = OpenAI()

# Create embeddings for each review
for review in data["reviews"]:
    response = client.embeddings.create(
        input=review['review'], model="text-embedding-3-small"
    )
    embedding = response.data[0].embedding
    processed_data.append(
        {
            "values": embedding,
            "id": review["professor"],
            "metadata":{
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            }
        }
    )

# Insert the embeddings into the Pinecone index
print(processed_data)
index = pc.Index("rag")
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns1",
)
print(f"Upserted count: {upsert_response['upserted_count']}")

# Print index statistics
print(index.describe_index_stats())


[{'values': [-0.016153483, -0.050456103, -0.03652419, 0.023470873, -0.0346415, 0.0030687852, -0.04470762, 0.034239862, -0.049401794, 0.005287222, 0.011578546, 0.012689333, -0.035068244, -0.019918865, 0.015952663, 0.010223009, 0.0055162827, 0.028139945, 0.017760046, 0.056681532, -0.0010260663, -0.009758611, 0.01276464, 0.007932402, -0.01930385, -0.03810565, 0.022065131, 0.008478383, 0.015952663, 0.011898603, 0.08389268, -0.005547661, -0.02422395, -0.014471613, -0.056229685, 0.020408364, 0.0031817467, 0.030097943, 0.014885805, 0.030248558, 0.02265504, 0.027085638, 1.9108817e-05, 0.031629197, 0.043000646, -0.023571283, -0.026784409, -0.017019521, 0.03552009, 0.032281864, -0.010392451, -0.0037277269, 0.057635427, 0.015111728, -0.037829526, 0.038231164, 0.022981374, 0.016304098, 0.004173297, -0.02620705, 0.029495481, 0.01078154, 0.035821322, -0.005779859, -0.011904879, -0.07184346, -0.01888966, 0.005921061, -0.05577784, -0.039436087, 0.033587195, 0.0008252459, -0.0053750807, -0.014772844, -