In [56]:
import os

from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment before
# the SDK clients are imported and before the key is read below.
load_dotenv()

from groq import Groq
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec

# Pinecone credential; evaluates to None when the variable is not set.
api_key = os.getenv("PINECONE_API_KEY")


In [50]:
# Connect to Pinecone and make sure the 'rag-rmp' serverless index exists.
pc = Pinecone(api_key=api_key)
spec = ServerlessSpec(cloud='aws', region='us-east-1')

# create_index fails when the index already exists, so only create it when it
# is missing. This keeps the cell safe to re-run (Restart Kernel -> Run All).
if 'rag-rmp' not in pc.list_indexes().names():
    pc.create_index(
        name='rag-rmp',
        dimension=1536,  # must equal the length of the embedding vectors upserted later
        metric="cosine",
        spec=spec
    )

In [None]:
import json

# Read the review dataset. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it open for the life of the kernel.
with open('reviews.json') as reviews_file:
    data = json.load(reviews_file)

# Display the loaded reviews as the cell output.
data['reviews']

In [58]:
# Turn every review into a Pinecone-ready record: an embedding vector of the
# review text plus the review's own fields as metadata.
processed_data = []
client = OpenAI()
for entry in data['reviews']:
    # One embeddings request per review; a single input yields one result.
    embed_result = client.embeddings.create(
        model="text-embedding-3-small",
        input=entry['review'],
    )
    # NOTE(review): the professor name doubles as the vector id. Presumably
    # names are unique in reviews.json; otherwise records sharing an id would
    # collide on upsert. Confirm against the dataset.
    record = {
        "values": embed_result.data[0].embedding,
        "id": entry["professor"],
        "metadata": {field: entry[field] for field in ("review", "subject", "stars")},
    }
    processed_data.append(record)

In [59]:
# Open a handle to the 'rag-rmp' index and write every prepared record into
# the 'ns1' namespace; the call's return value is shown as the cell output.
index = pc.Index('rag-rmp')
index.upsert(namespace='ns1', vectors=processed_data)

{'upserted_count': 5}

In [60]:
# Show the index statistics as the cell output (a sanity check after the upsert).
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 5}},
 'total_vector_count': 5}