In [7]:
from dotenv import load_dotenv 
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
from langchain_huggingface import HuggingFaceEmbeddings

In [None]:
# Connect to Pinecone with the API key read from the environment.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# create_index raises if an index with this name already exists, so only
# create it when it is missing. This keeps the cell safe to re-run
# (Restart Kernel -> Run All) instead of failing on the second execution.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=384,  # must equal the length of the embedding vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [None]:
import json

# Read the review records from disk. The context manager closes the file
# handle as soon as parsing finishes instead of leaving it open for the
# lifetime of the kernel.
with open("reviews.json") as reviews_file:
    data = json.load(reviews_file)

# Show the loaded review records as the cell output for a quick sanity check.
data['reviews']

In [9]:
# OpenRouter-backed OpenAI client. It is not used in this cell; it is kept
# because other cells may rely on the `client` name.
client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=os.getenv("OPENROUTER_API_KEY"))
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

reviews = data['reviews']

# The professor name is used as the Pinecone record id, and an upsert
# overwrites any existing record with the same id. Fail loudly on duplicates
# rather than silently losing reviews.
professor_ids = [review["professor"] for review in reviews]
if len(set(professor_ids)) != len(professor_ids):
    duplicate_ids = sorted({pid for pid in professor_ids if professor_ids.count(pid) > 1})
    raise ValueError(
        f"Duplicate professor ids would overwrite each other on upsert: {duplicate_ids}"
    )

# Embed every review text in one batched call instead of one call per review.
review_vectors = hf_embeddings.embed_documents([review["review"] for review in reviews])

# Build one Pinecone record per review: the embedding vector, the id, and the
# original review fields as metadata.
processed_data = [
    {
        "values": vector,
        "id": review["professor"],
        "metadata": {
            "review": review["review"],
            "subject": review["subject"],
            "stars": review["stars"],
        },
    }
    for review, vector in zip(reviews, review_vectors)
]




In [18]:
# Get a handle to the 'rag' index, then write all prepared records into the
# "professor_rating" namespace. The upsert response is the cell's output.
index = pc.Index('rag')
index.upsert(namespace="professor_rating", vectors=processed_data)

{'upserted_count': 10}

In [19]:
# Display the index statistics (dimension, per-namespace vector counts, total
# vector count) to confirm the upsert landed.
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {'professor_rating': {'vector_count': 10}},
 'total_vector_count': 10}