In [5]:
from dotenv import load_dotenv
load_dotenv()
import os
import google.generativeai as genai
from pinecone import Pinecone, ServerlessSpec

In [7]:
# Pinecone client; the API key is read from the PINECONE_API_KEY environment variable.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Only create the "rag" index when it does not exist yet: calling create_index
# for an existing name raises, which would break re-running this cell.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        # Must equal the length of every vector upserted into this index
        # (presumably the embedding model's output size -- confirm).
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [8]:
import json

# Parse the review file inside a context manager so the file handle is closed
# as soon as parsing finishes; JSON text is read explicitly as UTF-8 rather
# than with the platform default encoding.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Last expression of the cell: display the list stored under the "reviews" key.
data['reviews']

[{'professor': 'Dr. John Smith',
  'subject': 'Computer Science',
  'stars': 5,
  'review': 'Dr. Smith is an excellent professor who explains concepts very clearly.'},
 {'professor': 'Dr. Emily Johnson',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Dr. Johnson is very knowledgeable but can be a bit fast-paced.'},
 {'professor': 'Dr. Michael Brown',
  'subject': 'Physics',
  'stars': 3,
  'review': 'The lectures are okay, but the exams are quite difficult.'},
 {'professor': 'Dr. Sarah Davis',
  'subject': 'Chemistry',
  'stars': 5,
  'review': 'Amazing professor! Her labs are very well organized.'},
 {'professor': 'Dr. James Wilson',
  'subject': 'History',
  'stars': 2,
  'review': 'Not very engaging, and the material is dry.'},
 {'professor': 'Dr. Linda Garcia',
  'subject': 'Biology',
  'stars': 4,
  'review': 'Dr. Garcia is great, but her grading is tough.'},
 {'professor': 'Dr. Robert Martinez',
  'subject': 'Political Science',
  'stars': 3,
  'review': 'Interesting lect

In [13]:
# Authenticate the Gemini client with the key from the GEMINI_API_KEY environment variable.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

model = 'models/embedding-001'

# One record per review, in the {"values", "id", "metadata"} layout that is
# later passed to index.upsert(). Rebuilt from scratch on every run of the cell.
processed_data = []

for review in data['reviews']:
    # Embed only the free-text review; the other fields travel as metadata.
    embedding = genai.embed_content(model=model, content=review['review'], task_type="retrieval_document")

    # NOTE(review): the professor name doubles as the vector id, so two reviews
    # for the same professor would share an id -- confirm names are unique.
    record = {"values": embedding['embedding'], "id": review["professor"]}
    record["metadata"] = {
        "review": review["review"],
        "subjects": review["subject"],
        "stars": review["stars"],
    }
    processed_data.append(record)


In [14]:
# Handle to the "rag" index on the Pinecone client created earlier in the notebook.
index = pc.Index("rag")

# Send every prepared record in a single upsert call, into namespace "ns1".
upsert_response = index.upsert(vectors=processed_data, namespace="ns1")

# Report the count that the upsert response carries.
print(f"Upserted count: {upsert_response['upserted_count']}")

Upserted count: 20
