In [21]:
# Environment handling: python-dotenv plus os, used by the os.getenv(...) lookups in later cells.
from dotenv import load_dotenv
import os
# Called before the API clients are configured so the keys read via os.getenv are available
# (presumably sourced from a local .env file — confirm one exists next to the notebook).
load_dotenv()
# Third-party clients: Gemini (embeddings) and Pinecone (vector index).
import google.generativeai as genai
from pinecone import Pinecone, ServerlessSpec



In [32]:

# Pinecone client; the API key is read from the environment rather than hardcoded.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Creating an index that already exists raises an error, which made this cell
# fail on every re-run (e.g. Restart Kernel -> Run All). Only create the "rag"
# index when it is not already present so the cell is safe to execute repeatedly.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        # Must equal the length of the vectors upserted into this index later on.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [9]:
import json

# Read the review records from disk. The context manager closes the file
# handle as soon as parsing finishes (the previous json.load(open(...)) left it
# open), and the explicit encoding avoids depending on the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the parsed reviews as the cell output.
data['reviews']

[{'professor': 'Dr. Sarah Johnson',
  'subject': 'Physics',
  'stars': 4,
  'review': 'Dr. Johnson explains complex concepts clearly. Her enthusiasm for physics is contagious!'},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant instructor. Prof. Chen's practical approach to coding is incredibly helpful."},
 {'professor': 'Dr. Emily Rodriguez',
  'subject': 'Biology',
  'stars': 3,
  'review': 'Knowledgeable, but lectures can be dry. More interactive sessions would be beneficial.'},
 {'professor': 'Prof. David Lee',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Challenging course, but Prof. Lee is always available for extra help. Great problem-solving techniques.'},
 {'professor': 'Dr. Lisa Thompson',
  'subject': 'Psychology',
  'stars': 5,
  'review': "Dr. Thompson's real-world examples make the material relatable and easy to understand."},
 {'professor': 'Prof. Robert Brown',
  'subject': 'History',
  'stars': 2,
  'r

In [24]:
# Authenticate the Gemini client with the key taken from the environment.
genai.configure(api_key=os.getenv("API_KEY"))

# Build one record per review: the embedding of the review text goes under
# "values", the professor name is the record id, and the remaining fields are
# kept as metadata.
# NOTE(review): ids are professor names, so two reviews for the same professor
# would share an id — confirm names are unique in reviews.json.
processed_data = []
for review in data["reviews"]:
    embedding = genai.embed_content(
        model="models/text-embedding-004",
        content=review["review"],
    )["embedding"]
    metadata = {field: review[field] for field in ("review", "subject", "stars")}
    processed_data.append(
        {"values": embedding, "id": review["professor"], "metadata": metadata}
    )

In [25]:
# Spot-check the first record built above; "values" holds the full embedding, so the output is long.
processed_data[0]

{'values': [0.056270275,
  0.009288698,
  -0.010714889,
  0.0029713651,
  0.010109,
  -0.003701102,
  -0.0032520199,
  0.029172443,
  -0.019072693,
  0.003472986,
  0.02285943,
  0.028746381,
  0.040739484,
  0.015341386,
  0.0017053791,
  -0.07763738,
  -0.012430096,
  0.01891133,
  -0.119109325,
  0.00898605,
  0.033717245,
  -0.028497191,
  0.024595007,
  -0.001035836,
  0.02145677,
  0.010084572,
  0.030138465,
  -0.041154232,
  0.051528648,
  -0.042238116,
  0.031994414,
  0.019035507,
  -0.017712196,
  -0.043777153,
  -0.05076878,
  0.04051142,
  0.013584781,
  -0.031592514,
  0.07311858,
  -0.029177997,
  -0.050844174,
  -0.039260928,
  0.021709055,
  -0.010769865,
  -0.021273881,
  0.004789993,
  -0.00922902,
  0.0823439,
  0.0027394076,
  0.078692846,
  0.07342942,
  0.040275786,
  -0.04969214,
  0.048056554,
  -0.023483727,
  -0.02966879,
  -0.034145065,
  -0.016793864,
  0.03095328,
  -0.023448827,
  -0.023296159,
  0.012195336,
  -0.020547712,
  -0.059663754,
  0.06427075,


In [33]:
# Handle to the "rag" index; later cells reuse `index`.
index = pc.Index("rag")

# Write every prepared record into namespace "ns1". The call is the last
# expression so its response is shown as the cell output.
index.upsert(namespace="ns1", vectors=processed_data)

{'upserted_count': 20}

In [35]:
# Read back the index statistics (dimension, per-namespace vector counts) to confirm the upsert landed.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}