In [3]:
from dotenv import load_dotenv
load_dotenv()
import os
import google.generativeai as genai
from pinecone import Pinecone, ServerlessSpec

In [9]:
# Pinecone client, authenticated with the key read from the PINECONE_PRIVATE_KEY
# environment variable.
pc = Pinecone(api_key=os.getenv("PINECONE_PRIVATE_KEY"))

# Only create the "rag" index when it is not already present, so this cell can
# be re-run (e.g. Restart Kernel -> Run All) without failing on an existing index.
if "rag" not in pc.list_indexes().names():
    pc.create_index(
        name="rag",
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [11]:
import json

# Load the review dataset. The context manager closes the file handle as soon
# as parsing finishes, and the explicit encoding avoids depending on the
# platform's default locale.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the records stored under the 'reviews' key.
data['reviews']

[{'professor': 'Dr. Sarah Chen',
  'subject': 'Organic Chemistry',
  'stars': 5,
  'review': 'Dr. Chen is incredibly passionate about organic chemistry and makes complex concepts easy to understand. Her office hours are super helpful and she genuinely cares about student success.'},
 {'professor': 'Dr. Michael Rodriguez',
  'subject': 'World History',
  'stars': 2,
  'review': "Lectures are extremely disorganized and the grading seems arbitrary. Often goes off on tangents and doesn't cover the material in the syllabus."},
 {'professor': 'Dr. Emily Parker',
  'subject': 'Psychology',
  'stars': 4,
  'review': 'Great lecturer who incorporates real-world examples. Only downside is that the workload can be heavy at times.'},
 {'professor': 'Dr. James Wilson',
  'subject': 'Calculus II',
  'stars': 3,
  'review': "Decent professor but moves through material very quickly. You'll need to do a lot of self-study to keep up with the pace."},
 {'professor': 'Dr. Lisa Thompson',
  'subject': 'Engl

In [12]:
# Configure the Gemini client with the key from GEMINI_PRIVATE_KEY before any
# embedding request is issued.
genai.configure(api_key=os.getenv("GEMINI_PRIVATE_KEY"))

# Build one record per review: the embedding of the review text goes under
# 'values', the professor's name is used as the record 'id', and the remaining
# fields are carried along as 'metadata'.
processed_data = []
for review in data['reviews']:
    result = genai.embed_content(model="models/text-embedding-004", content=review['review'])
    processed_data.append(dict(
        values=result['embedding'],
        id=review["professor"],
        metadata=dict(
            review=review["review"],
            subject=review["subject"],
            stars=review["stars"],
        ),
    ))


In [13]:
# Inspect the first processed record (its 'values' embedding, 'id' and 'metadata')
# as a sanity check before uploading.
processed_data[0]

{'values': [0.05088044,
  0.032853022,
  -0.016277155,
  -0.008316894,
  0.016696172,
  -0.0035404807,
  -0.042140666,
  0.022877635,
  -0.023695014,
  0.0007694691,
  0.06368652,
  0.058853596,
  0.03417103,
  -0.012147661,
  0.0010728538,
  -0.040871944,
  0.012677943,
  0.0313555,
  -0.08878948,
  0.0013894214,
  -0.0026911958,
  -0.032893192,
  0.024551602,
  -0.05858321,
  0.0025707674,
  0.051163904,
  -0.012200803,
  -0.035860952,
  0.030119477,
  -0.06586982,
  0.01933641,
  0.018213585,
  -0.083262384,
  -0.030454338,
  -0.013339498,
  0.046274032,
  0.0273361,
  -0.013845458,
  0.049035866,
  -0.048287537,
  0.0041243723,
  -0.009034701,
  -0.008903714,
  -0.05291094,
  -0.024519002,
  -0.02182696,
  -0.015059295,
  0.12055108,
  -0.030451791,
  0.09115734,
  0.046548657,
  0.018269362,
  -0.019594822,
  0.0247185,
  0.0046560145,
  0.00010479652,
  -0.028197793,
  -0.05037278,
  0.04253725,
  -0.03399528,
  -0.023198694,
  0.0178449,
  -0.022798326,
  -0.09198736,
  0.067491

In [14]:
# Get a handle to the "rag" index and write every processed record into the
# "ns1" namespace; the call's return value is shown as the cell output.
index = pc.Index("rag")
index.upsert(namespace="ns1", vectors=processed_data)

{'upserted_count': 20}

In [15]:
# Show the index statistics (dimension, per-namespace vector counts, total count)
# to confirm the upsert landed in the expected namespace.
index.describe_index_stats()

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}