In [26]:
from dotenv import load_dotenv
load_dotenv()

import os
from pinecone import Pinecone, ServerlessSpec
from openai import OpenAI


In [28]:
# Build the client outside the try block so `pc` is always bound for the
# cells below, even when index creation itself fails.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

try:
    # Make the cell safe to re-run: only create the index when it is missing.
    if "rag" in pc.list_indexes().names():
        print("Index already exists; skipping creation.")
    else:
        pc.create_index(
            name="rag",
            # Must equal the length of the embedding vectors stored in this index.
            dimension=1536,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
        print("Index created successfully.")
except Exception as e:
    # Report the failure, then re-raise so the notebook stops here instead of
    # failing later with a less helpful error.
    print(f"Failed to create index: {e}")
    raise


Index created successfully.


In [29]:
import json

# Read the review dataset. The encoding is stated explicitly so the result
# does not depend on the platform's default text encoding.
with open("reviews.json", encoding="utf-8") as f:
    data = json.load(f)

# Show the list stored under the "reviews" key as the cell output.
data['reviews']

[{'professor': 'Dr. John Smith',
  'subject': 'Computer Science',
  'stars': 4,
  'review': 'Very knowledgeable but can be tough. Lectures are clear and assignments are challenging.'},
 {'professor': 'Dr. Emily Brown',
  'subject': 'Mathematics',
  'stars': 5,
  'review': 'Explains concepts very well and is always willing to help during office hours.'},
 {'professor': 'Dr. Michael Johnson',
  'subject': 'Physics',
  'stars': 3,
  'review': 'Content is difficult, but the professor tries his best. Could improve on providing more examples.'},
 {'professor': 'Dr. Sarah Lee',
  'subject': 'Chemistry',
  'stars': 5,
  'review': 'Great at making difficult topics understandable. Her labs are well-organized.'},
 {'professor': 'Dr. Robert Davis',
  'subject': 'Biology',
  'stars': 2,
  'review': "Lectures are hard to follow and he doesn't respond to emails promptly."},
 {'professor': 'Dr. Jennifer Wilson',
  'subject': 'History',
  'stars': 4,
  'review': 'Engaging lecturer, but the reading load

In [30]:
process_data = []
client = OpenAI()

reviews = data['reviews']

# Embed all review texts in a single request instead of one request per
# review; skip the call entirely when there is nothing to embed.
if reviews:
    response = client.embeddings.create(
        input=[review['review'] for review in reviews],
        model="text-embedding-3-small",
    )

    if len(response.data) != len(reviews):
        raise RuntimeError(
            f"Expected {len(reviews)} embeddings, got {len(response.data)}"
        )

    # Each returned item carries the position of its input text; sort on it so
    # the pairing with `reviews` below does not rely on response ordering.
    ordered_items = sorted(response.data, key=lambda item: item.index)

    # One record per review: the vector, the professor name as the record id,
    # and the remaining review fields as metadata.
    process_data = [
        {
            "values": item.embedding,
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        }
        for review, item in zip(reviews, ordered_items)
    ]

In [33]:
# Get a handle to the "rag" index from the Pinecone client.
index = pc.Index("rag")

# Send every prepared record to namespace "ns1"; the call's return value is
# the last expression, so it is shown as the cell output.
index.upsert(namespace="ns1", vectors=process_data)

{'upserted_count': 20}

In [34]:
# Show the index statistics as the cell output, to check the upsert above.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}