In [None]:

# Load variables from a local .env file into the process environment before
# any client is constructed; the Pinecone client below reads PINECONE_API_KEY
# via os.getenv.
# NOTE(review): OpenAI() is later called with no arguments, so it presumably
# picks its key up from the environment as well — confirm the .env defines it.
from dotenv import load_dotenv
load_dotenv()
from openai import OpenAI

import os
import pinecone
from pinecone import Pinecone, ServerlessSpec

In [None]:
# Connect to Pinecone with the API key taken from the environment.
pc = Pinecone(api_key=os.getenv('PINECONE_API_KEY'))

INDEX_NAME = "rag"

# create_index fails when the index already exists, which made this cell break
# on every re-run (e.g. Restart Kernel -> Run All). Only create it when missing.
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=1536,  # must equal the length of the embedding vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [5]:
import json

# Read the review dataset. The context manager closes the file handle as soon
# as parsing finishes (the bare open() call previously left it open), and the
# explicit encoding avoids depending on the platform default for JSON text.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review records as the cell output.
data['reviews']

[{'professor': 'Dr. Alice Smith',
  'subject': 'Computer Science',
  'rating': 4,
  'review': 'Dr. Smith is very knowledgeable and engaging. The course material was challenging but rewarding.'},
 {'professor': 'Prof. John Doe',
  'subject': 'Mathematics',
  'rating': 3,
  'review': "Prof. Doe's lectures were clear, but the grading seemed harsh. Some additional help sessions would be appreciated."},
 {'professor': 'Dr. Emily Johnson',
  'subject': 'Physics',
  'rating': 5,
  'review': 'Dr. Johnson is fantastic! Her passion for the subject is infectious, and she makes complex concepts easy to understand.'},
 {'professor': 'Dr. Michael Brown',
  'subject': 'Chemistry',
  'rating': 2,
  'review': "Dr. Brown's classes were often disorganized, and the lab sessions felt rushed. Needs to improve course structure."},
 {'professor': 'Prof. Sarah White',
  'subject': 'Biology',
  'rating': 4,
  'review': 'Prof. White is approachable and provides excellent feedback. The course was well-organized a

In [7]:
# Build the Pinecone records: one embedding vector per review plus its metadata.
processed_data = []
client = OpenAI()

EMBEDDING_MODEL = 'text-embedding-3-small'
# Number of review texts sent per embeddings request. Batching replaces one
# network round trip per review with one per batch.
EMBED_BATCH_SIZE = 100

reviews = data['reviews']
for start in range(0, len(reviews), EMBED_BATCH_SIZE):
    batch = reviews[start:start + EMBED_BATCH_SIZE]
    response = client.embeddings.create(
        input=[review['review'] for review in batch],
        model=EMBEDDING_MODEL
    )

    # Each returned item carries the position of its input text in `index`;
    # sort on it so every embedding is paired with the review it came from.
    ordered_items = sorted(response.data, key=lambda item: item.index)

    for review, item in zip(batch, ordered_items):
        processed_data.append({
            "values": item.embedding,
            # NOTE(review): the professor name is used as the vector id, so a
            # second review for the same professor would replace the first on
            # upsert — confirm names are unique in reviews.json.
            'id': review['professor'],
            'metadata': {
                'review': review['review'],
                'subject': review['subject'],
                'rating': review['rating']
            }
        })

In [9]:
# Write every prepared record into the 'ms1' namespace of the 'rag' index.
# The upsert response is the cell's last expression, so it is displayed.
index = pc.Index('rag')
index.upsert(vectors=processed_data, namespace='ms1')

{'upserted_count': 22}

In [10]:
# Sanity check after the upsert: display the index statistics (dimension,
# per-namespace vector counts, total vector count) as the cell output.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ms1': {'vector_count': 22}},
 'total_vector_count': 22}