In [11]:
from dotenv import load_dotenv
load_dotenv(dotenv_path='.env.local')
import os
from openai import OpenAI

from pinecone import Pinecone, ServerlessSpec

In [26]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
    name="rag", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)



In [27]:
import json
data = json.load(open("reviews.json"))
data['reviews']

[{'professor': 'Dr. John Smith',
  'subject': 'Computer Science',
  'stars': 5,
  'review': 'Dr. Smith is incredibly knowledgeable and always willing to help. His lectures are clear and concise.'},
 {'professor': 'Dr. Emily Brown',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Dr. Brown explains concepts well, but sometimes the pace is too fast. Overall, a great professor.'},
 {'professor': 'Dr. Michael Johnson',
  'subject': 'Physics',
  'stars': 3,
  'review': "The content is difficult, and Dr. Johnson's explanations can be hard to follow at times."},
 {'professor': 'Dr. Sarah Miller',
  'subject': 'Chemistry',
  'stars': 4,
  'review': 'Very organized and makes the material interesting. Labs are well-structured.'},
 {'professor': 'Dr. James Wilson',
  'subject': 'Biology',
  'stars': 5,
  'review': 'Dr. Wilson is passionate about biology and makes the subject exciting. Highly recommend his classes.'},
 {'professor': 'Dr. Olivia Davis',
  'subject': 'History',
  'stars': 2,


In [28]:
procesed_data =  []
client = OpenAI()

for review in data['reviews']:
        response = client.embeddings.create(
                input=review['review'],
                model="text-embedding-3-small",
        )
        embedding = response.data[0].embedding
        procesed_data.append(
                {
                        "values": embedding,
                        "id": review["professor"],
                        "metadata": {
                                "review": review["review"],
                                "subject": review["subject"],
                                "stars": review["stars"]
                        }
                }
        )

In [29]:
procesed_data[0]

{'values': [-0.020942533,
  0.0063095237,
  -0.04258092,
  0.018493662,
  0.0068113413,
  0.015121446,
  0.010598394,
  0.015027773,
  0.023003332,
  -0.023551986,
  0.01189643,
  -0.012431702,
  -0.013127556,
  -0.0036398524,
  0.04006514,
  0.005426324,
  -0.019524062,
  0.0077213044,
  0.021558097,
  0.052456696,
  0.04295561,
  -0.0135557735,
  0.039529867,
  -0.03679998,
  -0.044534665,
  -0.036050595,
  -0.0028218892,
  0.04488259,
  0.02194617,
  0.012097157,
  0.083663076,
  -0.009996212,
  -0.0045130155,
  0.01455941,
  -0.048174515,
  0.026549513,
  -0.018159118,
  -0.0020072716,
  0.012538756,
  0.007955486,
  0.025893804,
  0.022414532,
  -0.037576124,
  -0.00292058,
  0.047425136,
  -0.047719534,
  -0.016874462,
  -0.018533807,
  0.01839999,
  0.0456855,
  -0.036907032,
  0.026188202,
  0.03356158,
  -0.03867343,
  -0.03214311,
  0.016285663,
  0.029252637,
  0.042848557,
  0.015188355,
  -0.019791698,
  0.07156592,
  0.022789223,
  -0.005292506,
  -0.022789223,
  -0.01793

In [30]:
index = pc.Index('rag')
index.upsert(
    vectors=procesed_data,
    namespace="ns1"
)

{'upserted_count': 20}

In [31]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}