In [2]:
from dotenv import load_dotenv
load_dotenv()

import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec


In [5]:
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc.create_index(
  name="rag2", dimension=1536, metric="cosine", spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

In [6]:
import json
data = json.load(open("reviews.json"))

data["reviews"]

[{'id': '1',
  'professor': 'Dr. John Smith',
  'subject': 'Computer Science',
  'stars': 5,
  'comment': 'Dr. Smith is an outstanding professor who makes complex topics easy to understand. Highly recommended!'},
 {'id': '2',
  'professor': 'Dr. Emily Johnson',
  'subject': 'Mathematics',
  'stars': 4,
  'comment': 'Dr. Johnson explains concepts clearly and provides great support, but her exams are quite challenging.'},
 {'id': '3',
  'professor': 'Dr. Alan Brown',
  'subject': 'History',
  'stars': 3,
  'comment': 'Dr. Brown is knowledgeable, but his lectures can be a bit dry. The course material is interesting, though.'},
 {'id': '4',
  'professor': 'Dr. Laura Williams',
  'subject': 'Physics',
  'stars': 5,
  'comment': 'Dr. Williams is an excellent instructor. Her passion for physics is contagious, and she makes learning fun!'},
 {'id': '5',
  'professor': 'Dr. Michael Davis',
  'subject': 'Chemistry',
  'stars': 4,
  'comment': 'Dr. Davis is very thorough and ensures everyone unde

In [8]:
processed_data = []
client = OpenAI()

for review in data['reviews']:
  response = client.embeddings.create(
    input=review['comment'],
    model="text-embedding-3-small"
  )
  embedding = response.data[0].embedding
  processed_data.append({
    "values": embedding,
    "id": review["professor"],
    "metadata": {
      "comment": review["comment"],
      "subject": review["subject"],
      "stars": review["stars"]
    }
  })


In [9]:
processed_data[0]

{'values': [-0.018445794,
  -0.011268322,
  -0.07038235,
  0.015981426,
  0.025678715,
  -0.0011536323,
  -0.0009048851,
  0.0261223,
  0.016893243,
  -0.05081527,
  0.027625564,
  -0.00649361,
  -0.021538576,
  0.01084938,
  0.030656738,
  0.015845886,
  -0.035067957,
  -0.00036888508,
  0.02390437,
  0.065010026,
  0.024126163,
  -0.02243807,
  0.03188892,
  -0.010929472,
  -0.020934805,
  -0.03233251,
  0.019801198,
  0.04453113,
  0.015722668,
  0.0017928277,
  0.058504097,
  0.000964184,
  -0.005175173,
  -0.01700414,
  -0.041154947,
  0.016560553,
  -0.025875865,
  0.021945197,
  -0.0076765064,
  0.003496322,
  0.02725591,
  0.011551725,
  -0.01982584,
  -0.0076641846,
  0.032505013,
  0.008064644,
  -0.0116133345,
  -0.019591726,
  0.018088462,
  0.039824188,
  -0.06801656,
  -0.005674207,
  0.034772232,
  -0.02376883,
  -0.06348212,
  0.062102072,
  0.019185105,
  0.04058814,
  0.010079265,
  -0.02705876,
  0.05268819,
  0.032160003,
  -0.024397243,
  -0.0064997706,
  -0.022967

In [10]:
index = pc.Index("rag2")
index.upsert(
  vectors=processed_data,
  namespace="ns2"
)

{'upserted_count': 20}

In [11]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns2': {'vector_count': 20}},
 'total_vector_count': 20}