In [None]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer
import torch

In [2]:
pc = Pinecone(api_key= "PINECONE_API_KEY")
pc.create_index(
    name='rag', dimension=384, metric='cosine', spec=ServerlessSpec(cloud="aws", region="us-east-1")
)

In [3]:
import json
data = json.load(open("reviews.json"))
data["reviews"]

[{'professor': 'Dr. Emily Thompson',
  'subject': 'Biology',
  'stars': 4,
  'review': "Dr. Thompson's lectures are engaging and she explains complex concepts clearly. Very approachable during office hours."},
 {'professor': 'Prof. Michael Chen',
  'subject': 'Computer Science',
  'stars': 5,
  'review': "Brilliant instructor! Prof. Chen's passion for coding is contagious. His projects are challenging but incredibly rewarding."},
 {'professor': 'Dr. Sarah Rodriguez',
  'subject': 'Psychology',
  'stars': 3,
  'review': 'Dr. Rodriguez knows her stuff, but her lectures can be a bit dry. Exams are fair though.'},
 {'professor': 'Prof. Jonathan Lee',
  'subject': 'Mathematics',
  'stars': 4,
  'review': 'Prof. Lee breaks down complex math concepts into digestible pieces. His problem sets are tough but helpful.'},
 {'professor': 'Dr. Olivia Patel',
  'subject': 'Chemistry',
  'stars': 5,
  'review': "Dr. Patel's lab sessions are fantastic! She makes organic chemistry fun and interesting."},

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = SentenceTransformer('sentence-transformers/all-MiniLM-L12-v2').to(device)



In [5]:
sentences = [x["review"] for x in data["reviews"]]
embeddings =  model.encode(sentences)

In [7]:
vectors = []
for d, e in zip(data["reviews"], embeddings):
    vectors.append({
        "values": e,
        "id": d['professor'],
        "metadata": {
            "review": d["review"],
            "subject": d["subject"],
            "stars": d["stars"]
        }
    })

In [8]:
vectors[0]

{'values': array([ 5.07038161e-02,  1.67763047e-02, -1.64733045e-02,  5.14017642e-02,
        -6.98957294e-02,  9.47968196e-03, -4.21546474e-02,  1.52850279e-03,
        -8.35306048e-02,  7.50478916e-03, -6.45772293e-02,  7.37450868e-02,
        -1.15991868e-01,  9.39667076e-02, -5.15341833e-02,  3.15593928e-02,
         3.39771360e-02,  3.07512525e-02,  1.89694650e-02, -2.32788809e-02,
        -4.13217535e-03,  1.06509775e-02, -7.57800490e-02,  1.24105923e-02,
        -2.93630660e-02, -1.00029176e-02,  4.24965024e-02, -5.25870323e-02,
         8.77728462e-02, -7.27882609e-02, -8.91336799e-02, -3.49559374e-02,
         2.44281301e-03,  2.65361611e-02, -1.10609248e-01,  3.21352147e-02,
         5.63334078e-02,  8.56932178e-02,  3.45011726e-02,  9.88056767e-04,
        -6.88491529e-03, -1.05392095e-02,  4.17410396e-02,  1.02026891e-02,
         3.71336862e-02, -6.55791685e-02, -8.82178098e-02, -3.99496115e-04,
        -3.36891934e-02,  7.16498680e-03, -1.19049743e-01, -6.65936098e-02,
  

In [12]:
index = pc.Index('rag')
index.upsert(
    vectors=vectors,
    namespace="ns1"
)

{'upserted_count': 20}