In [33]:
from dotenv import load_dotenv
load_dotenv()
import os
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec



In [28]:
# Connect to Pinecone using the API key read from the environment
# (load_dotenv() in the imports cell populates it from a .env file).
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Creating an index that already exists is an error, so only create "bag"
# when it is missing. This keeps the cell safe under Restart & Run All.
if "bag" not in pc.list_indexes().names():
    pc.create_index(
        name="bag",
        dimension=1536,  # must equal the length of the vectors upserted into this index
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [31]:
import json

# Load the review records. The context manager closes the file handle as soon
# as parsing finishes instead of leaving it to the garbage collector, and the
# explicit encoding avoids depending on the platform default.
with open("reviews.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)

# Display the list of review dicts stored under the "reviews" key.
data["reviews"]

[{'professor': 'Dr. John Smith',
  'subject': 'Introduction to Psychology',
  'stars': 5,
  'review': 'Dr. Smith is an amazing professor! His lectures are engaging and he truly cares about his students.'},
 {'professor': 'Dr. Emily Johnson',
  'subject': 'Calculus I',
  'stars': 3,
  'review': 'Dr. Johnson knows her material, but her lectures can be a bit hard to follow at times.'},
 {'professor': 'Dr. Michael Brown',
  'subject': 'Organic Chemistry',
  'stars': 4,
  'review': 'Challenging course, but Dr. Brown is very helpful and provides great resources.'},
 {'professor': 'Dr. Linda Williams',
  'subject': 'World History',
  'stars': 2,
  'review': 'Dr. Williams is knowledgeable, but her lectures are very monotonous and hard to stay awake through.'},
 {'professor': 'Dr. James Jones',
  'subject': 'Introduction to Programming',
  'stars': 5,
  'review': 'Fantastic professor! Dr. Jones makes coding fun and understandable.'},
 {'professor': 'Dr. Patricia Garcia',
  'subject': 'Sociology

In [35]:
# Build one Pinecone-ready record (id, values, metadata) per review.
client = OpenAI()
reviews = data["reviews"]

processed_data = []
if reviews:  # skip the API call entirely when there is nothing to embed
    # Embed every review text in a single batched request rather than paying
    # one network round trip per review.
    response = client.embeddings.create(
        input=[review["review"] for review in reviews],
        model="text-embedding-3-small",
    )

    # Each returned item carries the index of the input it belongs to; sort on
    # it so the embeddings line up with `reviews` regardless of response order.
    embeddings = [
        item.embedding
        for item in sorted(response.data, key=lambda item: item.index)
    ]
    if len(embeddings) != len(reviews):
        raise ValueError(
            f"expected {len(reviews)} embeddings, got {len(embeddings)}"
        )

    # NOTE(review): the professor name is used as the vector id, so two reviews
    # for the same professor would share an id -- confirm that is intended.
    processed_data = [
        {
            "values": embedding,
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            },
        }
        for review, embedding in zip(reviews, embeddings)
    ]




In [36]:
# Sanity check: display the first prepared record (its "values" embedding,
# "id", and "metadata") before anything is sent to Pinecone.
processed_data[0]

{'values': [-0.0014055263,
  0.0049618282,
  -0.042124,
  0.039761487,
  0.027703773,
  -0.020103622,
  0.049969316,
  0.010380557,
  0.017651962,
  -0.039315734,
  0.017596241,
  -0.021117719,
  -0.025252111,
  0.032852262,
  0.03675263,
  -0.0015671131,
  -0.0046637286,
  -0.01701676,
  0.0111996345,
  0.019111814,
  0.024984658,
  -0.01853233,
  0.022934178,
  -0.010809598,
  -0.03759957,
  -0.034657575,
  0.012046572,
  0.0472502,
  0.034991894,
  0.024003994,
  0.059642233,
  -0.0063520316,
  -0.014565097,
  -0.015467754,
  -0.033119716,
  0.012392034,
  -0.032116763,
  0.014955134,
  -0.009578195,
  -0.0002575289,
  0.033721488,
  0.0041455366,
  -0.011138343,
  0.010514284,
  0.048944075,
  -0.03820134,
  0.014319931,
  -0.024605764,
  0.043438982,
  0.042034846,
  -0.043728724,
  0.018309452,
  0.045333445,
  -0.010937752,
  -0.040519275,
  0.027369455,
  0.02089484,
  0.055942453,
  0.0063464595,
  -0.03820134,
  0.041678242,
  0.03490274,
  -0.038357355,
  -0.020794546,
  -0.

In [37]:
# Open a handle to the "bag" index, then write every prepared record into the
# "ns1" namespace. The upsert response is the cell's displayed result.
index = pc.Index("bag")
index.upsert(vectors=processed_data, namespace="ns1")

{'upserted_count': 20}

In [38]:
# Verify the upsert: the stats report the index dimension and the vector count
# for each namespace.
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'ns1': {'vector_count': 20}},
 'total_vector_count': 20}