In [3]:
# Standard library first, then third-party packages.
import os

from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

# Read the .env file two directories up into the process environment; this
# has to run before the API-key lookup below.
load_dotenv(dotenv_path="../../.env")

# Pinecone client shared by the later cells. The key is taken from the
# environment rather than written into the notebook.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))

In [5]:
# Name of the serverless index that stores the professor-review embeddings.
INDEX_NAME = "prof-review-data"

# Creating an index that already exists is rejected by Pinecone, which would
# break "Restart Kernel -> Run All". Only create the index when it is missing
# so this cell can be re-run safely.
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=1024,  # must match the length of the vectors upserted later
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [7]:
import json

# Load the mock reviews: a list of dicts with "professor", "stars" and
# "review" keys. The context manager closes the file handle as soon as parsing
# finishes, and the explicit encoding keeps the result platform-independent.
with open("mock_data.json", encoding="utf-8") as reviews_file:
    data = json.load(reviews_file)
print(data)

[{'professor': 'Dr. Emily Thompson', 'stars': 4, 'review': 'Dr. Thompson is very knowledgeable and passionate about the subject. Her lectures are engaging, but the workload can be heavy at times.'}, {'professor': 'Prof. Michael Chen', 'stars': 5, 'review': "Absolutely brilliant professor! Prof. Chen explains complex concepts in a way that's easy to understand. His office hours are incredibly helpful."}, {'professor': 'Dr. Sarah Patel', 'stars': 3, 'review': "Dr. Patel's course material is interesting, but her teaching style can be a bit dry. Exams are fair, though."}, {'professor': 'Prof. James Wilson', 'stars': 2, 'review': 'Prof. Wilson seems disorganized and often unprepared for class. The course syllabus changes frequently, which is frustrating.'}, {'professor': 'Dr. Lisa Rodriguez', 'stars': 5, 'review': "Dr. Rodriguez is an inspiration! Her real-world examples make the material come alive. She's always willing to help students outside of class."}, {'professor': 'Prof. David Lee',

In [9]:
import requests

# Records (values / id / metadata) collected for the Pinecone upsert; filled
# by the loop at the end of this cell.
processed_data = list()


def generate_embeddings(text):
	"""Embed ``text`` with the hosted ``intfloat/multilingual-e5-large`` model.

	Posts ``text`` to the Hugging Face Inference API and returns the decoded
	JSON body of the response.

	Args:
		text: The string to embed.

	Returns:
		The parsed JSON response as a list. For a single input string this is
		presumably one flat list of floats (the notebook upserts it directly as
		a Pinecone vector) -- TODO confirm against the API's response format.

	Raises:
		requests.HTTPError: If the API responds with a non-2xx status.
		ValueError: If the response body is not a list (for example an error
			object delivered with a 200 status).
	"""
	key = os.getenv("HG_EMBEDDING_API_KEY")
	API_URL = "https://api-inference.huggingface.co/models/intfloat/multilingual-e5-large"
	# Only send an Authorization header when a token is configured; otherwise
	# the literal string "Bearer None" would go over the wire.
	headers = {"Authorization": f"Bearer {key}"} if key else {}
	# Embed the text that was passed in, not a fixed sample sentence, so each
	# review gets its own vector. The timeout keeps a stalled request from
	# hanging the notebook indefinitely.
	response = requests.post(API_URL, headers=headers, json={"inputs": text}, timeout=30)
	# Fail loudly on HTTP errors instead of handing an error payload back to
	# the caller as if it were an embedding.
	response.raise_for_status()
	embedding = response.json()
	if not isinstance(embedding, list):
		raise ValueError(f"Unexpected embedding response: {embedding!r}")
	return embedding

# Turn every review into one Pinecone record: the embedding of the review
# text as the vector, the professor's name as the id, and the raw fields as
# metadata. Records are appended to processed_data in input order.
processed_data.extend(
	{
		"values": generate_embeddings(entry["review"]),
		"id": entry["professor"],
		"metadata": {
			"review": entry["review"],
			"stars": entry["stars"],
			"professor": entry["professor"],
		},
	}
	for entry in data
)



In [11]:
# Handle to the index that holds the review vectors.
index = pc.Index("prof-review-data")

# Write all prepared records into the "review-vectors" namespace. The response
# is left as the cell's last expression so Jupyter displays it.
upsert_response = index.upsert(namespace="review-vectors", vectors=processed_data)
upsert_response

{'upserted_count': 30}

In [12]:
# Fetch the index statistics to check the vector counts after the upsert; the
# bare name on the last line makes Jupyter display the result.
index_stats = index.describe_index_stats()
index_stats

{'dimension': 1024,
 'index_fullness': 0.0,
 'namespaces': {'review-vectors': {'vector_count': 30}},
 'total_vector_count': 30}