In [None]:
from dotenv import load_dotenv
import pinecone
import openai
import os
import json

# Rebuild the "rag" Pinecone index from reviews.json: embed every review with
# OpenAI and upsert the vectors (plus review metadata) into namespace "ns1".
#
# NOTE(review): this cell uses the module-level `pinecone.init(...)` and
# `openai.Embedding.create(...)` call style, while the following cell uses the
# `Pinecone(...)` client class. Presumably only one of the two styles matches the
# installed package versions -- confirm which cell is meant to be kept.

# Load environment variables (the API keys read below) from the local dotenv file
load_dotenv(dotenv_path=".env.local")

# Initialize Pinecone
pinecone.init(api_key=os.getenv("PINECONE_API_KEY"), environment="us-east-1")

# Delete the existing index if it exists, so every run starts from an empty index
index_name = "rag"
if index_name in pinecone.list_indexes():
    pinecone.delete_index(index_name)

# Create a Pinecone index
pinecone.create_index(
    name=index_name,
    dimension=1536,  # length of the text-embedding-ada-002 vectors upserted below
    metric="cosine",
    pod_type="p1",  # Adjust this according to your needs
)

# Load the review data. JSON text is UTF-8, so the encoding is pinned instead of
# falling back to the platform's locale default (which can mis-decode or reject
# non-ASCII review text).
with open("reviews.json", encoding="utf-8") as f:
    data = json.load(f)

processed_data = []

# Initialize OpenAI
openai.api_key = os.getenv("OPENAI_API_KEY")

# Create embeddings for each review (one API request per review)
for review in data["reviews"]:
    response = openai.Embedding.create(
        input=review['review'], model="text-embedding-ada-002"
    )
    embedding = response['data'][0]['embedding']
    processed_data.append(
        {
            # NOTE(review): the professor name is the vector id, and an upsert with
            # an id that already exists replaces the stored vector. This assumes
            # each professor appears only once in reviews.json -- TODO confirm.
            "id": review["professor"],
            "values": embedding,
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            }
        }
    )

# Insert the embeddings into the Pinecone index
index = pinecone.Index(index_name)
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns1",
)
print(f"Upserted count: {upsert_response['upserted_count']}")

# Print index statistics
print(index.describe_index_stats())


In [None]:
from dotenv import load_dotenv
load_dotenv()
from pinecone import Pinecone, ServerlessSpec
from groq import Groq
import os
import json

# Rebuild the "rag" serverless Pinecone index from reviews.json: embed every
# review and upsert the vectors (plus review metadata) into namespace "ns1".

# Load environment variables from .env.local
load_dotenv(dotenv_path=".env.local")

# Initialize Pinecone
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

index_name = "rag"

# Delete existing index if it exists. The check matters: deleting an index that
# is not there raises, which would break the very first run of this cell.
if index_name in pc.list_indexes().names():
    pc.delete_index(name=index_name)

# Create a new Pinecone index
pc.create_index(
    name=index_name,
    dimension=1536,  # must equal the length of the embedding vectors upserted below
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

# Load the review data. The context manager closes the file handle, and the
# encoding is pinned because JSON text is UTF-8 regardless of the platform's
# locale default.
with open("reviews.json", encoding="utf-8") as f:
    data = json.load(f)

processed_data = []
client = Groq()

# Create embeddings for each review using Groq
# NOTE(review): "text-embedding-3-small" is an OpenAI model name; it is unclear
# whether the Groq API serves it. Confirm the model id (and that its output
# length matches the index dimension of 1536) before relying on this cell.
for review in data["reviews"]:
    response = client.embeddings.create(
        input=review['review'], model="text-embedding-3-small"
    )
    embedding = response.data[0].embedding
    processed_data.append(
        {
            "values": embedding,
            # NOTE(review): the professor name is the vector id, and an upsert with
            # an id that already exists replaces the stored vector. This assumes
            # each professor appears only once in reviews.json -- TODO confirm.
            "id": review["professor"],
            "metadata": {
                "review": review["review"],
                "subject": review["subject"],
                "stars": review["stars"],
            }
        }
    )

# Insert the embeddings into the Pinecone index
index = pc.Index(index_name)
upsert_response = index.upsert(
    vectors=processed_data,
    namespace="ns1",
)
print(f"Upserted count: {upsert_response['upserted_count']}")

# Print index statistics
print(index.describe_index_stats())
