In [18]:
from dotenv import load_dotenv
import os
import json
import requests
from pinecone import Pinecone, ServerlessSpec

# Load environment variables from .env.local
load_dotenv('.env.local')

# Initialize Pinecone client
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Define the index name and dimension
index_name = "rag"
dimension = 1536

# Check if the index exists before creating it
existing_indexes = pc.list_indexes()
if index_name not in existing_indexes:
    try:
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
        print(f"Index '{index_name}' created successfully.")
    except Exception as e:
        print(f"Error creating index: {e}")
else:
    print(f"Index '{index_name}' already exists.")

# Initialize GeminiAI API client
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
GEMINI_API_URL = "https://api.gemini.com/v1/your-correct-endpoint"  # Replace with the actual URL

def get_embedding(text):
    """Function to get embedding from the GeminiAI API."""
    headers = {
        "Authorization": f"Bearer {GEMINI_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "text": text
    }
    response = requests.post(GEMINI_API_URL, json=payload, headers=headers)
    if response.status_code != 200:
        raise Exception(f"GeminiAI API request failed: {response.status_code} - {response.text}")
    return response.json().get("embedding")

# Load the review data
with open("reviews.json", "r") as f:
    data = json.load(f)

processed_data = []

# Create embeddings for each review
for review in data.get("reviews", []):
    try:
        embedding = get_embedding(review['review'])
        processed_data.append(
            {
                "values": embedding,
                "id": review["professor"],
                "metadata": {
                    "review": review["review"],
                    "subject": review["subject"],
                    "stars": review["stars"],
                }
            }
        )
    except Exception as e:
        print(f"Error processing review for professor {review.get('professor', 'unknown')}: {e}")

# Insert the embeddings into the Pinecone index
index = pc.Index(index_name)
try:
    upsert_response = index.upsert(
        vectors=processed_data,
        namespace="ns1",
    )
    print(f"Upserted count: {upsert_response['upserted_count']}")
except Exception as e:
    print(f"Error upserting vectors into Pinecone: {e}")

# Print index statistics
try:
    stats = index.describe_index_stats()
    print("Index statistics:", stats)
except Exception as e:
    print(f"Error describing index stats: {e}")

Error creating index: (409)
Reason: Conflict
HTTP response headers: HTTPHeaderDict({'content-type': 'text/plain; charset=utf-8', 'access-control-allow-origin': '*', 'vary': 'origin,access-control-request-method,access-control-request-headers', 'access-control-expose-headers': '*', 'x-pinecone-api-version': '2024-07', 'X-Cloud-Trace-Context': 'adfd6642105f233cd92f5d352243a2c4', 'Date': 'Thu, 22 Aug 2024 07:39:39 GMT', 'Server': 'Google Frontend', 'Content-Length': '85', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000'})
HTTP response body: {"error":{"code":"ALREADY_EXISTS","message":"Resource  already exists"},"status":409}

Error processing review for professor Dr. Emily Smith: GeminiAI API request failed: 404 - {"result":"error","reason":"EndpointNotFound","message":"API entry point `/v1/your-correct-endpoint` not found"}
Error processing review for professor Prof. John Doe: GeminiAI API request failed: 404 - {"result":"error","reason":"EndpointNotFound