In [5]:
from dotenv import load_dotenv
import os
import json
import time
import google.generativeai as genai
import pinecone
from pinecone import Pinecone, ServerlessSpec

# Load environment variables from .env.local
load_dotenv('.env.local')

# Initialize Pinecone client
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if pinecone_api_key is None:
    raise ValueError("PINECONE_API_KEY environment variable is not set.")

pc = Pinecone(api_key=pinecone_api_key)

# Define the index name and dimension
index_name = "rag"
dimension = 768

# Check if the index exists before creating it
existing_indexes = pc.list_indexes()
if index_name not in existing_indexes:
    try:
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
        print(f"Index '{index_name}' created successfully.")
    except Exception as e:
        print(f"Error creating index: {e}")
else:
    print(f"Index '{index_name}' already exists.")

# Access environment variables
api_key = os.getenv("GEMINI_API_KEY")

if api_key is None:
    raise ValueError("GEMINI_API_KEY environment variable is not set.")

# Configure the GeminiAI API client
genai.configure(api_key=api_key)

# Initialize the GeminiAI model
model = genai.GenerativeModel(model_name='gemini-1.5-flash')

def get_embedding(text):
    """Fetch embedding for the provided text using GeminiAI."""
    try:
        response = model.embed_texts([text])
        return response['embeddings'][0]
    except Exception as e:
        print(f"An error occurred while generating embedding: {e}")
        return None

# Load the review data
try:
    with open("reviews.json", "r") as f:
        data = json.load(f)
except FileNotFoundError:
    raise FileNotFoundError("The file 'reviews.json' was not found.")

# Prepare and upsert embeddings
processed_data = []
for review in data.get("reviews", []):
    text = review.get('review')
    professor_id = review.get('professor')
    metadata = {
        "review": text,
        "subject": review.get("subject"),
        "stars": review.get("stars"),
    }
    start_time = time.time()
    embedding = get_embedding(text)
    if embedding:
        processed_data.append({
            "values": embedding,
            "id": professor_id,
            "metadata": metadata
        })
        print(f"Processed embedding for professor '{professor_id}' in {time.time() - start_time:.2f} seconds.")
    else:
        print(f"Failed to process embedding for professor '{professor_id}'.")

# Upsert embeddings into Pinecone
if processed_data:
    try:
        index = pc.Index(index_name)
        index.upsert(vectors=processed_data, namespace="ns1")
        print(f"Successfully upserted {len(processed_data)} embeddings into Pinecone index '{index_name}'.")
    except Exception as e:
        print(f"Error upserting vectors into Pinecone: {e}")
else:
    print("No embeddings were generated. Skipping upsert operation.")

# Retrieve and print index statistics
try:
    stats = index.describe_index_stats()
    print("Index statistics:", stats)
except Exception as e:
    print(f"Error describing index stats: {e}")

Index 'rag' created successfully.
An error occurred while generating embedding: 'GenerativeModel' object has no attribute 'embed_texts'
Failed to process embedding for professor 'Dr. Emily Smith'.
An error occurred while generating embedding: 'GenerativeModel' object has no attribute 'embed_texts'
Failed to process embedding for professor 'Prof. John Doe'.
An error occurred while generating embedding: 'GenerativeModel' object has no attribute 'embed_texts'
Failed to process embedding for professor 'Dr. Sarah Johnson'.
An error occurred while generating embedding: 'GenerativeModel' object has no attribute 'embed_texts'
Failed to process embedding for professor 'Prof. Michael Brown'.
An error occurred while generating embedding: 'GenerativeModel' object has no attribute 'embed_texts'
Failed to process embedding for professor 'Dr. Linda Davis'.
An error occurred while generating embedding: 'GenerativeModel' object has no attribute 'embed_texts'
Failed to process embedding for professor 'P