In [1]:
from dotenv import load_dotenv
import os
import json
import time
from google.cloud import aiplatform
from google.cloud.aiplatform import schema
import google.generativeai as genai
from pinecone import Pinecone, ServerlessSpec

# Load environment variables from .env.local
load_dotenv('.env.local')

# Initialize Pinecone client
pinecone_api_key = os.getenv("PINECONE_API_KEY")
if pinecone_api_key is None:
    raise ValueError("PINECONE_API_KEY environment variable is not set.")

pc = Pinecone(api_key=pinecone_api_key)

# Define the index name and dimension
index_name = "rag"
dimension = 1536

# Check if the index exists before creating it
existing_indexes = pc.list_indexes()
if index_name not in existing_indexes:
    try:
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
        print(f"Index '{index_name}' created successfully.")
    except Exception as e:
        print(f"Error creating index: {e}")
else:
    print(f"Index '{index_name}' already exists.")

# Access environment variables
api_key = os.getenv("GEMINI_API_KEY")
if api_key is None:
    raise ValueError("GEMINI_API_KEY environment variable is not set.")

# Configure the GeminiAI API client
genai.configure(api_key=api_key)

# Function to get embedding from the GeminiAI API
def get_embedding(text):
    """Function to get embedding from the GeminiAI API."""
    try:
        response = genai.generate_embedding(model="gemini-1.5-flash", text=[text])
        print(f"Response received in {time.time() - start_time:.2f} seconds")
        return response['embeddings'][0]['embedding']
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Load the review data
with open("reviews.json", "r") as f:
    data = json.load(f)

processed_data = []

# Create embeddings for each review
for review in data.get("reviews", []):
    try:
        start_time = time.time()
        embedding = get_embedding(review['review'])
        if embedding:
            processed_data.append(
                {
                    "values": embedding,
                    "id": review["professor"],
                    "metadata": {
                        "review": review["review"],
                        "subject": review["subject"],
                        "stars": review["stars"],
                    }
                }
            )
        else:
            print(f"No embedding returned for review: {review['review']}")
    except Exception as e:
        print(f"Error processing review for professor {review.get('professor', 'unknown')}: {e}")

# Check if there are embeddings to upsert
if processed_data:
    # Insert the embeddings into the Pinecone index
    index = pc.Index(index_name)
    try:
        upsert_response = index.upsert(
            vectors=processed_data,
            namespace="ns1",
        )
        print(f"Upserted count: {upsert_response['upserted_count']}")
    except Exception as e:
        print(f"Error upserting vectors into Pinecone: {e}")
else:
    print("No embeddings were generated, skipping the upsert operation.")

# Print index statistics
try:
    stats = index.describe_index_stats()
    print("Index statistics:", stats)
except Exception as e:
    print(f"Error describing index stats: {e}")

  from .autonotebook import tqdm as notebook_tqdm


Index 'rag' created successfully.
An error occurred: module 'google.generativeai' has no attribute 'generate_embedding'
No embedding returned for review: Great lecturer with clear explanations. Assignments were challenging but fair.
An error occurred: module 'google.generativeai' has no attribute 'generate_embedding'
No embedding returned for review: Amazing professor! Makes complex topics easy to understand.
An error occurred: module 'google.generativeai' has no attribute 'generate_embedding'
No embedding returned for review: Knowledgeable, but lectures can be a bit dry.
An error occurred: module 'google.generativeai' has no attribute 'generate_embedding'
No embedding returned for review: Engaging lectures and helpful during office hours.
An error occurred: module 'google.generativeai' has no attribute 'generate_embedding'
No embedding returned for review: The material is interesting, but the professor can be difficult to follow.
An error occurred: module 'google.generativeai' has no 