In [1]:
from dotenv import load_dotenv
import os
from google.cloud import aiplatform
from google.cloud.aiplatform import schema
from pinecone import Pinecone, ServerlessSpec
import json
import requests

# Load environment variables from .env.local
load_dotenv('.env.local')

# Initialize Pinecone client
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Define the index name and dimension
index_name = "rag"
dimension = 1536

# Check if the index exists before creating it
existing_indexes = pc.list_indexes()
if index_name not in existing_indexes:
    try:
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
        print(f"Index '{index_name}' created successfully.")
    except Exception as e:
        print(f"Error creating index: {e}")
else:
    print(f"Index '{index_name}' already exists.")

# Initialize GeminiAI API client
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/gemini-1_5-flash:embedText"  # Updated API URL

def get_embedding(text):
    """Function to get embedding from the GeminiAI API."""
    headers = {
        "Authorization": f"Bearer {GEMINI_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "gemini-1.5-flash",
        "text": text
    }
    response = requests.post(GEMINI_API_URL, json=payload, headers=headers)
    print(f"Response status code: {response.status_code}")
    print(f"Response body: {response.text}")
    if response.status_code != 200:
        raise Exception(f"GeminiAI API request failed: {response.status_code} - {response.text}")
    return response.json().get("embedding")

# Load the review data
with open("reviews.json", "r") as f:
    data = json.load(f)

processed_data = []

# Create embeddings for each review
for review in data.get("reviews", []):
    try:
        embedding = get_embedding(review['review'])
        if embedding:
            processed_data.append(
                {
                    "values": embedding,
                    "id": review["professor"],
                    "metadata": {
                        "review": review["review"],
                        "subject": review["subject"],
                        "stars": review["stars"],
                    }
                }
            )
        else:
            print(f"No embedding returned for review: {review['review']}")
    except Exception as e:
        print(f"Error processing review for professor {review.get('professor', 'unknown')}: {e}")

# Check if there are embeddings to upsert
if processed_data:
    # Insert the embeddings into the Pinecone index
    index = pc.Index(index_name)
    try:
        upsert_response = index.upsert(
            vectors=processed_data,
            namespace="ns1",
        )
        print(f"Upserted count: {upsert_response['upserted_count']}")
    except Exception as e:
        print(f"Error upserting vectors into Pinecone: {e}")
else:
    print("No embeddings were generated, skipping the upsert operation.")

# Print index statistics
try:
    stats = index.describe_index_stats()
    print("Index statistics:", stats)
except Exception as e:
    print(f"Error describing index stats: {e}")


  from tqdm.autonotebook import tqdm


Index 'rag' created successfully.
Response status code: 404
Response body: 
Error processing review for professor Dr. Emily Smith: GeminiAI API request failed: 404 - 
Response status code: 404
Response body: 
Error processing review for professor Prof. John Doe: GeminiAI API request failed: 404 - 
Response status code: 404
Response body: 
Error processing review for professor Dr. Sarah Johnson: GeminiAI API request failed: 404 - 
Response status code: 404
Response body: 
Error processing review for professor Prof. Michael Brown: GeminiAI API request failed: 404 - 
Response status code: 404
Response body: 
Error processing review for professor Dr. Linda Davis: GeminiAI API request failed: 404 - 
Response status code: 404
Response body: 
Error processing review for professor Prof. James Wilson: GeminiAI API request failed: 404 - 
Response status code: 404
Response body: 
Error processing review for professor Dr. Karen Taylor: GeminiAI API request failed: 404 - 
Response status code: 404


In [12]:
from dotenv import load_dotenv
import os
import json
import time
import google.generativeai as genai
from pinecone import Pinecone

# Load environment variables from .env.local
load_dotenv('.env.local')

# Access environment variables
api_key = os.getenv("GEMINI_API_KEY")
pinecone_api_key = os.getenv("PINECONE_API_KEY")

if api_key is None or pinecone_api_key is None:
    raise ValueError("GEMINI_API_KEY or PINECONE_API_KEY environment variables are not set.")

# Configure the GeminiAI API client
genai.configure(api_key=api_key)

# Initialize the GeminiAI model
model = genai.GenerativeModel(model_name='gemini-1.5-flash')

# Initialize Pinecone client
pc = Pinecone(api_key=pinecone_api_key)

# Define the index name
index_name = "rag"

def get_embedding(text):
    """Function to get embedding from the GeminiAI API."""
    try:
        response = model.generate_content(text)
        print(f"Response received in {time.time() - start_time:.2f} seconds")
        return response.text
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

# Load the review data
with open("reviews.json", "r") as f:
    data = json.load(f)

processed_data = []

# Create embeddings for each review
for review in data.get("reviews", []):
    try:
        start_time = time.time()
        embedding = get_embedding(review['review'])
        if embedding:
            processed_data.append(
                {
                    "values": embedding,
                    "id": review["professor"],
                    "metadata": {
                        "review": review["review"],
                        "subject": review["subject"],
                        "stars": review["stars"],
                    }
                }
            )
        else:
            print(f"No embedding returned for review: {review['review']}")
    except Exception as e:
        print(f"Error processing review for professor {review.get('professor', 'unknown')}: {e}")

# Check if there are embeddings to upsert
if processed_data:
    # Insert the embeddings into the Pinecone index
    index = pc.Index(index_name)
    try:
        upsert_response = index.upsert(
            vectors=processed_data,
            namespace="ns1",
        )
        print(f"Upserted count: {upsert_response['upserted_count']}")
    except Exception as e:
        print(f"Error upserting vectors into Pinecone: {e}")
else:
    print("No embeddings were generated, skipping the upsert operation.")

# Print index statistics
try:
    stats = index.describe_index_stats()
    print("Index statistics:", stats)
except Exception as e:
    print(f"Error describing index stats: {e}")

Response received in 3.17 seconds
Response received in 1.22 seconds
Response received in 2.66 seconds
Response received in 3.69 seconds
Response received in 1.44 seconds
Response received in 0.81 seconds
Response received in 2.56 seconds
Response received in 2.87 seconds
Response received in 1.43 seconds
Response received in 3.58 seconds
Response received in 2.36 seconds
Response received in 2.97 seconds
Response received in 1.02 seconds
Response received in 2.35 seconds
Response received in 2.76 seconds
Response received in 1.54 seconds
Response received in 1.64 seconds
Response received in 2.55 seconds
Response received in 2.15 seconds
Response received in 2.25 seconds
Error upserting vectors into Pinecone: Expected a list or list-like data structure, but got: This is a positive and concise review of a lecturer!  It captures the key aspects of a good instructor:

* **Clarity:**  "Great lecturer with clear explanations" emphasizes the instructor's ability to communicate effectively.
*

In [10]:
import google.generativeai as genai
import os
import time

# Access environment variable correctly
api_key = os.getenv("GEMINI_API_KEY")

if api_key is None:
    raise ValueError("GEMINI_API_KEY environment variable is not set.")

# Configure the API client
genai.configure(api_key=api_key)

# Initialize the model
model = genai.GenerativeModel(model_name='gemini-1.5-flash')

# Generate content
start_time = time.time()
try:
    response = model.generate_content('Teach me about how an LLM works')
    end_time = time.time()
    print(f"Response received in {end_time - start_time:.2f} seconds")
    print(response.text)
except Exception as e:
    print(f"An error occurred: {e}")


Response received in 8.53 seconds
## Unpacking the Magic Box: How an LLM Works

Let's break down how these powerful language models (LLMs) actually function. Here's a simplified explanation:

**1. The Core:  A Deep Neural Network**

At the heart of an LLM lies a deep neural network (DNN). Imagine it as a complex web of interconnected nodes, organized in multiple layers. These nodes are like tiny processors, performing mathematical calculations based on the data they receive.

**2. The Input: Feeding the Model**

You feed an LLM text data – think massive amounts of books, articles, code, and more. This data is processed and converted into numerical representations (vectors) that the model can understand. 

**3. Learning Through Prediction**

The model then learns by predicting the next word in a sequence. It analyzes the input text, looks for patterns and relationships between words, and uses this information to guess the most likely next word. 

**4. The Magic of Transformers**

Many m