In [1]:
import pandas as pd
import google.generativeai as genai
import json
import os
from dotenv import load_dotenv
import time
from typing import Dict, List
import re

# Load environment variables
load_dotenv()
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))

print("✓ Setup complete!")

✓ Setup complete!


In [2]:
# Load dataset
print("Loading dataset...")
df = pd.read_csv('yelp.csv')
print(f"Dataset shape: {df.shape}")
print(df.head())

# Sample 150 reviews (50 per approach)
sample_df = df.sample(n=150, random_state=42).reset_index(drop=True)
print(f"\n✓ Sampled {len(sample_df)} reviews for evaluation")

Loading dataset...
Dataset shape: (10000, 10)
              business_id        date               review_id  stars  \
0  9yKzy9PApeiPPOUJEtnvkg  2011-01-26  fWKvX83p0-ka4JS3dc6E5A      5   
1  ZRJwVLyzEJq1VAihDhYiow  2011-07-27  IjZ33sJrzXqU-0X6U8NwyA      5   
2  6oRAC4uyJCsJl1X0WZpVSA  2012-06-14  IESLBzqUCLdSzSqm0eCSxQ      4   
3  _1QQZuf4zZOyFCvXc0o6Vg  2010-05-27  G-WvGaISbqqaMHlNnByodA      5   
4  6ozycU1RpktNG2-1BroVtw  2012-01-05  1uJFq2r5QfJG_6ExMRCaGw      5   

                                                text    type  \
0  My wife took me here on my birthday for breakf...  review   
1  I have no idea why some people give bad review...  review   
2  love the gyro plate. Rice is so good and I als...  review   
3  Rosie, Dakota, and I LOVE Chaparral Dog Park!!...  review   
4  General Manager Scott Petello is a good egg!!!...  review   

                  user_id  cool  useful  funny  
0  rLtl8ZkDX5vH5nAx9C3q5Q     2       5      0  
1  0a2KyEL0d3Yb1V6aivbIuQ     0       

In [23]:
def approach1_basic(review_text: str) -> str:
    """Basic direct prompting"""
    prompt = f"""Given this restaurant review, predict the star rating (1-5).
Return ONLY valid JSON: {{"predicted_stars": <number>, "explanation": "<reason>"}}

Review: {review_text}

JSON:"""
    
    model = genai.GenerativeModel('models/gemini-2.0-flash')
    response = model.generate_content(prompt)
    return response.text

print("✓ Approach 1 defined")

✓ Approach 1 defined


In [24]:
def approach2_fewshot(review_text: str) -> str:
    """Few-shot with examples"""
    prompt = f"""You are an expert at analyzing restaurant reviews.

Examples:
Review: "Absolutely fantastic! Best meal ever!"
JSON: {{"predicted_stars": 5, "explanation": "Extremely positive with superlatives"}}

Review: "Pretty good, would recommend."
JSON: {{"predicted_stars": 4, "explanation": "Positive with recommendation"}}

Review: "It was okay, nothing special."
JSON: {{"predicted_stars": 3, "explanation": "Neutral, average experience"}}

Review: "Disappointing food and slow service."
JSON: {{"predicted_stars": 2, "explanation": "Multiple negative aspects"}}

Review: "Terrible! Never coming back!"
JSON: {{"predicted_stars": 1, "explanation": "Strongly negative"}}

Now rate this review:
Review: {review_text}

Return ONLY valid JSON: {{"predicted_stars": <number>, "explanation": "<reason>"}}

JSON:"""
    
    model = genai.GenerativeModel('models/gemini-2.0-flash')
    response = model.generate_content(prompt)
    return response.text

print("✓ Approach 2 defined")

✓ Approach 2 defined


In [25]:
def approach3_structured(review_text: str) -> str:
    """Chain-of-thought reasoning"""
    prompt = f"""Analyze this review systematically:

Review: "{review_text}"

1. SENTIMENT: Identify positive/negative words
2. ASPECTS: Food? Service? Atmosphere?
3. INDICATORS:
   - "excellent, amazing, best" → 5 stars
   - "good, nice, enjoyed" → 4 stars
   - "okay, average, decent" → 3 stars
   - "disappointing, not great" → 2 stars
   - "terrible, worst, awful" → 1 star

Return ONLY valid JSON: {{"predicted_stars": <1-5>, "explanation": "<brief reason>"}}

JSON:"""
    
    model = genai.GenerativeModel('models/gemini-2.0-flash')
    response = model.generate_content(prompt)
    return response.text

print("✓ Approach 3 defined")

✓ Approach 3 defined


In [26]:
def extract_json(text: str) -> Dict:
    """Extract JSON from response"""
    try:
        return json.loads(text)
    except:
        try:
            # Look for JSON in markdown
            json_match = re.search(r'```json\s*(\{.*?\})\s*```', text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(1))
            
            # Look for JSON object
            json_match = re.search(r'\{[^{}]*"predicted_stars"[^{}]*\}', text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group(0))
        except:
            pass
    return None

print("✓ Helper functions defined")

✓ Helper functions defined


In [27]:
def evaluate_approach(approach_func, sample_df, approach_name):
    """Evaluate a prompting approach"""
    print(f"\n{'='*60}")
    print(f"Evaluating: {approach_name}")
    print(f"{'='*60}")
    
    results = []
    correct = 0
    valid_json = 0
    
    for idx, row in sample_df.iterrows():
        try:
            response = approach_func(row['text'])
            parsed = extract_json(response)
            
            if parsed and 'predicted_stars' in parsed:
                valid_json += 1
                predicted = int(parsed['predicted_stars'])
                actual = row['stars']
                
                if 1 <= predicted <= 5:
                    is_correct = (predicted == actual)
                    if is_correct:
                        correct += 1
                    
                    results.append({
                        'actual': actual,
                        'predicted': predicted,
                        'correct': is_correct
                    })
            
            time.sleep(1)  # Rate limiting
            
            if (idx + 1) % 10 == 0:
                print(f"Processed {idx + 1}/{len(sample_df)}...")
                
        except Exception as e:
            print(f"Error at {idx}: {e}")
    
    accuracy = (correct / len(results) * 100) if results else 0
    json_validity = (valid_json / len(sample_df) * 100)
    
    print(f"\n✓ Accuracy: {accuracy:.1f}%")
    print(f"✓ JSON Validity: {json_validity:.1f}%")
    
    return {
        'approach': approach_name,
        'accuracy': accuracy,
        'json_validity': json_validity,
        'results': results
    }

print("✓ Evaluation function defined")

✓ Evaluation function defined


In [28]:
print("Starting evaluations (FAST VERSION - 30 reviews total)...")
print("This will take ~30 minutes\n")

# Evaluate each approach on 10 reviews
results1 = evaluate_approach(approach1_basic, sample_df.head(10), "Approach 1: Basic")
results2 = evaluate_approach(approach2_fewshot, sample_df.iloc[10:20], "Approach 2: Few-Shot")
results3 = evaluate_approach(approach3_structured, sample_df.iloc[20:30], "Approach 3: Chain-of-Thought")

print("\n" + "="*60)
print("ALL EVALUATIONS COMPLETE!")
print("="*60)

Starting evaluations (FAST VERSION - 30 reviews total)...
This will take ~30 minutes


Evaluating: Approach 1: Basic
Error at 0: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.0-flash
Please retry in 46.796343733s. [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.g

In [22]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Reload environment
load_dotenv()
api_key = os.getenv('GEMINI_API_KEY')

print(f"API Key found: {api_key is not None}")
if api_key:
    print(f"API Key starts with: {api_key[:10]}...")
    print(f"API Key length: {len(api_key)}")
else:
    print("ERROR: API Key not loaded!")

# Try to list available models
genai.configure(api_key=api_key)

try:
    models = genai.list_models()
    print("\n✓ Available models:")
    for model in models:
        if 'generateContent' in model.supported_generation_methods:
            print(f"  - {model.name}")
except Exception as e:
    print(f"\n✗ Error listing models: {e}")

# Test simple generation
try:
    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content("Say hello in one word")
    print(f"\n✓ Test successful! Response: {response.text}")
except Exception as e:
    print(f"\n✗ Test failed: {e}")

API Key found: True
API Key starts with: AIzaSyCFaS...
API Key length: 39

✓ Available models:
  - models/gemini-2.5-flash
  - models/gemini-2.5-pro
  - models/gemini-2.0-flash-exp
  - models/gemini-2.0-flash
  - models/gemini-2.0-flash-001
  - models/gemini-2.0-flash-exp-image-generation
  - models/gemini-2.0-flash-lite-001
  - models/gemini-2.0-flash-lite
  - models/gemini-2.0-flash-lite-preview-02-05
  - models/gemini-2.0-flash-lite-preview
  - models/gemini-2.0-pro-exp
  - models/gemini-2.0-pro-exp-02-05
  - models/gemini-exp-1206
  - models/gemini-2.5-flash-preview-tts
  - models/gemini-2.5-pro-preview-tts
  - models/gemma-3-1b-it
  - models/gemma-3-4b-it
  - models/gemma-3-12b-it
  - models/gemma-3-27b-it
  - models/gemma-3n-e4b-it
  - models/gemma-3n-e2b-it
  - models/gemini-flash-latest
  - models/gemini-flash-lite-latest
  - models/gemini-pro-latest
  - models/gemini-2.5-flash-lite
  - models/gemini-2.5-flash-image-preview
  - models/gemini-2.5-flash-image
  - models/gemini-2.