In [3]:
import os
import pandas as pd
import json
import time
from typing import Dict
import google.generativeai as genai


API_KEY = "AIzaSyC_MXS03rmjoFcmY4xKQp1tYiMXx798SwE" 

genai.configure(api_key=API_KEY)

DATA_PATH = "data\yelp.csv"  


  DATA_PATH = "data\yelp.csv"


In [4]:
def load_and_sample_data(filepath: str, sample_size: int = 200) -> pd.DataFrame:
    """
    Load Yelp reviews and sample for testing.
    Expected columns: 'text', 'stars' (or 'rating').
    """
    df = pd.read_csv(filepath)

    # Rename if needed
    if "rating" in df.columns:
        df = df.rename(columns={"rating": "stars"})

    df = df.sample(n=min(sample_size, len(df)), random_state=42).reset_index(drop=True)
    print(f"Loaded {len(df)} reviews")
    print(f"Columns: {df.columns.tolist()}")
    return df

df = load_and_sample_data(DATA_PATH, sample_size=200)
df.head()


Loaded 200 reviews
Columns: ['business_id', 'date', 'review_id', 'stars', 'text', 'type', 'user_id', 'cool', 'useful', 'funny']


Unnamed: 0,business_id,date,review_id,stars,text,type,user_id,cool,useful,funny
0,QVR7dsvBeg8xFt9B-vd1BA,2010-07-22,hwYVJs8Ko4PMjI19QcR57g,4,We got here around midnight last Friday... the...,review,90a6z--_CUrl84aCzZyPsg,5,5,2
1,24qSrF_XOrvaHDBy-gLIQg,2012-01-22,0mvthYPKb2ZmKhCADiKSmQ,5,Brought a friend from Louisiana here. She say...,review,9lJAj_2zCvP2jcEiRjF9oA,0,0,0
2,j0Uc-GuOe-x9_N_IK1KPpA,2009-05-09,XJHknNIecha6h0wkBSZB4w,3,"Every friday, my dad and I eat here. We order ...",review,0VfJi9Au0rVFVnPKcJpt3Q,0,0,0
3,RBiiGw8c7j-0a8nk35JO3w,2010-12-22,z6y3GRpYDqTznVe-0dn--Q,1,"My husband and I were really, really disappoin...",review,lwppVF0Yqkuwt-xaEuugqw,2,2,2
4,U8VA-RW6LYOhxR-Ygi6eDw,2011-01-17,vhWHdemMvsqVNv5zi2OMiA,5,Love this place! Was in phoenix 3 weeks for w...,review,Y2R_tlSk4lTHiLXTDsn1rg,0,1,0


In [6]:
class RatingPredictor:
    """Unified interface for all 3 prompting approaches."""

    def __init__(self, model_name: str = "gemini-1.5-flash"):
        self.model = genai.GenerativeModel(model_name)
        self.model_name = model_name

    # APPROACH 1: Simple Direct Prompt
    def prompt_v1_simple(self, review_text: str) -> Dict:
        """
        Simple prompt: Direct classification request.
        Pros: Fast, simple
        Cons: May not return valid JSON, inconsistent formatting
        """
        prompt = f"""Read this review and predict the star rating (1-5).

Review: {review_text}

Respond in JSON format:
{{"predicted_stars": <number>, "explanation": "<reason>"}}"""

        try:
            response = self.model.generate_content(prompt)
            text = response.text

            # Try to parse JSON
            json_start = text.find("{")
            json_end = text.rfind("}") + 1
            if json_start != -1 and json_end > json_start:
                json_str = text[json_start:json_end]
                result = json.loads(json_str)
            else:
                result = {"predicted_stars": -1, "explanation": "Parse error"}
        except Exception as e:
            result = {"predicted_stars": -1, "explanation": f"Error: {str(e)}"}

        return result

    # APPROACH 2: Structured with Clear Constraints
    def prompt_v2_structured(self, review_text: str) -> Dict:
        """
        Structured prompt: Clear format + constraints.
        Pros: Better JSON validity, more consistent
        Cons: Still may have edge cases
        """
        prompt = f"""You are a sentiment analysis expert. Analyze this review and predict a star rating.

REVIEW:
{review_text}

INSTRUCTIONS:
1. Determine overall sentiment (1=very negative, 5=very positive)
2. Return ONLY valid JSON (no extra text)
3. predicted_stars must be an integer between 1 and 5
4. explanation must be 1-2 sentences

REQUIRED JSON OUTPUT:
{{"predicted_stars": <1-5>, "explanation": "<brief reason>"}}"""

        try:
            response = self.model.generate_content(prompt)
            text = response.text.strip()

            # Parse JSON
            json_start = text.find("{")
            json_end = text.rfind("}") + 1
            if json_start != -1 and json_end > json_start:
                json_str = text[json_start:json_end]
                result = json.loads(json_str)
                # Validate stars
                if not (1 <= result.get("predicted_stars", -1) <= 5):
                    result["predicted_stars"] = 3
            else:
                result = {"predicted_stars": 3, "explanation": "Parse error"}
        except Exception as e:
            result = {"predicted_stars": 3, "explanation": f"Error: {str(e)}"}

        return result

    # APPROACH 3: Advanced with Few-Shot Examples
    def prompt_v3_fewshot(self, review_text: str) -> Dict:
        """
        Few-shot prompt: Provides examples + detailed rubric.
        Pros: Best accuracy, consistent JSON, best explanations
        Cons: Longer, uses more tokens
        """
        prompt = f"""You are an expert review analyst. Rate this review on a 1-5 scale.

EXAMPLES OF EXPECTED OUTPUT:
Example 1: "Service was terrible, waited 2 hours" 
→ {{"predicted_stars": 1, "explanation": "Customer experienced very poor service with long wait times"}}

Example 2: "Good food, reasonable prices, nice atmosphere"
→ {{"predicted_stars": 4, "explanation": "Positive experience with quality food and fair pricing"}}

Example 3: "Just okay, nothing special"
→ {{"predicted_stars": 3, "explanation": "Neutral experience, met basic expectations but lacked standout qualities"}}

RATING RUBRIC:
1 = Extremely negative (serious issues, strong dissatisfaction)
2 = Negative (multiple problems, disappointed)
3 = Neutral (mixed experience, average)
4 = Positive (mostly good, some minor issues)
5 = Extremely positive (excellent, highly satisfied)

REVIEW TO ANALYZE:
{review_text}

RESPONSE RULES:
- Return ONLY valid JSON
- predicted_stars must be 1-5 integer
- explanation must be 1-2 sentences, specific to this review
- No markdown, no extra text

JSON OUTPUT:
{{"predicted_stars": <1-5>, "explanation": "<specific reason>"}}"""

        try:
            response = self.model.generate_content(prompt)
            text = response.text.strip()

            # Parse JSON
            json_start = text.find("{")
            json_end = text.rfind("}") + 1
            if json_start != -1 and json_end > json_start:
                json_str = text[json_start:json_end]
                result = json.loads(json_str)
                # Validate
                if not (1 <= result.get("predicted_stars", -1) <= 5):
                    result["predicted_stars"] = 3
            else:
                result = {"predicted_stars": 3, "explanation": "Parse error"}
        except Exception as e:
            result = {"predicted_stars": 3, "explanation": f"Error: {str(e)}"}

        return result

    def predict_with_all_approaches(self, review_text: str) -> Dict:
        """Run all 3 approaches and return results, without spamming output."""
        results = {
            "v1_simple": self.prompt_v1_simple(review_text),
            "v2_structured": self.prompt_v2_structured(review_text),
            "v3_fewshot": self.prompt_v3_fewshot(review_text),
        }
        # Small sleep to be kind to the API
        time.sleep(1)
        return results


In [7]:
def is_valid_json(prediction: Dict) -> bool:
    """Check if prediction has required fields with valid types."""
    try:
        stars = prediction.get("predicted_stars")
        expl = prediction.get("explanation")
        return isinstance(stars, int) and 1 <= stars <= 5 and isinstance(expl, str)
    except Exception:
        return False


def calculate_accuracy(actual_stars: int, predicted_stars: int) -> bool:
    """Check if prediction matches actual."""
    return actual_stars == predicted_stars


def evaluate_approach(results_df: pd.DataFrame, approach: str) -> Dict:
    """Evaluate one approach across all reviews."""
    # Column names from our results_df
    predictions = results_df[f"{approach}_prediction"].tolist()
    actual = results_df["actual_stars"].tolist()

    valid_count = sum(1 for p in predictions if is_valid_json(p))
    json_validity_rate = valid_count / len(predictions) * 100

    correct_count = sum(
        1
        for i, p in enumerate(predictions)
        if is_valid_json(p) and calculate_accuracy(actual[i], p.get("predicted_stars"))
    )
    accuracy = correct_count / len(predictions) * 100

    return {
        "approach": approach,
        "json_validity_rate": json_validity_rate,
        "accuracy": accuracy,
        "total_tests": len(predictions),
    }


In [8]:
predictor = RatingPredictor()

all_results = []
total = len(df)

print("Running predictions on sampled reviews...")
for idx, row in df.iterrows():
    # Compact progress: one line, overwrites itself
    print(f"Processing review {idx + 1}/{total}", end="\r", flush=True)
    preds = predictor.predict_with_all_approaches(row["text"])
    all_results.append({
        "review_idx": idx,
        "review_text": row["text"][:100] + "...",
        "actual_stars": row["stars"],
        "v1_simple_prediction": preds["v1_simple"],
        "v2_structured_prediction": preds["v2_structured"],
        "v3_fewshot_prediction": preds["v3_fewshot"],
    })

print("\nDone.")
results_df = pd.DataFrame(all_results)
results_df.head()


Running predictions on sampled reviews...
Processing review 200/200
Done.


Unnamed: 0,review_idx,review_text,actual_stars,v1_simple_prediction,v2_structured_prediction,v3_fewshot_prediction
0,0,We got here around midnight last Friday... the...,4,"{'predicted_stars': -1, 'explanation': 'Error:...","{'predicted_stars': 3, 'explanation': 'Error: ...","{'predicted_stars': 3, 'explanation': 'Error: ..."
1,1,Brought a friend from Louisiana here. She say...,5,"{'predicted_stars': -1, 'explanation': 'Error:...","{'predicted_stars': 3, 'explanation': 'Error: ...","{'predicted_stars': 3, 'explanation': 'Error: ..."
2,2,"Every friday, my dad and I eat here. We order ...",3,"{'predicted_stars': -1, 'explanation': 'Error:...","{'predicted_stars': 3, 'explanation': 'Error: ...","{'predicted_stars': 3, 'explanation': 'Error: ..."
3,3,"My husband and I were really, really disappoin...",1,"{'predicted_stars': -1, 'explanation': 'Error:...","{'predicted_stars': 3, 'explanation': 'Error: ...","{'predicted_stars': 3, 'explanation': 'Error: ..."
4,4,Love this place! Was in phoenix 3 weeks for w...,5,"{'predicted_stars': -1, 'explanation': 'Error:...","{'predicted_stars': 3, 'explanation': 'Error: ...","{'predicted_stars': 3, 'explanation': 'Error: ..."


In [9]:
rows = []
for approach in ["v1_simple", "v2_structured", "v3_fewshot"]:
    metrics = evaluate_approach(results_df, approach)
    rows.append({
        "Approach": approach,
        "JSON Validity (%)": metrics["json_validity_rate"],
        "Accuracy (%)": metrics["accuracy"],
        "Total Tests": metrics["total_tests"],
    })

comparison_df = pd.DataFrame(rows)
comparison_df


Unnamed: 0,Approach,JSON Validity (%),Accuracy (%),Total Tests
0,v1_simple,0.0,0.0,200
1,v2_structured,100.0,16.5,200
2,v3_fewshot,100.0,16.5,200
