# Task 1: Rating Prediction via Prompting

This notebook demonstrates how to predict star ratings (1–5) from Yelp reviews by prompting an LLM (Google Gemini, accessed through `google.generativeai`).
We evaluate three prompting strategies:
1. Zero-shot
2. Rubric-based
3. Self-reasoning (Chain of Thought)

In [None]:
import os
import pandas as pd
import google.generativeai as genai
from dotenv import load_dotenv
from prompts import ZERO_SHOT_PROMPT, RUBRIC_PROMPT, REASONING_PROMPT
from evaluator import parse_llm_output, evaluate_predictions, save_results
import time

# Load environment variables (e.g. from a local .env file) before reading the key.
load_dotenv()

# Configure Gemini.
# `model` is always bound, and stays None when no API key is available, so
# later cells can test for a missing model instead of raising NameError on a
# fresh kernel.
model = None
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    print("WARNING: GEMINI_API_KEY not found in environment variables.")
else:
    genai.configure(api_key=GEMINI_API_KEY)
    # NOTE(review): confirm 'gemini-pro' is still a served model id for the
    # installed google-generativeai version.
    model = genai.GenerativeModel('gemini-pro')

In [None]:
# Read the labelled review sample; when the CSV is absent, build a small
# synthetic stand-in so the rest of the notebook can still run.
try:
    df = pd.read_csv("yelp_reviews_sample.csv")
except FileNotFoundError:
    print("Dataset not found. Creating synthetic dataset...")
    # (review text, true star rating) pairs.
    labelled_samples = [
        ("The food was absolutely amazing! Best pasta I've ever had.", 5),
        ("Service was terrible. Waiter was rude and food took forever.", 1),
        ("It was okay. Not great, not bad. Just average.", 3),
        ("Loved the ambiance, but the food was a bit salty.", 4),
        ("Disgusting hygiene. Found a hair in my soup. Never returning!", 1),
    ]
    data = {
        "review": [text for text, _ in labelled_samples],
        "stars": [rating for _, rating in labelled_samples],
    }
    # Repeat the five rows ten times to simulate a larger dataset; more
    # varied rows would be better for thorough testing.
    df = pd.concat([pd.DataFrame(data)] * 10, ignore_index=True)
else:
    print("Loaded existing dataset.")

print(f"Dataset shape: {df.shape}")
# Plain Python lists used by the evaluation cells below.
reviews = df["review"].tolist()
true_stars = df["stars"].tolist()

In [None]:
def get_llm_prediction(review, prompt_template, delay=1.0, llm=None):
    """Ask the LLM to rate one review and return the parsed prediction.

    Args:
        review: Review text substituted into the template's ``{review}`` field.
        prompt_template: ``str.format`` template containing a ``{review}``
            placeholder.
        delay: Seconds to pause before the request as crude rate limiting.
            Defaults to 1 second; pass 0 to disable.
        llm: Object exposing ``generate_content(prompt)`` whose result has a
            ``.text`` attribute. Defaults to the notebook-level ``model``.

    Returns:
        Whatever ``parse_llm_output`` returns for the response text, or
        ``None`` when no model is configured or any step fails (the error is
        printed, not raised).
    """
    if llm is None:
        # The notebook-level `model` may be unbound or None when the API key
        # is missing; look it up without risking a NameError.
        llm = globals().get("model")
    if llm is None:
        print("Error processing review: LLM model is not configured (is GEMINI_API_KEY set?)")
        return None

    try:
        prompt = prompt_template.format(review=review)
        # Rate limiting handling
        if delay and delay > 0:
            time.sleep(delay)
        response = llm.generate_content(prompt)
        return parse_llm_output(response.text)
    except Exception as e:
        # Deliberate best-effort: one failed review must not abort the whole
        # evaluation loop, so report the error and signal failure with None.
        print(f"Error processing review: {e}")
        return None

In [None]:
# 1. Zero-shot Evaluation
print("Running Zero-shot Evaluation...")
zero_shot_preds = []
for review in reviews[:5]: # Limit to 5 for demo speed, remove slice for full run
    pred = get_llm_prediction(review, ZERO_SHOT_PROMPT)
    # Failed calls are kept as None so predictions stay aligned with `reviews`.
    zero_shot_preds.append(pred)
    # get_llm_prediction returns None on any error; guard before calling .get.
    predicted = pred.get('predicted_stars') if pred is not None else None
    print(f"Review: {review[:30]}... -> Pred: {predicted}")

# Note: In a real run, you would process the full list 'reviews'
# For this file generation, we keep it short.

In [None]:
# 2. Rubric-based Evaluation
print("Running Rubric-based Evaluation...")
rubric_preds = []
for review in reviews[:5]:  # Demo subset; remove the slice for a full run
    pred = get_llm_prediction(review, RUBRIC_PROMPT)
    # Failed calls are kept as None so predictions stay aligned with `reviews`.
    rubric_preds.append(pred)
    # get_llm_prediction returns None on any error; guard before calling .get.
    predicted = pred.get('predicted_stars') if pred is not None else None
    print(f"Review: {review[:30]}... -> Pred: {predicted}")

In [None]:
# 3. Self-reasoning Evaluation
print("Running Self-reasoning Evaluation...")
reasoning_preds = []
for review in reviews[:5]:  # Demo subset; remove the slice for a full run
    pred = get_llm_prediction(review, REASONING_PROMPT)
    # Failed calls are kept as None so predictions stay aligned with `reviews`.
    reasoning_preds.append(pred)
    # get_llm_prediction returns None on any error; guard before calling .get.
    predicted = pred.get('predicted_stars') if pred is not None else None
    print(f"Review: {review[:30]}... -> Pred: {predicted}")

In [None]:
# Comparison & Reporting
# Metrics are computed from the predictions collected above, so they cover
# only the reviews actually sent to the model (the first 5 in the demo run).
# Run the evaluation cells on the full `reviews` list for meaningful numbers.
# NOTE(review): `evaluate_predictions` from evaluator.py may already provide
# this; its signature is not visible here, so the metrics are computed inline.


def _predicted_star(pred):
    """Return the numeric star rating stored in a prediction, or None if unusable."""
    if pred is None:
        return None
    try:
        return int(float(pred.get("predicted_stars")))
    except (AttributeError, TypeError, ValueError, OverflowError):
        return None


def _score(preds, labels):
    """Return (accuracy as a fraction, percentage of usable predictions).

    Predictions are paired with labels by position; failed or unparseable
    predictions count as wrong for accuracy and as invalid for validity.
    """
    if not preds:
        return 0.0, 0.0
    stars = [_predicted_star(pred) for pred in preds]
    usable = sum(star is not None for star in stars)
    correct = sum(
        star is not None and star == label for star, label in zip(stars, labels)
    )
    return correct / len(preds), 100.0 * usable / len(preds)


_preds_by_method = {
    "Zero-shot": zero_shot_preds,
    "Rubric": rubric_preds,
    "Reasoning": reasoning_preds,
}
_scores = [_score(preds, true_stars) for preds in _preds_by_method.values()]

results_data = {
    "Method": list(_preds_by_method),
    "Accuracy": [accuracy for accuracy, _ in _scores],
    # Proxy for JSON validity: share (in %) of responses that produced a
    # usable `predicted_stars` value. presumably parse_llm_output signals
    # invalid JSON this way; verify against evaluator.py.
    "JSON Validity": [validity for _, validity in _scores],
}
pd.DataFrame(results_data)