### 1. Setup

In [None]:
!pip install pandas requests scikit-learn matplotlib seaborn

In [None]:
import pandas as pd
import requests
import json
import time
import random
from sklearn.metrics import accuracy_score
import io
import os
from google.colab import files

In [None]:
# Ask the user for the dataset through Colab's upload widget. The returned
# object is indexed by uploaded file name (see how later cells use it).
uploaded = files.upload()

# Prefer the canonical file name; otherwise fall back to whatever came first.
filename = "yelp.csv"
if filename not in uploaded:
    if not uploaded:
        print("No file uploaded. Please rerun this cell and upload 'yelp.csv'.")
    else:
        filename = next(iter(uploaded))
        print(f"Using uploaded file: {filename}")

In [None]:
# SECURITY: the API key was previously embedded in this cell as a string
# literal. Anything committed in a notebook is readable by everyone with
# access to the file, so the old key should be treated as leaked and revoked.
# The key is now read from the QUBRID_API_KEY environment variable.
API_KEY = os.environ.get("QUBRID_API_KEY", "")
if not API_KEY:
    print("Warning: QUBRID_API_KEY is not set; API calls will fail until it is provided.")

# Chat-completions endpoint and the model every prompt strategy is sent to.
API_URL = "https://platform.qubrid.com/api/v1/qubridai/chat/completions"
MODEL_NAME = "openai/gpt-oss-120b"

# Number of reviews to evaluate and the seed that makes the sample repeatable.
NUM_SAMPLES = 200
SEED = 42

In [None]:
def stratified_sample(df, n, seed, label_col="stars", num_classes=5):
    """Draw up to ``n`` rows from ``df``, balanced across ``label_col`` values.

    Each label group contributes at most ``n // num_classes`` rows. If that
    yields fewer than ``n`` rows (some groups are small), the shortfall is
    filled with rows drawn at random from the rows not yet selected. When
    ``label_col`` is absent, a plain random sample is returned instead.

    Args:
        df: Source DataFrame.
        n: Requested number of rows.
        seed: ``random_state`` passed to every ``sample`` call.
        label_col: Column to balance on.
        num_classes: Divisor used to compute the per-label quota.

    Returns:
        A new DataFrame with a fresh 0..k-1 index. It holds fewer than ``n``
        rows only when ``df`` itself does not contain enough rows.
    """
    if df.empty:
        return df.reset_index(drop=True)

    if label_col not in df.columns:
        # Clamp so a small dataset yields "everything" instead of ValueError.
        picked = df.sample(n=min(n, len(df)), random_state=seed)
        return picked.reset_index(drop=True)

    per_class = n // num_classes
    # Sample each group explicitly rather than via groupby(...).apply(...):
    # passing grouping columns into apply is deprecated in newer pandas and
    # would eventually drop the label column from the result.
    groups = [
        group.sample(n=min(len(group), per_class), random_state=seed)
        for _, group in df.groupby(label_col)
    ]
    picked = pd.concat(groups) if groups else df.iloc[0:0]

    shortfall = n - len(picked)
    if shortfall > 0:
        leftovers = df.drop(picked.index)
        top_up = min(shortfall, len(leftovers))
        if top_up > 0:
            extra = leftovers.sample(n=top_up, random_state=seed)
            picked = pd.concat([picked, extra])

    return picked.reset_index(drop=True)


# Load the dataset from the upload if present, else from a file on disk, then
# draw the evaluation sample. Errors are reported rather than raised, so on
# failure `sample_df` is not (re)assigned by this cell.
try:
    if filename in uploaded:
        df = pd.read_csv(io.BytesIO(uploaded[filename]))
    elif os.path.exists(filename):
        df = pd.read_csv(filename)
    else:
        raise FileNotFoundError("Dataset not found.")

    print(f"Dataset loaded. Total rows: {len(df)}")

    sample_df = stratified_sample(df, NUM_SAMPLES, SEED)
    print(f"Sampled {len(sample_df)} reviews.")

except Exception as e:
    print(f"Error loading data: {e}")

### 5. Define Prompts

In [None]:
def get_zero_shot_prompt(review_text):
    """Build the zero-shot prompt: task instructions plus the review, no examples.

    Args:
        review_text: Review body, inserted between double quotes as-is.

    Returns:
        The prompt string; it begins and ends with a newline.
    """
    prompt_lines = (
        "",
        "You are a helpful assistant that analyzes Yelp reviews.",
        "Classify the following review into a star rating (1 to 5).",
        'Return the result in strictly valid JSON format with keys: "predicted_stars" (integer) and "explanation" (string).',
        "Do not output any markdown formatting or extra text, just the specific JSON.",
        "",
        f'Review: "{review_text}"',
        "",
    )
    return "\n".join(prompt_lines)

def get_few_shot_prompt(review_text):
    """Build the few-shot prompt: instructions, three worked examples, then the review.

    The examples cover ratings 1, 3 and 5, each paired with the JSON answer
    the model is expected to imitate.

    Args:
        review_text: Review body, inserted between double quotes as-is.

    Returns:
        The prompt string; it begins and ends with a newline.
    """
    instructions = (
        "\n"
        "You are a helpful assistant that analyzes Yelp reviews.\n"
        "Classify the following review into a star rating (1 to 5).\n"
        'Return the result in strictly valid JSON format with keys: "predicted_stars" (integer) and "explanation" (string).\n'
        "Do not output any markdown formatting or extra text, just the specific JSON.\n"
        "\n"
    )
    # (review, rating, explanation) triples rendered as demonstration pairs.
    examples = (
        (
            "The food was terrible and the service was rude.",
            1,
            "Negative sentiment regarding both food and service.",
        ),
        (
            "It was okay, nothing special but not bad.",
            3,
            "Neutral sentiment, average experience.",
        ),
        (
            "Absolutely amazing! Best pizza I've ever had.",
            5,
            "Highly positive sentiment, strong praise.",
        ),
    )

    pieces = [instructions, "Examples:\n"]
    for sample_text, sample_stars, sample_reason in examples:
        pieces.append(
            f'Review: "{sample_text}"\n'
            f'JSON: {{"predicted_stars": {sample_stars}, "explanation": "{sample_reason}"}}\n'
            "\n"
        )
    pieces.append(f'Review: "{review_text}"\n')
    return "".join(pieces)

def get_cot_prompt(review_text):
    """Build the chain-of-thought prompt.

    Same task as the other prompts, with two extra instruction lines asking
    the model to reason about food, service and ambiance before rating.

    Args:
        review_text: Review body, inserted between double quotes as-is.

    Returns:
        The prompt string; it begins and ends with a newline.
    """
    task = (
        "\n"
        "You are a helpful assistant that analyzes Yelp reviews.\n"
        "Classify the following review into a star rating (1 to 5).\n"
    )
    reasoning = (
        "First, think step-by-step about the sentiment expressed in the review regarding different aspects like food, service, and ambiance.\n"
        "Then, determine the final rating.\n"
    )
    output_format = (
        'Return the result in strictly valid JSON format with keys: "predicted_stars" (integer) and "explanation" (string).\n'
        "Do not output any markdown formatting or extra text, just the specific JSON.\n"
        "\n"
    )
    review_line = f'Review: "{review_text}"\n'
    return task + reasoning + output_format + review_line

In [None]:
def call_api(prompt, model=MODEL_NAME, retries=3, timeout=30, backoff=2):
    """Send one chat-completion request and return the decoded JSON body.

    The prompt is sent as a single user message with temperature 0.0. The
    call is attempted up to ``retries`` times. Network errors, undecodable
    bodies, and HTTP 408/429/5xx responses are retried with exponential
    back-off; any other non-200 status (for example an authentication
    failure) ends the attempts immediately because repeating the same
    request is not expected to help. Every failure is printed so problems
    are visible while the evaluation loop runs.

    Args:
        prompt: Text sent as the user message.
        model: Model identifier. NOTE: the default is bound to the value of
            ``MODEL_NAME`` at the time this function is defined.
        retries: Maximum number of attempts.
        timeout: Per-request timeout in seconds, passed to ``requests.post``.
        backoff: Base delay in seconds; the wait doubles after each failed
            attempt (backoff, 2*backoff, ...).

    Returns:
        The parsed JSON response on HTTP 200, otherwise ``None``.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.0,
        "max_tokens": 1024,
        "stream": False,
    }

    for attempt in range(1, retries + 1):
        try:
            response = requests.post(
                API_URL, headers=headers, json=payload, timeout=timeout
            )
            if response.status_code == 200:
                return response.json()
            failure = f"HTTP {response.status_code}"
            retryable = (
                response.status_code in (408, 429) or response.status_code >= 500
            )
        except (requests.RequestException, ValueError) as exc:
            # ValueError covers a 200 response whose body is not valid JSON.
            failure = f"{type(exc).__name__}: {exc}"
            retryable = True

        print(f"API call failed (attempt {attempt}/{retries}): {failure}")
        if not retryable:
            break
        if attempt < retries:
            # No sleep after the final attempt; nothing is left to wait for.
            time.sleep(backoff * 2 ** (attempt - 1))

    return None

def parse_response(api_response):
    """Extract the model's JSON object from a chat-completion response.

    The text is taken from ``choices[0].message.content``; if that is missing
    or empty, the top-level ``content`` key is used instead. Markdown code
    fences are stripped before the text is parsed as JSON. Parsing is
    deliberately strict: no attempt is made to salvage JSON embedded in
    surrounding prose.

    Args:
        api_response: Decoded response body (a dict), or a falsy value when
            the request failed.

    Returns:
        ``(data, True)`` when the content parses to a JSON object (dict);
        ``(None, False)`` for a missing response, missing content, malformed
        JSON, or valid JSON that is not an object.
    """
    if not api_response:
        return None, False

    try:
        # An empty/absent choices list must still reach the fallback below.
        choices = api_response.get("choices") or [{}]
        message = choices[0].get("message") or {}
        content = message.get("content")
        if not content:
            content = api_response.get("content")
        if not content:
            return None, False

        content = content.replace("```json", "").replace("```", "").strip()
        data = json.loads(content)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # Unexpected response shape, or content that is not valid JSON
        # (json.JSONDecodeError is a ValueError).
        return None, False

    # Callers read keys from the result, so a JSON list/number/string is
    # not a usable answer even though it parsed.
    if not isinstance(data, dict):
        return None, False
    return data, True

In [None]:
def _to_star_rating(value):
    """Coerce a model-supplied rating to ``int``; return ``None`` if it is not a whole number."""
    if isinstance(value, bool):
        return None
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return int(number) if number.is_integer() else None


# One dict per review, later turned into `results_df`.
results = []
# Prompt builders keyed by the strategy name used in column names and metrics.
prompts_map = {
    "Zero-shot": get_zero_shot_prompt,
    "Few-shot": get_few_shot_prompt,
    "Chain-of-Thought": get_cot_prompt
}
# Per-strategy counters: correct predictions, parseable replies, total calls.
metrics = {strategy: {"correct": 0, "valid_json": 0, "total": 0} for strategy in prompts_map}

print("Starting evaluation...")
for index, row in sample_df.iterrows():
    review_text = row['text']
    actual_stars = row['stars']

    row_result = {
        "review_id": row.get('review_id', index),
        "text": review_text,
        "actual_stars": actual_stars
    }

    for strategy_name, prompt_func in prompts_map.items():
        prompt = prompt_func(review_text)
        response = call_api(prompt)
        data, is_valid = parse_response(response)
        # Valid JSON that is not an object (e.g. a bare list or number) has no
        # keys to read; count it as invalid instead of crashing mid-run.
        is_valid = bool(is_valid) and isinstance(data, dict)

        metrics[strategy_name]["total"] += 1
        row_result[f"{strategy_name}_valid"] = is_valid

        if is_valid:
            metrics[strategy_name]["valid_json"] += 1
            p_stars = data.get("predicted_stars")
            # Keep the raw value in the results for inspection.
            row_result[f"{strategy_name}_predicted"] = p_stars
            row_result[f"{strategy_name}_explanation"] = data.get("explanation")

            # Compare on the normalized rating so "4" or 4.0 still matches 4.
            rating = _to_star_rating(p_stars)
            if rating is not None and rating == actual_stars:
                metrics[strategy_name]["correct"] += 1
        else:
            row_result[f"{strategy_name}_predicted"] = None
            row_result[f"{strategy_name}_explanation"] = "JSON Parsing Failed"

        # Pause between requests to stay gentle on the API.
        time.sleep(1)

    results.append(row_result)
    if (index + 1) % 10 == 0:
        print(f"Processed {index + 1}/{len(sample_df)} reviews")

results_df = pd.DataFrame(results)
print("Evaluation complete.")

In [None]:
# Print a markdown-style summary table, one row per prompting strategy.
# Accuracy is computed over replies that parsed as JSON; validity over all calls.
print("Strategy | Accuracy | JSON Validity Rate")
print("--- | --- | ---")
for strategy in metrics:
    m = metrics[strategy]

    if m["valid_json"] > 0:
        accuracy = (m["correct"] / m["valid_json"]) * 100
    else:
        accuracy = 0

    if m["total"] > 0:
        validity = (m["valid_json"] / m["total"]) * 100
    else:
        validity = 0

    print(f"{strategy} | {accuracy:.2f}% | {validity:.2f}%")

# Persist the per-review results and hand the file to the browser.
results_df.to_csv("task1_results.csv", index=False)
files.download("task1_results.csv")