In [None]:
# --- CELL 1: SETUP (GROQ ULTRA-SAFE) ---
import os
import pandas as pd
import json
import time
import re
import random
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix
from dotenv import load_dotenv
from openai import OpenAI

# Load environment variables (including GROQ_API_KEY) from a local .env file.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Bind `client` unconditionally so later cells can check it instead of
# failing with a NameError when the key is missing.
client = None

if not GROQ_API_KEY:
    print("Error: GROQ_API_KEY not found. Please check your .env file.")
else:
    client = OpenAI(
        # Groq's OpenAI-compatible endpoint. This must be a plain URL; the
        # previous value was a markdown-formatted link, which is not a valid
        # base URL and made every request fail.
        base_url="https://api.groq.com/openai/v1",
        api_key=GROQ_API_KEY,
    )
    # NOTE: constructing the client does not contact the API; the first real
    # network call happens when a completion is requested.
    print("Connected to Groq")

# Model identifier sent with every chat-completion request in this notebook.
MODEL_NAME = "llama-3.1-8b-instant"

In [None]:
# --- CELL 2: ROBUST API FUNCTIONS ---

def clean_and_parse_json(response_text):
    """
    Parse a model reply into a Python object, tolerating common LLM formatting.

    Markdown code fences (``` and ```json) are stripped first. If the remaining
    text is not valid JSON on its own, the span from the first "{" to the last
    "}" is tried, which recovers replies that wrap the JSON object in prose.

    Args:
        response_text: Raw reply text from the model.

    Returns:
        The decoded JSON value, or None when the input is empty, is not a
        string, or contains no parseable JSON.
    """
    # Non-string input would make re.sub raise TypeError; treat it as "no data".
    if not response_text or not isinstance(response_text, str):
        return None

    text = re.sub(r"```json|```", "", response_text).strip()

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    # Fallback: the model added commentary around the JSON object.
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None

    try:
        return json.loads(text[start:end + 1])
    except json.JSONDecodeError:
        return None

def get_groq_response_safe(prompt, max_retries=5, retry_delay=10):
    """
    Send `prompt` to the chat-completions endpoint and return the reply text.

    Any exception raised by the request is caught and the call is retried after
    `retry_delay` seconds, up to `max_retries` attempts in total. The retry
    count is bounded so that permanent failures (invalid API key, `client`
    never created, unknown model) cannot hang the notebook forever.

    Args:
        prompt: User message content to send.
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait between failed attempts.

    Returns:
        The content string of the first choice, or None if every attempt failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": "You are a helpful AI that outputs strict JSON."},
                    {"role": "user", "content": prompt}
                ],
                # Low temperature keeps the rating output close to deterministic.
                temperature=0.1,
            )
            return response.choices[0].message.content

        except Exception as e:
            if attempt == max_retries:
                # Out of attempts: report and let the caller fall back.
                print(f"      ⚠️ API Error: {e}. Giving up after {max_retries} attempts.")
                return None
            print(f"      ⚠️ API Error: {e}. Retrying in {retry_delay}s...", end="\r")
            time.sleep(retry_delay)

    # Reached only when max_retries <= 0 (no attempt was made).
    return None

def _extract_stars(raw_response, default=3):
    """Return the integer 1-5 rating found in a model reply, or `default` if none is usable."""
    if not raw_response:
        return default

    data = clean_and_parse_json(raw_response)
    # The reply may decode to a list, string or number; only a dict can hold the key.
    if not isinstance(data, dict):
        return default

    try:
        stars = int(data.get('predicted_stars'))
    except (TypeError, ValueError):
        # Missing key (None) or a non-numeric value such as "4 stars".
        return default

    # The prompts ask for 1-5; anything else is treated as an invalid answer.
    return stars if 1 <= stars <= 5 else default


def run_strategy(df, prompt_func, strategy_name="Strategy", delay=5, default_prediction=3):
    """
    Run one prompting strategy over every review and collect predicted ratings.

    Args:
        df: DataFrame with a 'text' column holding the review bodies.
        prompt_func: Callable that turns one review string into a prompt.
        strategy_name: Label used in progress output.
        delay: Seconds to sleep after each request to stay under rate limits;
            pass 0 to disable.
        default_prediction: Rating recorded when the reply is missing or
            cannot be parsed into a valid 1-5 integer.

    Returns:
        A list of integer predictions, one per row of `df`, in row order.
    """
    print(f"\n Starting {strategy_name}...")
    predictions = []
    total = len(df)

    # Count by position rather than index label so the progress display is
    # correct even when `df` does not have a 0..n-1 index.
    for position, review in enumerate(df['text'], start=1):
        print(f"Processing Row {position}/{total}...", end="\r")

        raw_response = get_groq_response_safe(prompt_func(review))
        predictions.append(_extract_stars(raw_response, default_prediction))

        # Cool down between requests to avoid rate limits.
        if delay:
            time.sleep(delay)

    return predictions

In [None]:
# --- CELL 3: DATA LOADING ---
# Load the Yelp reviews and draw a reproducible evaluation sample.
try:
    df = pd.read_csv('yelp.csv')
    df = df[['text', 'stars']].dropna()
    if df.empty:
        raise ValueError("yelp.csv contains no rows with both 'text' and 'stars'")
    # Cap the sample size at the available rows so a small dataset is still
    # evaluated on real data instead of raising inside `sample`.
    sampled_df = df.sample(n=min(200, len(df)), random_state=42).reset_index(drop=True)
except (OSError, KeyError, ValueError) as exc:
    # Missing file, missing columns, or unparseable/empty CSV. Fall back to
    # placeholder rows so the remaining cells still execute, but say so loudly:
    # metrics computed on this data are meaningless.
    print(f"WARNING: could not load 'yelp.csv' ({exc!r}). Using PLACEHOLDER data; results will not be meaningful.")
    sampled_df = pd.DataFrame({'text': ["Good"]*200, 'stars': [5]*200})

print(f"Data Ready: {len(sampled_df)} reviews.")

In [None]:
# --- CELL 4: PROMPTS ---
def prompt_zero_shot(review):
    """Build the zero-shot prompt: rate one review 1-5 and answer as JSON."""
    template = """
    Analyze this Yelp review and predict the star rating (1-5).
    Review: "{review}"
    Output STRICT JSON: {{ "predicted_stars": <int>, "explanation": "<string>" }}
    """
    # Doubled braces in the template render as literal braces in the prompt.
    return template.format(review=review)

def prompt_few_shot(review):
    """Build the few-shot prompt: three worked examples, then the review to rate."""
    template = """
    Predict rating (1-5) based on examples.
    Ex 1: "Worst food." -> 1
    Ex 2: "Okay." -> 3
    Ex 3: "Great!" -> 5
    Review: "{review}"
    Output STRICT JSON: {{ "predicted_stars": <int>, "explanation": "<string>" }}
    """
    # Doubled braces in the template render as literal braces in the prompt.
    return template.format(review=review)

def prompt_cot(review):
    """Build the chain-of-thought prompt: pros, cons, then a 1-5 rating as JSON."""
    template = """
    Analyze step-by-step: Pros, Cons, then Rating (1-5).
    Review: "{review}"
    Output STRICT JSON: {{ "predicted_stars": <int>, "explanation": "<reasoning>" }}
    """
    # Doubled braces in the template render as literal braces in the prompt.
    return template.format(review=review)

In [None]:
# --- CELL 5: RUN ALL STRATEGIES ---
# Each entry: (output column, prompt builder, display label), in execution order.
strategy_plan = [
    ('pred_zero_shot', prompt_zero_shot, "Zero-Shot"),
    ('pred_few_shot', prompt_few_shot, "Few-Shot"),
    ('pred_cot', prompt_cot, "Chain-of-Thought"),
]

for step, (column, builder, label) in enumerate(strategy_plan, start=1):
    sampled_df[column] = run_strategy(sampled_df, builder, label)
    # Pause between strategies, but not after the final one.
    if step < len(strategy_plan):
        print(f"\n {label} Done. Cooling 10s...")
        time.sleep(10)

print("\n ALL DONE!")

In [None]:
# --- CELL 6: RESULTS ---
# Exact-match accuracy of each prompting strategy against the true star ratings.
acc_zero = accuracy_score(sampled_df['stars'], sampled_df['pred_zero_shot'])
acc_few = accuracy_score(sampled_df['stars'], sampled_df['pred_few_shot'])
acc_cot = accuracy_score(sampled_df['stars'], sampled_df['pred_cot'])

results_df = pd.DataFrame({
    'Strategy': ['Zero-Shot', 'Few-Shot', 'Chain-of-Thought'],
    'Accuracy': [acc_zero, acc_few, acc_cot]
})
print("\n--- 🏆 FINAL RESULTS ---")
print(results_df)

# Confusion matrix for the chain-of-thought predictions. The label list is the
# sorted union of observed true and predicted values (the same set
# confusion_matrix would infer), passed explicitly so the heatmap ticks show
# the actual star values instead of positional indices 0..k-1.
star_labels = sorted(set(sampled_df['stars']) | set(sampled_df['pred_cot']))
cot_matrix = confusion_matrix(sampled_df['stars'], sampled_df['pred_cot'], labels=star_labels)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cot_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=star_labels,
    yticklabels=star_labels,
    ax=ax,
)
# confusion_matrix puts true labels on rows and predictions on columns.
ax.set(xlabel="Predicted stars", ylabel="True stars", title="Chain-of-Thought confusion matrix")
plt.show()

# Persist the reviews together with all three prediction columns.
sampled_df.to_csv("task_1_results.csv", index=False)