<a href="https://colab.research.google.com/github/MrPrabhathPeri/AI-Feedback-System/blob/main/AI_Automated_Feedback_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# --- 1. INSTALL GROQ CLIENT ---
!pip install -q groq
from google.colab import userdata

import os
from groq import Groq
import pandas as pd
import json
import time
from sklearn.metrics import accuracy_score

# --- 2. SETUP (3 SEPARATE KEYS) ---
def _client_for_secret(secret_name):
    """Read one secret via ``userdata.get`` and return ``(api_key, Groq client)``."""
    api_key = userdata.get(secret_name)
    return api_key, Groq(api_key=api_key)


print("üîë Loading 3 separate API keys...")

try:
    # One key/client pair per prompting strategy.
    KEY_1, client_1 = _client_for_secret('GROQ_API_KEY_1')  # Zero Shot
    KEY_2, client_2 = _client_for_secret('GROQ_API_KEY_2')  # Few Shot
    KEY_3, client_3 = _client_for_secret('GROQ_API_KEY_3')  # Chain of Thought

    print("‚úÖ Successfully loaded all 3 API keys.")
except Exception as e:
    print("‚ùå Error loading keys. Make sure GROQ_API_KEY_1, _2, and _3 are in Secrets.")
    # Abort here: every later step needs all three clients.
    raise e

# Model used by every request in this notebook.
MODEL_NAME = "llama-3.3-70b-versatile"
print(f"üöÄ Using Groq Model: {MODEL_NAME}")

# --- 3. DATA LOAD ---
def load_review_sample(source, n_rows=200, seed=42):
    """Read a review CSV and return ``(cleaned_frame, sample)``.

    Args:
        source: Path or file-like object accepted by ``pd.read_csv``. The data
            must contain a ``stars`` column.
        n_rows: Maximum number of rows to sample. The sample is capped at the
            number of usable rows, so small files no longer raise.
        seed: ``random_state`` passed to ``DataFrame.sample`` for reproducibility.

    Returns:
        A tuple ``(full, sample)``: ``full`` holds every row whose ``stars``
        value parses to a number between 1 and 5 (cast to ``int``), and
        ``sample`` is a random subset of it with a fresh 0-based index.

    Raises:
        ValueError: If no row carries a valid 1-5 star rating.
    """
    full = pd.read_csv(source)
    stars = pd.to_numeric(full['stars'], errors='coerce')

    # Drop rows without a usable label instead of coercing them to 0: a label
    # of 0 can never match a 1-5 prediction and would only depress accuracy.
    full = full.loc[stars.between(1, 5)].copy()
    if full.empty:
        raise ValueError("no rows with a valid 1-5 star rating")
    full['stars'] = stars.loc[full.index].astype(int)

    # Cap the sample size; sample(n=...) raises when n exceeds the row count.
    sample = full.sample(n=min(n_rows, len(full)), random_state=seed).reset_index(drop=True)
    return full, sample


try:
    # Run on (at most) 200 rows
    full_df, df = load_review_sample('yelp.csv')
    print(f"‚úÖ Loaded dataset. Running on {len(df)} rows.")
except Exception as e:
    # Deliberate best-effort: fall back to a two-row dummy dataset so the
    # remaining cells can still execute end to end.
    print(f"‚ùå Error loading file: {e}")
    df = pd.DataFrame([{"text": "Good", "stars": 5}, {"text": "Bad", "stars": 1}])

# --- 4. HELPER FUNCTION (Accepts specific client) ---
def get_groq_prediction(client, prompt, *, model=None, max_retries=0, retry_delay=2):
    """Request a JSON-formatted chat completion and return its raw text.

    Args:
        client: Object exposing ``chat.completions.create(...)``; callers in
            this file pass a ``Groq`` client.
        prompt: Text sent as the user message.
        model: Model identifier. ``None`` (default) uses the module-level
            ``MODEL_NAME``.
        max_retries: Number of additional attempts after a failed call. The
            default of 0 keeps the original single-attempt behaviour.
        retry_delay: Seconds to sleep after every failed attempt, so the next
            request (a retry or the caller's next row) is not sent immediately.

    Returns:
        The ``content`` string of the first choice, or ``None`` when every
        attempt raised.
    """
    model_name = MODEL_NAME if model is None else model

    for _ in range(max(0, max_retries) + 1):
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that outputs ONLY JSON."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0,
                response_format={"type": "json_object"}
            )
            return completion.choices[0].message.content
        except Exception as e:
            # Best-effort by design: report the failure, pause, and either
            # retry or let the caller record the row as having no answer.
            print(f"   ‚ö†Ô∏è API Error: {e}")
            time.sleep(retry_delay)

    return None

# --- 5. PROMPTS ---

def get_prompt_zero_shot(review_text):
    """Build the zero-shot prompt: a rating rubric followed by the quoted review."""
    # Static instructions. This is a plain (non-f) string, so the JSON braces
    # are written once here.
    rubric = """
    You are an expert Review Sentiment Classifier. Your task is to analyze the user's review and assign a precise star rating (1-5).

    Rating Guide:
    - 1 Star: Completely negative experience, angry customer, severe issues.
    - 2 Stars: Mostly negative, but one redeemable quality.
    - 3 Stars: Mixed feelings. Good food but bad service, or average experience.
    - 4 Stars: Very good, but not perfect.
    - 5 Stars: Outstanding, highly recommended, no complaints.

    Output STRICT JSON format: {"predicted_stars": int, "explanation": "string"}

    """
    # Only this trailing section depends on the input.
    review_section = f'Review: "{review_text}"\n    '
    return rubric + review_section

def get_prompt_few_shot(review_text):
    """Build the few-shot prompt: three worked examples, then the review as the task input."""
    # Fixed demonstrations (1, 3 and 5 stars). Plain string, so the JSON braces
    # are single here.
    demonstrations = """
    You are a Yelp Rating AI. Classify the review into 1-5 stars based on these examples.

    Examples:
    Input: "The waiter was rude and the food was cold. I'm never coming back!"
    Output: {"predicted_stars": 1, "explanation": "Customer expresses anger about both service and food quality. Strong negative sentiment."}

    Input: "The burger was tasty, but we had to wait 45 minutes for a table. It was just okay."
    Output: {"predicted_stars": 3, "explanation": "Mixed sentiment. Positive food comment is outweighed by negative service experience."}

    Input: "Absolutely incredible! The steak was cooked perfectly and the ambiance was lovely."
    Output: {"predicted_stars": 5, "explanation": "Glowing review with no negatives mentioned. High enthusiasm."}

    """
    # The task section mirrors the example layout and leaves "Output:" open
    # for the model to complete.
    task_section = f'Task:\n    Input: "{review_text}"\n    Output:\n    '
    return demonstrations + task_section

def get_prompt_cot(review_text):
    """Build the chain-of-thought prompt: stepwise analysis instructions plus the quoted review."""
    # Static reasoning steps and output contract. Plain string, so the JSON
    # braces are single here.
    reasoning_steps = """
    Analyze the following review using a Step-by-Step Chain of Thought process.

    Steps:
    1. Identify positive mentions (e.g., food taste, ambiance).
    2. Identify negative mentions (e.g., wait time, rude staff).
    3. Weigh the positives against the negatives to determine the final score.

    Return the final rating in JSON format: {"predicted_stars": int, "explanation": "Detailed reasoning based on the steps above."}

    """
    # Only this trailing section depends on the input.
    review_section = f'Review: "{review_text}"\n    '
    return reasoning_steps + review_section


# --- 6. EXPERIMENT ENGINE (Accepts specific client) ---
def _parse_prediction(json_response):
    """Turn a raw model reply into ``(predicted_stars, explanation, is_valid)``.

    A missing reply, malformed JSON, a non-object payload, or a missing or
    non-integer ``predicted_stars`` field yields ``(0, "N/A", False)``.
    """
    if not json_response:
        return 0, "N/A", False
    try:
        parsed = json.loads(json_response)
        pred = int(parsed['predicted_stars'])
        explanation = parsed.get('explanation', "No explanation found")
    except (ValueError, TypeError, KeyError, AttributeError):
        # Only parsing/shape errors are treated as "invalid JSON"; anything
        # else (e.g. KeyboardInterrupt) is allowed to propagate.
        return 0, "N/A", False
    return pred, explanation, True


def run_experiment(name, prompt_func, dataset, specific_client, request_delay=0.3):
    """Run one prompting strategy over every review and score it.

    Args:
        name: Label used in progress and summary output.
        prompt_func: Callable mapping a review text to a prompt string.
        dataset: DataFrame with ``text`` and ``stars`` columns.
        specific_client: Client passed through to ``get_groq_prediction``.
        request_delay: Seconds to sleep after each row (default 0.3).

    Returns:
        ``(results_df, acc, validity)`` where ``results_df`` has one row per
        review (``review_text``, ``actual``, ``predicted``, ``explanation``,
        ``valid_json``), ``acc`` is the accuracy over rows with a valid reply
        (0.0 if there are none), and ``validity`` is the fraction of rows with
        a valid reply (0.0 for an empty dataset).
    """
    result_columns = ["review_text", "actual", "predicted", "explanation", "valid_json"]
    results = []
    total = len(dataset)
    print(f"\nüöÄ Running {name}...")

    # enumerate() supplies a 0-based position, so progress output is correct
    # even when the DataFrame index is not 0..n-1.
    for position, (_, row) in enumerate(dataset.iterrows()):
        # Pass the specific client (key) for this experiment
        json_response = get_groq_prediction(specific_client, prompt_func(row['text']))
        pred, explanation, valid = _parse_prediction(json_response)

        results.append({
            "review_text": row['text'],
            "actual": int(row['stars']),
            "predicted": pred,
            "explanation": explanation,
            "valid_json": valid
        })

        if position % 10 == 0:
            print(f"   Processed {position+1}/{total}...", end="\r")
        time.sleep(request_delay)

    # Explicit columns keep the frame well-formed when the dataset is empty.
    results_df = pd.DataFrame(results, columns=result_columns)

    # Accuracy is computed only over rows whose reply could be parsed.
    valid_mask = results_df['valid_json'].astype(bool)
    valid_df = results_df[valid_mask]
    if not valid_df.empty:
        acc = accuracy_score(valid_df['actual'], valid_df['predicted'])
    else:
        acc = 0.0

    validity = valid_mask.mean() if len(results_df) else 0.0

    print(f"\n‚úÖ {name} Done. Accuracy: {acc:.2%} | Validity: {validity:.2%}")
    return results_df, acc, validity

# --- 7. EXECUTE (Using 3 different keys) ---
# Each result set is written to disk as soon as its experiment finishes, so a
# failure in a later run does not discard results that were already collected.

# Experiment 1: Zero Shot using Key 1
res_zero, acc_zero, val_zero = run_experiment("Zero Shot", get_prompt_zero_shot, df, client_1)
res_zero.to_csv("results_zero_shot.csv", index=False)

# Experiment 2: Few Shot using Key 2
res_few, acc_few, val_few = run_experiment("Few Shot", get_prompt_few_shot, df, client_2)
res_few.to_csv("results_few_shot.csv", index=False)

# Experiment 3: CoT using Key 3
res_cot, acc_cot, val_cot = run_experiment("Chain of Thought", get_prompt_cot, df, client_3)
res_cot.to_csv("results_cot.csv", index=False)

# Side-by-side summary of the three approaches
comparison_df = pd.DataFrame({
    "Approach": ["Zero Shot", "Few Shot", "Chain of Thought"],
    "Accuracy": [acc_zero, acc_few, acc_cot],
    "JSON Validity": [val_zero, val_few, val_cot]
})
comparison_df.to_csv("final_comparison.csv", index=False)

print("\nüìä FINAL SCORES")
print(comparison_df)
print("\n‚úÖ DONE! Download the CSV files.")

üîë Loading 3 separate API keys...
‚úÖ Successfully loaded all 3 API keys.
üöÄ Using Groq Model: llama-3.3-70b-versatile
‚úÖ Loaded dataset. Running on 200 rows.

üöÄ Running Zero Shot...

‚úÖ Zero Shot Done. Accuracy: 63.50% | Validity: 100.00%

üöÄ Running Few Shot...

‚úÖ Few Shot Done. Accuracy: 60.50% | Validity: 100.00%

üöÄ Running Chain of Thought...

‚úÖ Chain of Thought Done. Accuracy: 59.50% | Validity: 100.00%

üìä FINAL SCORES
           Approach  Accuracy  JSON Validity
0         Zero Shot     0.635            1.0
1          Few Shot     0.605            1.0
2  Chain of Thought     0.595            1.0

‚úÖ DONE! Download the CSV files.
