<a href="https://colab.research.google.com/github/Griffin2005/FyndAssignment/blob/main/Task1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q pandas tqdm requests


In [2]:
import pandas as pd
import json
import requests
from tqdm import tqdm

In [3]:
df = pd.read_csv("yelp.csv")
df_sample = df.sample(n=200, random_state=42).reset_index(drop=True)

In [11]:
from google.colab import userdata

OPENROUTER_API_KEY = userdata.get("OPENROUTER_API_KEY")

In [12]:
def predict_rating(prompt, review_text):
    full_prompt = prompt.format(review_text=review_text)

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json"
    }

    data = {
        "model": "mistralai/mistral-7b-instruct",  # FREE MODEL
        "messages": [
            {"role": "user", "content": full_prompt}
        ]
    }

    response = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json=data
    )

    return response.json()["choices"][0]["message"]["content"]

In [13]:
prompt_v1 = """
Read the Yelp review below and predict the star rating from 1 to 5.

Return output strictly in JSON:
{{
  "predicted_stars": number,
  "explanation": "short reason"
}}

Review:
"{review_text}"
"""

In [33]:
def clean_json_output(text):
    # Remove code block markers if present
    text = text.strip()

    if text.startswith("```"):
        text = text.replace("```json", "").replace("```", "").strip()

    return text

In [39]:
results_v1 = []

for i in tqdm(range(len(df_sample))):
    review = df_sample.loc[i, "text"]
    actual = df_sample.loc[i, "stars"]

    output = predict_rating(prompt_v1, review)

    try:
        cleaned_output = clean_json_output(output)
        parsed = json.loads(cleaned_output)

        predicted = parsed["predicted_stars"]
        valid_json = True
    except:
        predicted = None
        valid_json = False

    results_v1.append({
        "actual": actual,
        "predicted": predicted,
        "valid_json": valid_json
    })

100%|██████████| 200/200 [00:55<00:00,  3.59it/s]


In [40]:
correct = sum(
    1 for r in results_v1
    if r["predicted"] == r["actual"]
)

accuracy_v1 = correct / len(results_v1)
accuracy_v1

0.01

In [41]:
json_validity_v1 = sum(
    1 for r in results_v1 if r["valid_json"]
) / len(results_v1)

json_validity_v1

0.015

In [42]:
valid_preds = [r for r in results_v1 if r["valid_json"]]

accuracy_valid_only = sum(
    1 for r in valid_preds if r["predicted"] == r["actual"]
) / len(valid_preds)

accuracy_valid_only

0.6666666666666666

In [18]:
prompt_v2 = """
You are a strict Yelp review rating classifier.

Your task:
- Read the review
- Assign a star rating from 1 to 5 based on sentiment

Rating guidelines:
1 star: Extremely negative experience, strong complaints
2 stars: Mostly negative, some minor positives
3 stars: Mixed or neutral feedback
4 stars: Mostly positive experience
5 stars: Very positive, enthusiastic praise

IMPORTANT RULES:
- Return ONLY valid JSON
- Do NOT add any extra text
- Do NOT explain outside the JSON

Output format:
{{
  "predicted_stars": number,
  "explanation": "one short sentence"
}}

Review:
"{review_text}"
"""

In [44]:
results_v2 = []

for i in tqdm(range(len(df_sample))):
    review = df_sample.loc[i, "text"]
    actual = df_sample.loc[i, "stars"]

    output = predict_rating(prompt_v2, review)

    try:
        cleaned_output = clean_json_output(output)
        parsed = json.loads(cleaned_output)


        predicted = parsed["predicted_stars"]
        valid_json = True
    except:
        predicted = None
        valid_json = False

    results_v2.append({
        "actual": actual,
        "predicted": predicted,
        "valid_json": valid_json
    })

100%|██████████| 200/200 [01:58<00:00,  1.69it/s]


In [46]:
accuracy_v2 = sum(
    1 for r in results_v2 if r["predicted"] == r["actual"]
) / len(results_v2)

accuracy_v2

0.055

In [47]:
json_validity_v2 = sum(
    1 for r in results_v2 if r["valid_json"]
) / len(results_v2)

json_validity_v2

0.085

In [48]:
valid_v2 = [r for r in results_v2 if r["valid_json"]]

accuracy_v2_valid = sum(
    1 for r in valid_v2 if r["predicted"] == r["actual"]
) / len(valid_v2)

accuracy_v2_valid

0.6470588235294118

In [24]:
prompt_v3 = """
You are a classification system, not a chatbot.

Follow these steps internally:
1. Analyze the sentiment of the review.
2. Decide the most appropriate star rating (1 to 5).
3. Prepare the final answer.

CRITICAL OUTPUT RULES:
- Output ONLY the final answer
- Output MUST be valid JSON
- Do NOT include any extra text
- Do NOT include step explanations

The JSON format MUST be:
{{
  "predicted_stars": number,
  "explanation": "very brief reason"
}}

Review:
"{review_text}"
"""

In [45]:
results_v3 = []

for i in tqdm(range(len(df_sample))):
    review = df_sample.loc[i, "text"]
    actual = df_sample.loc[i, "stars"]

    output = predict_rating(prompt_v3, review)

    try:
        cleaned_output = clean_json_output(output)
        parsed = json.loads(cleaned_output)


        predicted = parsed["predicted_stars"]
        valid_json = True
    except:
        predicted = None
        valid_json = False

    results_v3.append({
        "actual": actual,
        "predicted": predicted,
        "valid_json": valid_json
    })

100%|██████████| 200/200 [01:56<00:00,  1.72it/s]


In [49]:
accuracy_v3 = sum(
    1 for r in results_v3 if r["predicted"] == r["actual"]
) / len(results_v3)

accuracy_v3

0.035

In [50]:
json_validity_v3 = sum(
    1 for r in results_v3 if r["valid_json"]
) / len(results_v3)

json_validity_v3

0.065

In [51]:
valid_v3 = [r for r in results_v3 if r["valid_json"]]

accuracy_v3_valid = sum(
    1 for r in valid_v3 if r["predicted"] == r["actual"]
) / len(valid_v3)

accuracy_v3_valid

0.5384615384615384