In [None]:
!pip install pandas requests




In [None]:
import pandas as pd

# Read the Yelp review export from the working directory and preview it.
df = pd.read_csv(filepath_or_buffer="yelp.csv")
df.head(n=5)


Unnamed: 0,business_id,date,review_id,stars,text,type,user_id,cool,useful,funny
0,9yKzy9PApeiPPOUJEtnvkg,2011-01-26,fWKvX83p0-ka4JS3dc6E5A,5,My wife took me here on my birthday for breakf...,review,rLtl8ZkDX5vH5nAx9C3q5Q,2,5,0
1,ZRJwVLyzEJq1VAihDhYiow,2011-07-27,IjZ33sJrzXqU-0X6U8NwyA,5,I have no idea why some people give bad review...,review,0a2KyEL0d3Yb1V6aivbIuQ,0,0,0
2,6oRAC4uyJCsJl1X0WZpVSA,2012-06-14,IESLBzqUCLdSzSqm0eCSxQ,4,love the gyro plate. Rice is so good and I als...,review,0hT2KtfLiobPvh6cDC8JQg,0,1,0
3,_1QQZuf4zZOyFCvXc0o6Vg,2010-05-27,G-WvGaISbqqaMHlNnByodA,5,"Rosie, Dakota, and I LOVE Chaparral Dog Park!!...",review,uZetl9T0NcROGOyFfughhg,1,2,0
4,6ozycU1RpktNG2-1BroVtw,2012-01-05,1uJFq2r5QfJG_6ExMRCaGw,5,General Manager Scott Petello is a good egg!!!...,review,vYmM4KTsC8ZfQBg-j5MWkw,0,0,0


In [None]:
# Keep only the review text and its star rating; the other columns are unused below.
df = df.loc[:, ["text", "stars"]]
df.head(n=5)


Unnamed: 0,text,stars
0,My wife took me here on my birthday for breakf...,5
1,I have no idea why some people give bad review...,5
2,love the gyro plate. Rice is so good and I als...,4
3,"Rosie, Dakota, and I LOVE Chaparral Dog Park!!...",5
4,General Manager Scott Petello is a good egg!!!...,5


In [None]:
# Draw a fixed-seed sample of 10 reviews and renumber rows 0..9 so that
# later cells can address them with df.loc[i, ...].
df = df.sample(n=10, random_state=42)
df = df.reset_index(drop=True)
df.shape[0]


10

In [101]:
from google.colab import userdata

# Read the OpenRouter key from the Colab secret store instead of hard-coding it.
OPENROUTER_API_KEY = userdata.get("OPENROUTER_API_KEY")

if OPENROUTER_API_KEY is None:
    raise ValueError("OPENROUTER_API_KEY not found in Colab secrets")

# Sanity checks only: report the prefix match and the length, never the key itself.
has_expected_prefix = OPENROUTER_API_KEY.startswith("sk-")
key_length = len(OPENROUTER_API_KEY)
print("Key starts correctly:", has_expected_prefix)
print("Key length:", key_length)


Key starts correctly: True
Key length: 73


In [100]:
import requests
import json

# Endpoint and model used by every LLM call in this notebook.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
MODEL_NAME = "mistralai/mistral-7b-instruct"

# Request headers: bearer authentication plus the referer/title fields sent with each call.
HEADERS = {
    "Authorization": "Bearer {}".format(OPENROUTER_API_KEY),
    "Content-Type": "application/json",
    "HTTP-Referer": "https://colab.research.google.com",
    "X-Title": "Fynd AI Intern Assignment",
}


In [None]:
def call_llm(prompt, timeout=60):
    """Send a single-turn chat prompt to OpenRouter and return the reply text.

    Args:
        prompt: Text sent as the only ``user`` message.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would hang the cell forever.

    Returns:
        The ``content`` of the first choice, or ``None`` when the request
        fails, the body is not JSON, or the payload carries no usable choice.
        Failures are printed instead of raised so that calling loops can skip
        a bad call and keep going.
    """
    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": prompt}],
    }

    try:
        response = requests.post(
            OPENROUTER_URL,
            headers=HEADERS,
            json=payload,
            timeout=timeout,
        )
    except requests.exceptions.RequestException as exc:
        # Connection errors, timeouts, etc.
        print("Request failed:", exc)
        return None

    try:
        result = response.json()
    except ValueError:
        # Error pages and gateway failures are often HTML/plain text, not JSON.
        print("Non-JSON response (HTTP {}):".format(response.status_code), response.text[:200])
        return None

    try:
        return result["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        # Covers error payloads without "choices", an empty choices list,
        # and payloads that are not shaped like a chat completion at all.
        print("Unexpected response:", result)
        return None


In [None]:
# Connectivity smoke test: a trivial prompt to confirm the key and endpoint work.
hello_prompt = "Say hello and tell me you are working."
output = call_llm(hello_prompt)
print(output)


 Hello! I'm working and ready to assist you. How can I help you today?


In [None]:
def prompt_v1(review_text):
    """Build the single-review prompt asking for a 1-5 star prediction as JSON.

    Args:
        review_text: The review body; it is placed between double quotes
            in the prompt exactly as given.

    Returns:
        The prompt string, starting and ending with a newline.
    """
    lines = [
        "",
        "Predict the star rating (1 to 5) for the following Yelp review.",
        "",
        "Review:",
        f'"{review_text}"',
        "",
        "Return the response strictly in JSON with:",
        "- predicted_stars (number)",
        "- explanation (short text)",
        "",
    ]
    return "\n".join(lines)


In [None]:
# Try the single-review prompt on the first sampled review and show the raw reply.
test_review = df.loc[0, "text"]
test_prompt = prompt_v1(test_review)

output = call_llm(test_prompt)
print(output)


 ```json
{
  "predicted_stars": 4,
  "explanation": "The review highlights positive aspects like well-made pub grub, friendly service, quality cocktails, and a good atmosphere. The mention of a great patio for day-drinking and the overall positive tone suggest a high rating, though the late-night emptiness slightly lowers the score from a perfect 5."
}
```


In [None]:
import json
import re


In [None]:
def extract_json(text):
    """Extract the first JSON object embedded in an LLM reply.

    Replies often wrap the object in Markdown code fences or surround it with
    prose, so decoding starts at the first ``{`` and stops at the end of that
    object; anything after it (including stray braces) is ignored.

    Args:
        text: Raw reply text. ``None`` or any other non-string is accepted
            and treated as "nothing to parse".

    Returns:
        The decoded ``dict``, or ``None`` when there is no ``{`` or the text
        starting there is not valid JSON.
    """
    if not isinstance(text, str):
        return None

    start = text.find("{")
    if start == -1:
        return None

    try:
        # raw_decode parses exactly one JSON value and tolerates trailing text.
        parsed, _ = json.JSONDecoder().raw_decode(text, start)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        return None

    return parsed if isinstance(parsed, dict) else None


In [None]:
# Show one reply before and after JSON extraction to check the parser by eye.
first_review = df.loc[0, "text"]
raw_output = call_llm(prompt_v1(first_review))
parsed_output = extract_json(raw_output)

print("RAW OUTPUT:", raw_output, sep="\n")
print("\nPARSED OUTPUT:", parsed_output, sep="\n")


RAW OUTPUT:
 ```json
{
  "predicted_stars": 4,
  "explanation": "The review highlights positive aspects like good food, friendly service, quality cocktails, and a pleasant atmosphere. The mention of a great patio for day-drinking further adds to the positive experience. The only minor drawback is the location being somewhat commercial, but it doesn't significantly detract from the overall positive rating."
}
```

PARSED OUTPUT:
{'predicted_stars': 4, 'explanation': "The review highlights positive aspects like good food, friendly service, quality cocktails, and a pleasant atmosphere. The mention of a great patio for day-drinking further adds to the positive experience. The only minor drawback is the location being somewhat commercial, but it doesn't significantly detract from the overall positive rating."}


In [None]:
# Collects one {"actual": ..., "predicted": ...} record per review scored with prompt_v1.
results_v1 = []


In [None]:
# Pilot run: score only the first 5 sampled reviews with the single-review prompt.
# Reset the list here so that re-running this cell does not append duplicate rows.
results_v1 = []

for i in range(5):
    review_text = df.loc[i, "text"]
    actual_stars = df.loc[i, "stars"]

    raw_output = call_llm(prompt_v1(review_text))
    parsed = extract_json(raw_output)

    # .get() records None (instead of raising KeyError) when the reply parsed
    # as JSON but does not contain the expected key.
    predicted_stars = parsed.get("predicted_stars") if parsed else None

    results_v1.append({
        "actual": actual_stars,
        "predicted": predicted_stars
    })

results_v1


[{'actual': np.int64(4), 'predicted': 4},
 {'actual': np.int64(5), 'predicted': 5},
 {'actual': np.int64(3), 'predicted': 4},
 {'actual': np.int64(1), 'predicted': 1},
 {'actual': np.int64(5), 'predicted': 5}]

In [None]:
import pandas as pd

# Tabulate the collected records: one row per review, columns "actual" and "predicted".
results_v1_df = pd.DataFrame(data=results_v1)
results_v1_df


Unnamed: 0,actual,predicted
0,4,4
1,5,5
2,3,4
3,1,1
4,5,5


In [None]:
# Exact-match accuracy: the fraction of rows where the prediction equals the true rating.
is_exact_match = results_v1_df["actual"] == results_v1_df["predicted"]
accuracy_v1 = is_exact_match.mean()
accuracy_v1


np.float64(0.8)

In [None]:
# Score every sampled review with the single-review prompt (one LLM call per row).
results_v1 = []

for i in range(len(df)):
    review_text = df.loc[i, "text"]
    actual_stars = df.loc[i, "stars"]

    raw_output = call_llm(prompt_v1(review_text))
    parsed = extract_json(raw_output)

    # .get() records None (instead of raising KeyError) when the reply parsed
    # as JSON but does not contain the expected key.
    predicted_stars = parsed.get("predicted_stars") if parsed else None

    results_v1.append({
        "actual": actual_stars,
        "predicted": predicted_stars
    })

results_v1


[{'actual': np.int64(4), 'predicted': 4},
 {'actual': np.int64(5), 'predicted': 5},
 {'actual': np.int64(3), 'predicted': 3},
 {'actual': np.int64(1), 'predicted': 1},
 {'actual': np.int64(5), 'predicted': 5},
 {'actual': np.int64(4), 'predicted': 4},
 {'actual': np.int64(4), 'predicted': 5},
 {'actual': np.int64(4), 'predicted': 5},
 {'actual': np.int64(5), 'predicted': 5},
 {'actual': np.int64(1), 'predicted': 2}]

In [None]:
# Rebuild the results table from the full run and compute exact-match accuracy.
results_v1_df = pd.DataFrame(data=results_v1)

is_exact_match = results_v1_df["actual"] == results_v1_df["predicted"]
accuracy_v1 = is_exact_match.mean()
accuracy_v1


np.float64(0.7)

In [None]:
# Start over from the CSV with a larger fixed-seed sample (200 reviews) for the
# batched experiments; rows are renumbered 0..199.
df = (
    pd.read_csv("yelp.csv")
    .loc[:, ["text", "stars"]]
    .sample(n=200, random_state=42)
    .reset_index(drop=True)
)

df.shape[0]


200

In [None]:
def create_batches(df, batch_size=5):
    """Split ``df`` into consecutive row slices of at most ``batch_size`` rows.

    Slicing is positional (``iloc``), so each slice keeps the index labels of
    the rows it contains. The last slice may be shorter than ``batch_size``.

    Returns:
        A list of DataFrame slices in original row order; empty for an empty frame.
    """
    offsets = range(0, len(df), batch_size)
    return [df.iloc[offset:offset + batch_size] for offset in offsets]

# Group the sampled reviews five at a time; each group becomes one LLM request.
batches = create_batches(df, 5)
len(batches)


40

In [None]:
def prompt_v1_batch(batch_df):
    """Build the baseline batched prompt: a short task description, the JSON
    output format, then every review in ``batch_df``.

    Each review is listed as ``[<index label>] <text>`` so the model can echo
    the label back in its ``index`` field.

    Args:
        batch_df: DataFrame with a ``text`` column; its index labels are used
            as the review identifiers.

    Returns:
        The full prompt string.
    """
    # The rating range is spelled with a plain ASCII hyphen: the previous literal
    # held a mis-encoded en dash, which put garbage characters in the prompt.
    header = """
You are a Yelp review sentiment classifier.

Task:
- Predict the star rating (1-5) based purely on sentiment.
- Be conservative: avoid guessing extreme ratings unless clearly justified.

Return ONLY valid JSON in this format:
[
  {
    "index": 0,
    "predicted_stars": 4,
    "explanation": "short reason"
  }
]

Reviews:
"""
    review_lines = [
        f"\n[{idx}] {text}\n" for idx, text in batch_df["text"].items()
    ]
    return header + "".join(review_lines)


In [98]:
def extract_json_list(text):
    """Extract a JSON array from an LLM reply.

    The candidate span runs from the first ``[`` to the last ``]`` in the
    text, which skips Markdown code fences or prose around the array.

    Args:
        text: Raw reply text. ``None`` or any other non-string is accepted
            and treated as "nothing to parse".

    Returns:
        The decoded ``list``, or ``None`` when no bracketed span exists or the
        span is not valid JSON.
    """
    if not isinstance(text, str):
        return None

    start = text.find("[")
    end = text.rfind("]")

    # Test the -1 "not found" sentinels BEFORE adding 1 for the slice bound;
    # `end < start` also covers a missing "]" because start is >= 0 here.
    if start == -1 or end < start:
        return None

    try:
        parsed = json.loads(text[start:end + 1])
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass.
        return None

    return parsed if isinstance(parsed, list) else None


In [None]:
# Score all batches with the baseline batched prompt.
results_v1 = []
skipped_batches_v1 = 0  # batches whose reply could not be parsed at all

for batch in batches:
    raw_output = call_llm(prompt_v1_batch(batch))
    parsed = extract_json_list(raw_output)

    if parsed is None:
        skipped_batches_v1 += 1
        continue

    for item in parsed:
        # The reply is untrusted: tolerate non-dict items and missing keys.
        if not isinstance(item, dict):
            continue
        idx = item.get("index")
        predicted = item.get("predicted_stars")

        # Accept only integer labels that were actually sent in this batch;
        # an invented index would otherwise raise KeyError or be scored
        # against the wrong review.
        if isinstance(idx, bool) or not isinstance(idx, int) or idx not in batch.index:
            continue
        if predicted is None:
            continue

        actual = df.loc[idx, "stars"]

        results_v1.append({
            "actual": actual,
            "predicted": predicted
        })

# Report coverage: accuracy is computed only over the rows collected here.
print(f"v1: scored {len(results_v1)} of {len(df)} reviews; "
      f"{skipped_batches_v1} unparseable batch(es) skipped")


In [None]:
# Exact-match accuracy for the baseline batched prompt, rounded to two decimals.
results_v1_df = pd.DataFrame(data=results_v1)

hits_v1 = results_v1_df["actual"] == results_v1_df["predicted"]
accuracy_v1 = round(float(hits_v1.mean()), 2)
accuracy_v1


0.33

In [None]:
def prompt_v2_batch(batch_df):
    """Build the rubric-based batched prompt: an explicit 1-5 star rubric, the
    JSON output format, then every review in ``batch_df``.

    Each review is appended as ``[<index label>] <text>`` on its own line,
    preceded by a blank line.
    """
    header = """
You are a Yelp review rating classifier.

Use the following rating rubric STRICTLY:
1 star = Very negative, strong complaints, poor experience
2 stars = Mostly negative, several issues
3 stars = Mixed or neutral, pros and cons
4 stars = Mostly positive, minor issues
5 stars = Very positive, no complaints

For EACH review below:
- Assign a star rating from 1 to 5 using the rubric
- Provide a brief explanation

Return ONLY valid JSON in this format:
[
  {
    "index": 0,
    "predicted_stars": 4,
    "explanation": "short reason"
  }
]

Reviews:
"""
    review_section = "".join(
        f"\n[{label}] {record['text']}\n"
        for label, record in batch_df.iterrows()
    )
    return header + review_section


In [None]:
# Score all batches with the rubric-based prompt.
results_v2 = []
skipped_batches_v2 = 0  # batches whose reply could not be parsed at all

for batch in batches:
    raw_output = call_llm(prompt_v2_batch(batch))
    parsed = extract_json_list(raw_output)

    if parsed is None:
        skipped_batches_v2 += 1
        continue

    for item in parsed:
        # The reply is untrusted: tolerate non-dict items and missing keys.
        if not isinstance(item, dict):
            continue
        idx = item.get("index")
        predicted = item.get("predicted_stars")

        # Accept only integer labels that were actually sent in this batch;
        # an invented index would otherwise raise KeyError or be scored
        # against the wrong review.
        if isinstance(idx, bool) or not isinstance(idx, int) or idx not in batch.index:
            continue
        if predicted is None:
            continue

        actual = df.loc[idx, "stars"]

        results_v2.append({
            "actual": actual,
            "predicted": predicted
        })

# Report coverage: accuracy is computed only over the rows collected here.
print(f"v2: scored {len(results_v2)} of {len(df)} reviews; "
      f"{skipped_batches_v2} unparseable batch(es) skipped")


In [None]:
# Exact-match accuracy for the rubric-based prompt, rounded to two decimals.
results_v2_df = pd.DataFrame(data=results_v2)

hits_v2 = results_v2_df["actual"] == results_v2_df["predicted"]
accuracy_v2 = round(float(hits_v2.mean()), 2)
accuracy_v2


0.42

In [None]:
def prompt_v3_batch(batch_df):
    """Build the few-shot batched prompt: three worked examples (ratings 1, 3
    and 5), the JSON output format, then every review in ``batch_df``.

    Each review is appended as ``[<index label>] <text>`` on its own line,
    preceded by a blank line.
    """
    header = """
You are a Yelp review rating classifier.

Below are examples of correctly rated reviews.

Example 1:
Review: "The food was terrible and the service was rude. I will not come back."
Rating: 1

Example 2:
Review: "The place was okay. Some dishes were good, others were disappointing."
Rating: 3

Example 3:
Review: "Amazing food, friendly staff, and great ambiance. Loved everything."
Rating: 5

Now, rate the following reviews using the same logic.

Return ONLY valid JSON in this format:
[
  {
    "index": 0,
    "predicted_stars": 4,
    "explanation": "short reason"
  }
]

Reviews:
"""
    review_section = "".join(
        f"\n[{label}] {record['text']}\n"
        for label, record in batch_df.iterrows()
    )
    return header + review_section


In [None]:
# Score all batches with the few-shot prompt.
results_v3 = []
skipped_batches_v3 = 0  # batches whose reply could not be parsed at all

for batch in batches:
    raw_output = call_llm(prompt_v3_batch(batch))
    parsed = extract_json_list(raw_output)

    if parsed is None:
        skipped_batches_v3 += 1
        continue

    for item in parsed:
        # The reply is untrusted: tolerate non-dict items and missing keys.
        if not isinstance(item, dict):
            continue
        idx = item.get("index")
        predicted = item.get("predicted_stars")

        # Accept only integer labels that were actually sent in this batch;
        # an invented index would otherwise raise KeyError or be scored
        # against the wrong review.
        if isinstance(idx, bool) or not isinstance(idx, int) or idx not in batch.index:
            continue
        if predicted is None:
            continue

        results_v3.append({
            "actual": df.loc[idx, "stars"],
            "predicted": predicted
        })

# Report coverage: accuracy is computed only over the rows collected here.
print(f"v3: scored {len(results_v3)} of {len(df)} reviews; "
      f"{skipped_batches_v3} unparseable batch(es) skipped")


In [None]:
# Exact-match accuracy for the few-shot prompt, rounded to two decimals.
results_v3_df = pd.DataFrame(data=results_v3)

hits_v3 = results_v3_df["actual"] == results_v3_df["predicted"]
accuracy_v3 = round(float(hits_v3.mean()), 2)
accuracy_v3


0.6

In [None]:
import pandas as pd

# Side-by-side summary of the three batched prompt variants.
prompt_labels = ["Prompt v1 ", "Prompt v2 ", "Prompt v3 "]
prompt_accuracies = [accuracy_v1, accuracy_v2, accuracy_v3]

comparison_df = pd.DataFrame(
    {"Prompt Version": prompt_labels, "Accuracy": prompt_accuracies}
)

comparison_df


Unnamed: 0,Prompt Version,Accuracy
0,Prompt v1,0.33
1,Prompt v2,0.42
2,Prompt v3,0.6
