In [1]:
!pip install google-generativeai tqdm pandas
!pip install --upgrade google-generativeai




In [3]:
import os, requests
from google.colab import userdata
# Resolve the Gemini API key from Colab secrets first, then from the environment.
# SECURITY: the key must never be written into the notebook itself; any key that
# was ever committed in a notebook should be treated as leaked and revoked.
key = userdata.get('GEMINI_API_KEY') or os.environ.get('GEMINI_API_KEY')
if not key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; add it to Colab secrets or the environment."
    )

# List the models visible to this key as a quick connectivity/credentials check.
url = "https://generativelanguage.googleapis.com/v1/models"
r = requests.get(url, params={"key": key}, timeout=20)
print("status:", r.status_code)
# Only the first 2000 characters are shown to keep the cell output short.
print(r.text[:2000])


status: 200
{
  "models": [
    {
      "name": "models/gemini-2.5-flash",
      "version": "001",
      "displayName": "Gemini 2.5 Flash",
      "description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
      "inputTokenLimit": 1048576,
      "outputTokenLimit": 65536,
      "supportedGenerationMethods": [
        "generateContent",
        "countTokens",
        "createCachedContent",
        "batchGenerateContent"
      ],
      "temperature": 1,
      "topP": 0.95,
      "topK": 64,
      "maxTemperature": 2,
      "thinking": true
    },
    {
      "name": "models/gemini-2.5-pro",
      "version": "2.5",
      "displayName": "Gemini 2.5 Pro",
      "description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
      "inputTokenLimit": 1048576,
      "outputTokenLimit": 65536,
      "supportedGenerationMethods": [
        "generateContent",
        "countTokens",
        "createCachedCo

In [4]:
import os, requests
from google.colab import userdata

# Look the key up in Colab secrets, then in the process environment.
key = userdata.get('GEMINI_API_KEY') or os.environ.get('GEMINI_API_KEY')
# Report only whether a key was found, never the key itself.
print("Got key in environment/secrets:", True if key else False)

# Ask the API for its model list and show the status plus the start of the body.
url = "https://generativelanguage.googleapis.com/v1/models"
r = requests.get(url, params=dict(key=key), timeout=20)
print("HTTP status:", r.status_code)
body_preview = r.text[:1000]
print("Body (first 1000 chars):\n", body_preview)


Got key in environment/secrets: True
HTTP status: 200
Body (first 1000 chars):
 {
  "models": [
    {
      "name": "models/gemini-2.5-flash",
      "version": "001",
      "displayName": "Gemini 2.5 Flash",
      "description": "Stable version of Gemini 2.5 Flash, our mid-size multimodal model that supports up to 1 million tokens, released in June of 2025.",
      "inputTokenLimit": 1048576,
      "outputTokenLimit": 65536,
      "supportedGenerationMethods": [
        "generateContent",
        "countTokens",
        "createCachedContent",
        "batchGenerateContent"
      ],
      "temperature": 1,
      "topP": 0.95,
      "topK": 64,
      "maxTemperature": 2,
      "thinking": true
    },
    {
      "name": "models/gemini-2.5-pro",
      "version": "2.5",
      "displayName": "Gemini 2.5 Pro",
      "description": "Stable release (June 17th, 2025) of Gemini 2.5 Pro",
      "inputTokenLimit": 1048576,
      "outputTokenLimit": 65536,
      "supportedGenerationMethods": [
     

In [5]:
import pandas as pd
from tqdm import tqdm
import json
import google.generativeai as genai
import os
from google.colab import userdata

# Resolve the Gemini API key from Colab secrets first, then from the environment.
# SECURITY: no literal key fallback - credentials must not live in the notebook.
key = userdata.get('GEMINI_API_KEY') or os.environ.get('GEMINI_API_KEY')
if not key:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; add it to Colab secrets or the environment."
    )
genai.configure(api_key=key)

df = pd.read_csv("/content/yelp.csv")

# Keep a small, reproducible sample (3 rows, fixed seed) and renumber it from 0.
# Every later cell that reads `df` therefore works on these 3 rows only.
df = df.sample(3, random_state=42).reset_index(drop=True)

# Column names used by evaluate_prompt: review text and ground-truth star rating.
TEXT = "text"
STARS = "stars"

In [6]:
def prompt_v1(review):
    """Build the direct rating prompt for one Yelp review.

    Args:
        review: Review text; it is interpolated between double quotes as-is.

    Returns:
        The prompt string asking for a 1-5 rating as a JSON object with the
        keys ``predicted_stars`` and ``explanation``.
    """
    prompt_lines = [
        "",
        "Rate this Yelp review from 1-5.",
        "",
        "Return ONLY valid JSON:",
        "{",
        '  "predicted_stars": <1-5>,',
        '  "explanation": "<brief reason>"',
        "}",
        "",
        f'Review: "{review}"',
        "",
    ]
    return "\n".join(prompt_lines)

In [7]:
def prompt_v2(review):
    """Build the step-by-step (chain-of-thought) rating prompt for one review.

    Args:
        review: Review text; it is interpolated between double quotes as-is.

    Returns:
        The prompt string listing the four analysis steps and the expected
        JSON format (``predicted_stars`` and ``explanation``).
    """
    # Literal braces are doubled because the template goes through str.format.
    template = (
        "\n"
        "Analyze this Yelp review step-by-step.\n"
        "\n"
        "1. Identify sentiment.\n"
        "2. Identify positive/negative keywords.\n"
        "3. Decide a star rating (1-5).\n"
        "4. Return ONLY valid JSON.\n"
        "\n"
        "JSON format:\n"
        "{{\n"
        '  "predicted_stars": <1-5>,\n'
        '  "explanation": "<why>"\n'
        "}}\n"
        "\n"
        'Review: "{review}"\n'
    )
    return template.format(review=review)

In [8]:
def prompt_v3(review):
    """Build the few-shot rating prompt for one review.

    The prompt shows two worked examples (a 1-star and a 5-star review) and
    then asks the model to classify ``review`` with JSON only.

    Args:
        review: Review text; it is interpolated between double quotes as-is.

    Returns:
        The complete few-shot prompt string.
    """
    # (review text, stars, explanation) for each worked example, in display order.
    worked_examples = (
        ("Terrible service and cold food.", 1, "Very bad experience"),
        ("Amazing food! Loved the ambience.", 5, "Highly positive sentiment"),
    )

    pieces = ["", "You are an expert sentiment classifier.", "", "Here are examples:", ""]
    for number, (example_text, stars, reason) in enumerate(worked_examples, start=1):
        pieces.append(f"Example {number}:")
        pieces.append(f'Review: "{example_text}"')
        pieces.append(f'Output: {{"predicted_stars": {stars}, "explanation": "{reason}"}}')
        pieces.append("")
    pieces.extend(
        [
            "Now classify the next review.",
            "",
            "Return valid JSON only.",
            "",
            f'Review: "{review}"',
            "",
        ]
    )
    return "\n".join(pieces)

In [9]:
import re

def call_llm(prompt):
    """Send ``prompt`` to Gemini and parse the JSON object contained in the reply.

    The reply may be bare JSON, JSON wrapped in a Markdown code fence (with or
    without a ``json`` tag, with any whitespace after the tag), or a JSON
    object surrounded by extra prose.

    Args:
        prompt: Full prompt text to send to the model.

    Returns:
        A ``(data, valid)`` tuple. On success ``data`` is the parsed JSON
        object (a dict) and ``valid`` is ``True``. If the reply has no text or
        no JSON object can be parsed from it, ``data`` is the placeholder
        ``{"predicted_stars": None, "explanation": "JSON parse failed"}`` and
        ``valid`` is ``False``.

    Errors raised by the API call itself are not caught here.
    """
    failure = {"predicted_stars": None, "explanation": "JSON parse failed"}

    response = genai.GenerativeModel("models/gemini-2.5-flash").generate_content(prompt)
    try:
        text = response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response carries no
        # text part (e.g. a blocked reply); count that as an invalid answer.
        return failure, False

    # Prefer the contents of a fenced block; otherwise use the whole reply.
    fenced = re.search(r"```(?:json)?\s*(.*?)```", text, re.DOTALL)
    candidate = (fenced.group(1) if fenced else text).strip()

    try:
        data = json.loads(candidate)
    except ValueError:
        # Fall back to the outermost {...} span when prose surrounds the JSON.
        start = candidate.find("{")
        end = candidate.rfind("}")
        if start == -1 or end < start:
            return failure, False
        try:
            data = json.loads(candidate[start:end + 1])
        except ValueError:
            return failure, False

    # Callers index the result by key, so only a JSON object counts as valid.
    if not isinstance(data, dict):
        return failure, False
    return data, True


def evaluate_prompt(df, prompt_function):
    """Score one prompt variant against the labelled reviews in ``df``.

    For every value in ``df[TEXT]`` the prompt is built with
    ``prompt_function`` and sent through ``call_llm``. The predicted ratings
    are stored in a new ``"predicted"`` column of ``df`` (the frame is modified
    in place, so pass a copy to keep the original untouched).

    Args:
        df: DataFrame with the review text in column ``TEXT`` and the true
            rating in column ``STARS``.
        prompt_function: Callable mapping one review to a prompt string.

    Returns:
        ``(accuracy, json_validity)``: the fraction of rows whose predicted
        rating equals ``df[STARS]``, and the fraction of replies that
        ``call_llm`` flagged as valid. Both are ``0.0`` for an empty frame.
    """
    predictions = []
    valid_replies = 0

    for review in tqdm(df[TEXT]):
        output, valid = call_llm(prompt_function(review))

        # A valid reply may still lack the key; record that as "no prediction".
        stars = output.get("predicted_stars") if isinstance(output, dict) else None
        predictions.append(stars)
        if valid:
            valid_replies += 1

    # Coerce to numbers so a rating returned as the string "4" still matches the
    # integer label; anything non-numeric becomes NaN and never matches.
    df["predicted"] = pd.to_numeric(
        pd.Series(predictions, index=df.index, dtype=object), errors="coerce"
    )

    total = len(df)
    if total == 0:
        # Nothing to score: avoid dividing by zero / returning NaN.
        return 0.0, 0.0

    accuracy = float((df["predicted"] == df[STARS]).mean())
    return accuracy, valid_replies / total

In [None]:
# Score each prompt variant on (at most) the first 10 rows. Every run gets its
# own copy of the frame because evaluate_prompt writes a "predicted" column.
(acc1, json1), (acc2, json2), (acc3, json3) = [
    evaluate_prompt(df.head(10).copy(), prompt_builder)
    for prompt_builder in (prompt_v1, prompt_v2, prompt_v3)
]

# One row per prompt variant, in the same order as the runs above.
results = pd.DataFrame(
    {
        "Prompt Version": ["Direct", "Chain-of-Thought", "Few-Shot"],
        "Accuracy": [acc1, acc2, acc3],
        "JSON Validity": [json1, json2, json3],
    }
)

print(results.to_json(orient='records', indent=4))

In [15]:

# Upper bound on the number of reviews sent to the model in this cell.
SAMPLE_SIZE = 200

# Reproducible sample, capped at the number of rows available in `df`,
# then renumbered from 0 so rows can be addressed by position.
df_sample = df.sample(n=min(len(df), SAMPLE_SIZE), random_state=42)
df_sample = df_sample.reset_index(drop=True)

# Parsed model outputs, one entry per sampled review.
clean_outputs = list()


def get_clean_output(prompt):
    """Query Gemini with ``prompt`` and return the JSON object found in the reply.

    The JSON is taken from a ```` ```json ```` fenced block when one is present,
    otherwise from the span between the first ``{`` and the last ``}``.

    Args:
        prompt: Full prompt text to send to the model.

    Returns:
        The parsed JSON object (a dict). If the reply has no text, cannot be
        parsed, or parses to something that is not a JSON object, the
        placeholder ``{"predicted_stars": None, "explanation": "JSON parse failed"}``
        is returned instead.

    Errors raised by the API call itself are not caught here.
    """
    failure = {"predicted_stars": None, "explanation": "JSON parse failed"}

    response = genai.GenerativeModel("gemini-2.0-flash").generate_content(prompt)
    try:
        text = response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response carries no
        # text part (e.g. a blocked reply).
        return failure

    json_match = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
    else:
        # No fenced block: take everything from the first "{" to the last "}".
        start = text.find("{")
        end = text.rfind("}") + 1
        json_str = text[start:end] if start != -1 else text

    try:
        parsed = json.loads(json_str)
    except ValueError:
        # json.JSONDecodeError is a ValueError; other errors are real bugs
        # and are deliberately not swallowed.
        return failure

    # A bare number/string/array is valid JSON but not the expected object.
    return parsed if isinstance(parsed, dict) else failure



# Run the direct prompt over every sampled review, keeping and printing each
# parsed result under a 1-based sample header.
for sample_number, review_text in enumerate(df_sample["text"], start=1):
    parsed = get_clean_output(prompt_v1(review_text))
    clean_outputs.append(parsed)

    print(f"\n=== Sample {sample_number} ===")
    print(json.dumps(parsed, indent=4))


=== Sample 1 ===
{
    "predicted_stars": 4,
    "explanation": "Positive comments on food, service, cocktails, and patio, with only minor negativity regarding late-night emptiness."
}

=== Sample 2 ===
{
    "predicted_stars": 5,
    "explanation": "Extremely positive review referencing authenticity and high quality compared to Louisiana food."
}

=== Sample 3 ===
{
    "predicted_stars": 4,
    "explanation": "The reviewer uses positive language like 'good', 'filling', and 'hits the spot', and indicates a regular positive experience ('every friday'). While noting it's 'typical strip mall pizza', the overall sentiment is positive."
}


In [17]:

# Upper bound on the number of reviews sent to the model in this cell.
SAMPLE_SIZE = 200

# Reproducible sample, capped at the number of rows available in `df`,
# then renumbered from 0 so rows can be addressed by position.
df_sample = df.sample(n=min(len(df), SAMPLE_SIZE), random_state=42)
df_sample = df_sample.reset_index(drop=True)

# Parsed model outputs, one entry per sampled review.
clean_outputs = list()


def get_clean_output(prompt):
    """Query Gemini with ``prompt`` and return the JSON object found in the reply.

    The JSON is taken from a ```` ```json ```` fenced block when one is present,
    otherwise from the span between the first ``{`` and the last ``}``.

    Args:
        prompt: Full prompt text to send to the model.

    Returns:
        The parsed JSON object (a dict). If the reply has no text, cannot be
        parsed, or parses to something that is not a JSON object, the
        placeholder ``{"predicted_stars": None, "explanation": "JSON parse failed"}``
        is returned instead.

    Errors raised by the API call itself are not caught here.
    """
    failure = {"predicted_stars": None, "explanation": "JSON parse failed"}

    response = genai.GenerativeModel("gemini-2.0-flash").generate_content(prompt)
    try:
        text = response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response carries no
        # text part (e.g. a blocked reply).
        return failure

    json_match = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
    else:
        # No fenced block: take everything from the first "{" to the last "}".
        start = text.find("{")
        end = text.rfind("}") + 1
        json_str = text[start:end] if start != -1 else text

    try:
        parsed = json.loads(json_str)
    except ValueError:
        # json.JSONDecodeError is a ValueError; other errors are real bugs
        # and are deliberately not swallowed.
        return failure

    # A bare number/string/array is valid JSON but not the expected object.
    return parsed if isinstance(parsed, dict) else failure



# Run the step-by-step prompt over every sampled review, keeping and printing
# each parsed result under a 1-based sample header.
for sample_number, review_text in enumerate(df_sample["text"], start=1):
    parsed = get_clean_output(prompt_v2(review_text))
    clean_outputs.append(parsed)

    print(f"\n=== Sample {sample_number} ===")
    print(json.dumps(parsed, indent=4))


=== Sample 1 ===
{
    "predicted_stars": 4,
    "explanation": "The review expresses mixed feelings. While the reviewer notes the place was empty, they praise the food ('well made pub grub'), service ('friendly'), and cocktails ('quality'). The mention of the atmosphere working for a sports bar is positive. The added update about the patio is also a strong positive. The only real negative is the emptiness late at night, which is understood in the context of the location. Overall, the positive aspects outweigh the negative, suggesting a 4-star rating."
}

=== Sample 2 ===
{
    "predicted_stars": 5,
    "explanation": "The review expresses a highly positive sentiment. The phrase \"best she's had outside of Louisiana\" indicates that the crawfish etouffee is exceptionally good and comparable to authentic Louisiana cuisine, which is high praise."
}

=== Sample 3 ===
{
    "predicted_stars": 4,
    "explanation": "The review expresses a positive sentiment. Keywords like \"good,\" \"filli

In [18]:

# Upper bound on the number of reviews sent to the model in this cell.
SAMPLE_SIZE = 200

# Reproducible sample, capped at the number of rows available in `df`,
# then renumbered from 0 so rows can be addressed by position.
df_sample = df.sample(n=min(len(df), SAMPLE_SIZE), random_state=42)
df_sample = df_sample.reset_index(drop=True)

# Parsed model outputs, one entry per sampled review.
clean_outputs = list()


def get_clean_output(prompt):
    """Query Gemini with ``prompt`` and return the JSON object found in the reply.

    The JSON is taken from a ```` ```json ```` fenced block when one is present,
    otherwise from the span between the first ``{`` and the last ``}``.

    Args:
        prompt: Full prompt text to send to the model.

    Returns:
        The parsed JSON object (a dict). If the reply has no text, cannot be
        parsed, or parses to something that is not a JSON object, the
        placeholder ``{"predicted_stars": None, "explanation": "JSON parse failed"}``
        is returned instead.

    Errors raised by the API call itself are not caught here.
    """
    failure = {"predicted_stars": None, "explanation": "JSON parse failed"}

    response = genai.GenerativeModel("gemini-2.0-flash").generate_content(prompt)
    try:
        text = response.text
    except ValueError:
        # The `.text` accessor raises ValueError when the response carries no
        # text part (e.g. a blocked reply).
        return failure

    json_match = re.search(r"```json\s*(\{.*?\})\s*```", text, re.DOTALL)
    if json_match:
        json_str = json_match.group(1)
    else:
        # No fenced block: take everything from the first "{" to the last "}".
        start = text.find("{")
        end = text.rfind("}") + 1
        json_str = text[start:end] if start != -1 else text

    try:
        parsed = json.loads(json_str)
    except ValueError:
        # json.JSONDecodeError is a ValueError; other errors are real bugs
        # and are deliberately not swallowed.
        return failure

    # A bare number/string/array is valid JSON but not the expected object.
    return parsed if isinstance(parsed, dict) else failure



# Run the few-shot prompt over every sampled review, keeping and printing each
# parsed result under a 1-based sample header.
for sample_number, review_text in enumerate(df_sample["text"], start=1):
    parsed = get_clean_output(prompt_v3(review_text))
    clean_outputs.append(parsed)

    print(f"\n=== Sample {sample_number} ===")
    print(json.dumps(parsed, indent=4))


=== Sample 1 ===
{
    "predicted_stars": 4,
    "explanation": "Mostly positive review highlighting friendly service, good food and cocktails, and a good patio. The only negative is it being empty late at night, but this is explained by the location."
}

=== Sample 2 ===
{
    "predicted_stars": 5,
    "explanation": "Extremely positive review, especially considering the comparison to Louisiana crawfish etouffee."
}

=== Sample 3 ===
{
    "predicted_stars": 4,
    "explanation": "Generally positive sentiment. The reviewer eats there regularly and enjoys the food and drink. While it's described as 'typical', the overall impression is favorable."
}
