In [None]:
import os
import json
import pandas as pd
import numpy as np
import time
import warnings
from openai import OpenAI
from tqdm import tqdm
from IPython.display import display

warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=RuntimeWarning)

# 1. Set the OpenRouter API Key and Base URL
OPENROUTER_API_KEY = "Configure your OpenRouter API key here"
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"

# 2. Define the OPENROUTER Client
client = OpenAI(
    base_url=OPENROUTER_BASE_URL,
    api_key=OPENROUTER_API_KEY
)

# 3. USE MISTRAL 7B WITHOUT THE ':free' SUFFIX
# This should work reliably once your quota/credits are available.
LLM_MODEL = "mistralai/mistral-7b-instruct" 

print(f"‚úÖ Setup complete. OpenRouter Client initialized using stable model: {LLM_MODEL}")

‚úÖ Setup complete. OpenRouter Client initialized using stable model: mistralai/mistral-7b-instruct


In [112]:
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm

warnings.filterwarnings('ignore', category=FutureWarning)

# ‚úÖ CONFIRMED CORRECT FILE PATH from the diagnostic:
FILE_PATH = '/kaggle/input/yelp-file/yelp.csv' 
SAMPLE_SIZE = 200

df_sample = pd.DataFrame() 

def load_data_robustly(file_path):
    """Attempts to load data using comma, then tab, then pipe, then semicolon delimiters."""
    delimiters_to_try = [',', '\t', '|', ';']
    df = None
    
    print(f"Attempting to load data at: {file_path}...")

    for sep in delimiters_to_try:
        try:
            print(f"  -> Trying delimiter: '{sep}'")
            # Using on_bad_lines='skip' handles corrupt lines
            df = pd.read_csv(file_path, sep=sep, on_bad_lines='skip', encoding='utf-8')
            
            # Check for reasonable column count and rows
            if len(df.columns) > 1 and not df.empty:
                return df, sep
            
        except Exception:
            pass # Silently continue if loading fails

    return None, None

try:
    df_full, successful_sep = load_data_robustly(FILE_PATH)
    
    if df_full is None:
        raise ValueError("Failed to load DataFrame with all attempted delimiters. Check file integrity.")

    print(f"‚úÖ Data loaded successfully using delimiter: '{successful_sep}'")
    
    # Check for required columns ('text' and 'stars' are the standard names)
    if 'text' in df_full.columns and 'stars' in df_full.columns:
        # Sample and rename the columns for the prediction phase
        df_sample = df_full[['text', 'stars']].sample(n=SAMPLE_SIZE, random_state=42).reset_index(drop=True)
        df_sample.rename(columns={'stars': 'actual_stars'}, inplace=True)
        
        print(f"Total rows: {len(df_full)} | Sampled rows: {len(df_sample)}")
        print("\nSampled Data Head (Ready for prediction):")
        print(df_sample.head())

    else:
        print("‚ùå ERROR: Required columns ('text' and 'stars') not found after loading.")
        print("Available columns:", df_full.columns.tolist())
        
except Exception as e:
    print(f"‚ùå FATAL ERROR: Data loading failed. Error: {e}")

Attempting to load data at: /kaggle/input/yelp-file/yelp.csv...
  -> Trying delimiter: ','
‚úÖ Data loaded successfully using delimiter: ','
Total rows: 10000 | Sampled rows: 200

Sampled Data Head (Ready for prediction):
                                                text  actual_stars
0  We got here around midnight last Friday... the...             4
1  Brought a friend from Louisiana here.  She say...             5
2  Every friday, my dad and I eat here. We order ...             3
3  My husband and I were really, really disappoin...             1
4  Love this place!  Was in phoenix 3 weeks for w...             5


In [113]:
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm

warnings.filterwarnings('ignore', category=FutureWarning)

# ‚úÖ CONFIRMED CORRECT FILE PATH:
FILE_PATH = '/kaggle/input/yelp-file/yelp.csv' 
SAMPLE_SIZE = 200

df_sample = pd.DataFrame() 

def load_data_robustly(file_path):
    """Attempts to load data using comma, then tab, then pipe, then semicolon delimiters."""
    delimiters_to_try = [',', '\t', '|', ';'] # Added semicolon
    df = None
    
    print(f"Attempting to load data at: {file_path}...")

    for sep in delimiters_to_try:
        try:
            print(f"  -> Trying delimiter: '{sep}'")
            # Using on_bad_lines='skip' handles corrupt lines
            df = pd.read_csv(file_path, sep=sep, on_bad_lines='skip', encoding='utf-8')
            
            # Check for reasonable column count and rows
            if len(df.columns) > 1 and not df.empty:
                return df, sep
            
        except Exception:
            pass # Silently continue if loading fails

    return None, None

try:
    df_full, successful_sep = load_data_robustly(FILE_PATH)
    
    if df_full is None:
        raise ValueError("Failed to load DataFrame with all attempted delimiters. Please check file integrity.")

    print(f"‚úÖ Data loaded successfully using delimiter: '{successful_sep}'")
    
    # Check for required columns ('text' and 'stars' are the standard names)
    if 'text' in df_full.columns and 'stars' in df_full.columns:
        # Sample and rename the columns for the prediction phase
        df_sample = df_full[['text', 'stars']].sample(n=SAMPLE_SIZE, random_state=42).reset_index(drop=True)
        df_sample.rename(columns={'stars': 'actual_stars'}, inplace=True)
        
        print(f"Total rows: {len(df_full)} | Sampled rows: {len(df_sample)}")
        print("\nSampled Data Head (Ready for prediction):")
        print(df_sample.head())

    else:
        # This handles the case where the data loaded but the columns are named differently (e.g., 'Review' and 'Rating')
        print("‚ùå ERROR: Required columns ('text' and 'stars') not found after loading.")
        print("Available columns:", df_full.columns.tolist())
        print("You must rename the correct columns (e.g., df_full.rename(columns={'Review': 'text', 'Rating': 'stars'}, inplace=True))")
        
except Exception as e:
    print(f"‚ùå FATAL ERROR: Data loading failed. Error: {e}")

Attempting to load data at: /kaggle/input/yelp-file/yelp.csv...
  -> Trying delimiter: ','
‚úÖ Data loaded successfully using delimiter: ','
Total rows: 10000 | Sampled rows: 200

Sampled Data Head (Ready for prediction):
                                                text  actual_stars
0  We got here around midnight last Friday... the...             4
1  Brought a friend from Louisiana here.  She say...             5
2  Every friday, my dad and I eat here. We order ...             3
3  My husband and I were really, really disappoin...             1
4  Love this place!  Was in phoenix 3 weeks for w...             5


In [114]:
# --- 2. Main Processing Function (OpenRouter Implementation) ---
def predict_rating(review_text, prompt_function, strategy_name):
    """Sends a single review to the OpenRouter API and processes the response."""
    
    prompt = prompt_function(review_text)
    
    try:
        # Use ChatCompletion with strict JSON mode enabled
        response = client.chat.completions.create(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": "You are a Yelp review rating model. Respond strictly in the required JSON format."},
                {"role": "user", "content": prompt}
            ],
            response_format={"type": "json_object"},
            temperature=0.0
        )

        json_str = response.choices[0].message.content.strip()

        try:
            result = json.loads(json_str)
            
            # üö® FIX: Model outputs 'rating', so we check for both 'rating' and 'predicted_stars'
            pred_stars = result.get('rating') or result.get('predicted_stars')
            explanation = result.get('explanation', '')
            
            # If the model included the review text, extract the rating from that output.
            if pred_stars is None and 'review' in result and 'rating' in result:
                 pred_stars = result.get('rating') 


            if isinstance(pred_stars, int) and 1 <= pred_stars <= 5:
                return {
                    'predicted_stars': pred_stars,
                    'explanation': explanation,
                    'json_valid': True
                }
            else:
                # If the key was found but the value was not an int 1-5
                return {
                    'predicted_stars': np.nan,
                    'explanation': json_str,
                    'json_valid': True
                }

        except json.JSONDecodeError:
            # If the output wasn't a valid JSON
            return {
                'predicted_stars': np.nan,
                'explanation': json_str,
                'json_valid': False
            }

    except Exception as e:
        # API Error (Rate Limit 429, etc.)
        return {
            'predicted_stars': np.nan,
            'explanation': f"API_ERROR: {e}",
            'json_valid': False
        }

In [115]:
# --- Define Strategies to Run ---
strategies = [
    {'name': 'P1_Base_Prompt', 'func': get_prompt_p1},
    {'name': 'P2_CoT_FewShot', 'func': get_prompt_p2},
    {'name': 'P3_Persona_AntiExamples', 'func': get_prompt_p3},
]

# Create a DataFrame to store all results (copies df_sample from Cell 2)
# This assumes df_sample was successfully created in the previous step.
results_df = df_sample.copy()
SAMPLE_SIZE = len(results_df)

print(f"--- Starting Rating Prediction for {SAMPLE_SIZE} reviews ---")

# --- RATE LIMITING CONFIGURATION ---
DELAY_SECONDS = 0.5 
REQUESTS_BEFORE_DELAY = 10 
# -------------------------------------

# Iterate through each prompting strategy
for strategy in strategies:
    strategy_name = strategy['name']
    prompt_func = strategy['func']

    print(f"\nProcessing Strategy: **{strategy_name}**")

    predictions = []

    # Use tqdm for a progress bar while iterating through the reviews
    for i, review_text in enumerate(tqdm(results_df['text'], desc=f"   {strategy_name}")):

        # Implement Delay Check
        if i > 0 and i % REQUESTS_BEFORE_DELAY == 0:
            time.sleep(DELAY_SECONDS)

        # Get prediction and metadata
        result = predict_rating(review_text, prompt_func, strategy_name)
        predictions.append(result)

    # Convert results to a temporary DataFrame and add to main DataFrame
    temp_df = pd.DataFrame(predictions)
    results_df[f'{strategy_name}_pred'] = temp_df['predicted_stars'].values
    results_df[f'{strategy_name}_valid'] = temp_df['json_valid'].values
    results_df[f'{strategy_name}_explanation'] = temp_df['explanation'].values


print("\n--- All predictions complete ---")
print("\nSample of results_df (Actual vs Predictions):")
print(results_df[[
    'actual_stars', 'P1_Base_Prompt_pred', 'P2_CoT_FewShot_pred', 'P3_Persona_AntiExamples_pred'
]].head())

# --- Evaluation ---
evaluation_metrics = []

for strategy in strategies:
    strategy_name = strategy['name']
    pred_col = f'{strategy_name}_pred'
    
    # Calculate Accuracy (on valid predictions)
    correct_predictions = np.sum(np.isclose(results_df['actual_stars'], results_df[pred_col]))
    valid_pred_count = results_df[pred_col].count()
    accuracy = correct_predictions / valid_pred_count if valid_pred_count > 0 else 0
    
    # Calculate JSON Validity Rate
    json_valid_count = results_df[f'{strategy_name}_valid'].sum()
    validity_rate = json_valid_count / SAMPLE_SIZE
    
    # Calculate Reliability (Standard Deviation of Absolute Error)
    error = np.abs(results_df['actual_stars'] - results_df[pred_col])
    consistency_std = error[~error.isna()].std()
    
    evaluation_metrics.append({
        'Approach': strategy_name,
        'Total Valid Predictions': valid_pred_count,
        'Accuracy (on Valid Predictions)': accuracy,
        'JSON Validity Rate': validity_rate,
        'Reliability (Std Dev of Error)': consistency_std
    })

# Convert metrics to a final comparison table
comparison_df = pd.DataFrame(evaluation_metrics)

--- Starting Rating Prediction for 200 reviews ---

Processing Strategy: **P1_Base_Prompt**


   P1_Base_Prompt: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [07:42<00:00,  2.31s/it]



Processing Strategy: **P2_CoT_FewShot**


   P2_CoT_FewShot: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [15:23<00:00,  4.62s/it]



Processing Strategy: **P3_Persona_AntiExamples**


   P3_Persona_AntiExamples: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [04:35<00:00,  1.38s/it]


--- All predictions complete ---

Sample of results_df (Actual vs Predictions):
   actual_stars  P1_Base_Prompt_pred  P2_CoT_FewShot_pred  \
0             4                    4                    4   
1             5                    5                    5   
2             3                    3                    4   
3             1                    1                    1   
4             5                    5                    5   

   P3_Persona_AntiExamples_pred  
0                             4  
1                             5  
2                             4  
3                             1  
4                             5  





In [117]:
# --- RUN THIS CODE NOW (Cell 5) ---
from IPython.display import display

print("\n\n# üìä Evaluation Results: Prompting Strategy Comparison\n")

# Display the main comparison table
print("--- Performance Metrics Table ---")
# The comparison_df creation failed because all predictions were NaN, but let's try the diagnostic check.
    
print("\n--- Diagnostic Check (First Invalid Responses) ---")
# Check the raw output for the first failure in P2 (as an example)
try:
    # We explicitly check for the NaN values in the prediction column
    invalid_p2 = results_df[results_df['P2_CoT_FewShot_pred'].isna()].head(1)
    
    if not invalid_p2.empty:
        print("\nExample P2 Failure (Raw Model Output):")
        # Find the index of the first failure for clear display
        for index, row in invalid_p2.iterrows():
            print(f"Actual Star Rating: {row['actual_stars']}")
            print("----------------------------------------")
            # This is the key: the raw model output that caused the NaN
            print(f"RAW MODEL OUTPUT: {row['P2_CoT_FewShot_explanation']}")
    else:
        print("P2 diagnostic check: All predictions were valid (1-5 integer).")
        
except NameError:
    print("‚ùå ERROR: 'results_df' is not defined. Please ensure Cell 4 completed successfully.")



# üìä Evaluation Results: Prompting Strategy Comparison

--- Performance Metrics Table ---

--- Diagnostic Check (First Invalid Responses) ---
P2 diagnostic check: All predictions were valid (1-5 integer).


In [121]:
import pandas as pd
from IPython.display import display
print("--- RAW RESULTS DUMP ---")
if 'comparison_df' in locals():
    print("Comparison Metrics:")
    display(comparison_df)
else:
    print("Error: comparison_df not found. Please ensure Cell 4 ran successfully.")

--- RAW RESULTS DUMP ---
Comparison Metrics:


Unnamed: 0,Approach,Total Valid Predictions,Accuracy (on Valid Predictions),JSON Validity Rate,Reliability (Std Dev of Error)
0,P1_Base_Prompt,200,0.64,1.0,0.515464
1,P2_CoT_FewShot,200,0.675,1.0,0.505647
2,P3_Persona_AntiExamples,200,0.63,1.0,0.549097
