In [1]:
import os
import sys
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import mean_absolute_error
from dotenv import load_dotenv
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides library deprecation warnings - consider a narrower filter.
warnings.filterwarnings('ignore')

# Load environment
# (the boolean result of this call is what the cell displays)
load_dotenv()

True

In [2]:
from groq import Groq

# Read the Groq credential from the environment (the .env file is loaded first)
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Fail fast: every later cell needs a usable key
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    raise ValueError("‚ùå GROQ_API_KEY not found in .env file!")

# Model identifier sent with every chat-completion request
model = "llama-3.1-8b-instant"

# Shared client object used by all API calls in this notebook
client = Groq(api_key=GROQ_API_KEY)

print("‚úì Groq configured successfully")
print("‚úì Using model: " + model)

‚úì Groq configured successfully
‚úì Using model: llama-3.1-8b-instant


In [3]:
# Section banner (blank line, rule, title, rule)
print("\n" + "=" * 70, "LOADING DATASET", "=" * 70, sep="\n")

# Raw review table; the path is relative to the notebook's working directory
df = pd.read_csv(filepath_or_buffer='yelp.csv')


LOADING DATASET


In [None]:
print(f"\nOriginal dataset shape: {df.shape}")
print(f"Columns: {df.columns.tolist()}")
print(f"\nStar rating distribution:")
print(df['stars'].value_counts().sort_index())

# Draw up to `samples_per_star` reviews from every star class so each rating
# is equally represented in the evaluation set.
samples_per_star = 40

# Sample each group explicitly instead of using groupby(...).apply(...):
# apply() operating on the grouping column is deprecated (pandas 2.2+), and
# the 'stars' column must survive because it is selected right below.
# Groups are visited in sorted key order, so the row order matches what
# apply(group_keys=False) produced.
star_samples = [
    star_group.sample(n=min(samples_per_star, len(star_group)), random_state=42)
    for _, star_group in df.groupby('stars')
]
# pd.concat rejects an empty list, so fall back to an empty slice of df
sample_df = (
    pd.concat(star_samples) if star_samples else df.iloc[0:0]
).reset_index(drop=True)

# Keep only the model input (text) and the ground-truth label (stars)
sample_df = sample_df[['text', 'stars']].copy()

print(f"\n‚úì Sampled dataset shape: {sample_df.shape}")
print(f"Sampled star distribution:")
print(sample_df['stars'].value_counts().sort_index())
print(f"\nFirst 3 samples:")
print(sample_df.head(3)[['text', 'stars']].to_string())


Original dataset shape: (10000, 10)
Columns: ['business_id', 'date', 'review_id', 'stars', 'text', 'type', 'user_id', 'cool', 'useful', 'funny']

Star rating distribution:
stars
1     749
2     927
3    1461
4    3526
5    3337
Name: count, dtype: int64

‚úì Sampled dataset shape: (200, 2)
Sampled star distribution:
stars
1    40
2    40
3    40
4    40
5    40
Name: count, dtype: int64

First 3 samples:
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               

In [5]:
def prompt_v1(review_text):
    """Build the minimal zero-shot prompt: one instruction plus the JSON reply format.

    Args:
        review_text: Review body, interpolated verbatim between double quotes.

    Returns:
        The prompt string to send as the user message.
    """
    prompt_lines = [
        "Classify this Yelp review on a scale of 1-5 stars.",
        "",
        f'Review: "{review_text}"',
        "",
        "Respond with valid JSON only:",
        '{"predicted_stars": <1-5>, "explanation": "<brief reason>"}',
    ]
    return "\n".join(prompt_lines)



def prompt_v2(review_text):
    """Build the rubric prompt: per-star criteria, three worked examples, then the review.

    Args:
        review_text: Review body, interpolated verbatim between double quotes.

    Returns:
        The prompt string to send as the user message (it starts with a newline).
    """
    prompt_lines = [
        "",
        "Rate this Yelp review (1‚Äì5):",
        "",
        "1‚òÖ very negative; major failures; strong dissatisfaction",
        "2‚òÖ mostly negative; significant issues; few positives",
        "3‚òÖ mixed; clear positives + negatives; neutral tone",
        "4‚òÖ mostly positive; minor issues only; satisfied",
        "5‚òÖ very positive; enthusiastic praise; no real complaints",
        "",
        "EXAMPLES:",
        '1‚òÖ ‚Üí "Food was cold, long wait, rude server." ‚Üí {"predicted_stars": 1, "explanation": "Severe complaints"}',
        '3‚òÖ ‚Üí "Decent burger, soggy fries, friendly service." ‚Üí {"predicted_stars": 3, "explanation": "Mixed"}',
        '4‚òÖ ‚Üí "Loved the pasta, slow check." ‚Üí {"predicted_stars": 4, "explanation": "Mostly positive"}',
        "",
        f'Review: "{review_text}"',
        "",
        "Respond with valid JSON only:",
        '{"predicted_stars": <1-5>, "explanation": "<brief reason>"}',
    ]
    return "\n".join(prompt_lines)



def prompt_v3(review_text):
    """Build the guided-analysis prompt: the review, four questions, then the JSON reply format.

    Args:
        review_text: Review body, interpolated verbatim between double quotes.

    Returns:
        The prompt string to send as the user message.
    """
    prompt_lines = [
        "Rate this Yelp review (1-5 stars) by analyzing it systematically.",
        "",
        f'Review: "{review_text}"',
        "",
        "Think through:",
        "1. What specific positive aspects are mentioned?",
        "2. What specific negative aspects are mentioned?",
        "3. What's the overall emotional tone?",
        "4. Are there any strong keywords (love, hate, terrible, amazing)?",
        "",
        "Respond with valid JSON only:",
        '{"predicted_stars": <1-5>, "explanation": "<brief reason>"}',
    ]
    return "\n".join(prompt_lines)




# Registry of prompt builders keyed by experiment label; insertion order is V1, V2, V3
PROMPTS = dict(
    V1_Simple=prompt_v1,
    V2_Detailed_Criteria=prompt_v2,
    V3_Chain_of_Thought=prompt_v3,
)

print("‚úì Defined 3 prompt engineering approaches")

‚úì Defined 3 prompt engineering approaches


In [None]:
from pydantic import BaseModel, Field, ValidationError
from typing import Optional

class ReviewPrediction(BaseModel):
    """Schema for validating LLM review rating predictions.

    Both fields are required (declared with ``...``). Building an instance
    from data that is missing a field or violates a constraint raises
    ``ValidationError``.
    """
    # Integer star rating, constrained to the inclusive range 1..5 (ge=1, le=5)
    predicted_stars: int = Field(..., ge=1, le=5, description="Star rating from 1 to 5")
    # Free-text justification; must contain at least one character (min_length=1)
    explanation: str = Field(..., min_length=1, description="Brief reasoning")

print("‚úì Pydantic validation schema defined")

‚úì Pydantic validation schema defined


In [7]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Tuple, Optional
import re
import json
import time

def call_llm(review_text: str, prompt_func: callable, max_retries: int = 3) -> str:
    """Send one review to the Groq chat API and return the raw reply text.

    The function never raises for API failures: every failure is reported as
    a string that starts with ``"ERROR"`` so callers can treat it as an
    invalid prediction.

    Args:
        review_text: Review body passed to ``prompt_func``.
        prompt_func: Callable that turns the review text into a prompt string.
        max_retries: Maximum number of API attempts.

    Returns:
        The model's message content on success, otherwise an ``"ERROR: ..."``
        string describing the failure.
    """
    prompt = prompt_func(review_text)

    for attempt in range(max_retries):
        is_last_attempt = attempt >= max_retries - 1
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0,  # deterministic output for a repeatable evaluation
                max_tokens=500
            )
            return response.choices[0].message.content

        except Exception as e:
            error_msg = str(e).lower()

            # Rate limiting is detected from the error text.
            # NOTE(review): the "rate"/"limit" substrings are broad and may
            # also match unrelated errors - confirm against the SDK's error types.
            if "rate" in error_msg or "limit" in error_msg or "429" in error_msg:
                if is_last_attempt:
                    # No attempt follows, so backing off here would only
                    # delay the error result.
                    print(f"  Rate limited on final attempt ({attempt + 1}/{max_retries}); giving up.")
                    break
                wait_time = (2 ** attempt) * 3  # exponential backoff: 3s, 6s, ...
                print(f"  ‚ö† Rate limited. Waiting {wait_time}s... (attempt {attempt + 1}/{max_retries})")
                time.sleep(wait_time)
                continue

            # Model availability problems will not fix themselves: do not retry
            if "model" in error_msg and ("not found" in error_msg or "unavailable" in error_msg):
                print(f"  ‚ùå Model error: {str(e)[:150]}")
                print(f"     ‚Üí Check available models at https://console.groq.com/docs/models")
                return f"ERROR: Model not available - {str(e)}"

            # Credential problems will not fix themselves either: do not retry
            if "unauthorized" in error_msg or "401" in error_msg or "api key" in error_msg:
                print(f"  ‚ùå Authentication error: {str(e)[:150]}")
                print(f"     ‚Üí Check API key at https://console.groq.com/keys")
                return f"ERROR: Invalid API key - {str(e)}"

            # Anything else is treated as transient and retried after a short pause
            if not is_last_attempt:
                print(f"  ‚ö† Error: {str(e)[:100]}. Retrying in 2s...")
                time.sleep(2)
                continue

            # Final failure
            print(f"  ‚ùå Final attempt failed: {str(e)[:150]}")
            return f"ERROR: {str(e)}"

    # Reached when max_retries <= 0 or every attempt was rate limited
    return "ERROR: Max retries exceeded"


def extract_json_from_response(response_text: str) -> Optional[str]:
    """Pull the rating object out of an LLM reply and return it as clean JSON text.

    The reply may wrap the JSON in prose or markdown fences; only the span
    from the first ``{`` to the last ``}`` is considered. That span is first
    parsed strictly. If strict parsing fails (for example because of raw
    newlines or unescaped quotes inside the explanation), the two fields are
    salvaged with regular expressions instead.

    Args:
        response_text: Raw model output, or an ``"ERROR..."`` marker string.

    Returns:
        A JSON object string with ``predicted_stars`` (an int in 1..5) and
        ``explanation`` (whitespace-collapsed text), or ``"{}"`` when no
        usable rating can be found.
    """
    if not response_text or response_text.startswith('ERROR'):
        return "{}"

    # Markdown fences and surrounding prose lie outside the outermost braces,
    # so locating the braces is enough to discard them.
    text = response_text.strip()
    start = text.find('{')
    end = text.rfind('}') + 1

    if start == -1 or end <= start:
        return "{}"

    json_str = text[start:end]

    def _serialize(stars, explanation):
        # json.dumps handles quote and backslash escaping; hand-built strings
        # produced invalid JSON for explanations that already contained \".
        return json.dumps(
            {"predicted_stars": stars, "explanation": explanation},
            ensure_ascii=False,
        )

    # Preferred path: the span is already valid JSON with an integer rating
    try:
        parsed = json.loads(json_str)
    except ValueError:
        parsed = None

    if isinstance(parsed, dict):
        stars = parsed.get("predicted_stars")
        # bool is a subclass of int, so exclude it explicitly
        if isinstance(stars, int) and not isinstance(stars, bool):
            if not (1 <= stars <= 5):
                return "{}"
            explanation = parsed.get("explanation")
            if isinstance(explanation, str):
                explanation = ' '.join(explanation.split())
            else:
                explanation = "No explanation provided"
            return _serialize(stars, explanation)

    # Fallback path: salvage the fields from malformed JSON
    stars_match = re.search(r'"predicted_stars"\s*:\s*(\d+)', json_str)
    if not stars_match:
        return "{}"
    stars = int(stars_match.group(1))

    # Validate star rating is in range 1-5
    if not (1 <= stars <= 5):
        return "{}"

    # Lazy match up to the first quote that is followed by ',' or '}'
    expl_pattern = r'"explanation"\s*:\s*"(.*?)"(?:\s*[,}])'
    expl_match = re.search(expl_pattern, json_str, re.DOTALL)

    if expl_match:
        raw_explanation = ' '.join(expl_match.group(1).split())
        try:
            # Decode JSON escapes (\" \n \\ ...) when the captured text is well formed
            explanation = json.loads(f'"{raw_explanation}"')
        except ValueError:
            # Unescaped quotes or a dangling backslash: keep the text literally
            explanation = raw_explanation
    else:
        explanation = "No explanation provided"

    return _serialize(stars, explanation)


def validate_prediction(response_text: str) -> dict:
    """Check a raw LLM reply against ``ReviewPrediction`` and report the outcome.

    Args:
        response_text: Raw model output (or an ``"ERROR..."`` marker string).

    Returns:
        A dict with keys ``valid``, ``predicted_stars``, ``explanation``,
        ``error`` and ``raw_response``. On success ``valid`` is True and the
        two prediction fields are filled; on failure ``error`` holds a message.
    """
    outcome = dict(
        valid=False,
        predicted_stars=None,
        explanation=None,
        error=None,
        raw_response=response_text,
    )

    try:
        payload = json.loads(extract_json_from_response(response_text))
        parsed = ReviewPrediction(**payload)
    except json.JSONDecodeError as exc:
        outcome['error'] = f"JSON parsing error: {str(exc)}"
    except ValidationError as exc:
        # One "<field>: <message>" entry per schema violation
        problems = []
        for issue in exc.errors():
            problems.append(f"{issue['loc'][0]}: {issue['msg']}")
        outcome['error'] = "Validation error: " + "; ".join(problems)
    except Exception as exc:
        outcome['error'] = f"Unexpected error: {str(exc)}"
    else:
        outcome.update(
            valid=True,
            predicted_stars=parsed.predicted_stars,
            explanation=parsed.explanation,
        )

    return outcome


print("‚úì LLM calling and validation functions ready for Groq")


‚úì LLM calling and validation functions ready for Groq


In [8]:
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Dict, Tuple
import time

def call_llm_with_name(review_text: str, prompt_name: str, prompt_func: callable) -> Tuple[str, str]:
    """Run ``call_llm`` and tag the reply with the prompt's name.

    Returns:
        ``(prompt_name, raw_response)`` so results gathered from a thread pool
        can be matched back to the prompt that produced them.
    """
    return prompt_name, call_llm(review_text, prompt_func)


def process_single_review_all_prompts(review_text: str) -> Dict[str, any]:
    """
    Send one review through every prompt in PROMPTS concurrently.

    Each prompt runs in its own worker thread. Replies are validated as they
    complete and stored under ``<prefix>_predicted_stars`` and
    ``<prefix>_explanation``, where the prefix is v1, v2 or v3. An invalid
    reply stores None for the stars and an "ERROR: ..." explanation.
    """
    def _column_prefix(label):
        # Names containing V1 or V2 map to v1/v2; anything else is treated as v3
        for tag, prefix in (('V1', 'v1'), ('V2', 'v2')):
            if tag in label:
                return prefix
        return 'v3'

    collected = {}

    # One worker per prompt
    with ThreadPoolExecutor(max_workers=3) as pool:
        pending = {}
        for label, builder in PROMPTS.items():
            job = pool.submit(call_llm_with_name, review_text, label, builder)
            pending[job] = label

        for finished in as_completed(pending):
            label, raw_reply = finished.result()
            prefix = _column_prefix(label)
            stars_key = f'{prefix}_predicted_stars'
            explanation_key = f'{prefix}_explanation'

            verdict = validate_prediction(raw_reply)

            if not verdict['valid']:
                collected[stars_key] = None
                collected[explanation_key] = f"ERROR: {verdict['error']}"
            else:
                collected[stars_key] = verdict['predicted_stars']
                collected[explanation_key] = verdict['explanation']

    return collected


def process_reviews_with_all_prompts(df: pd.DataFrame, delay: float = 0.5) -> pd.DataFrame:
    """
    Process each review with all 3 prompts and collect the predictions.

    Reviews are handled one at a time; the three prompts for a review run in
    parallel via process_single_review_all_prompts. Progress and timing are
    printed to stdout.

    Args:
        df: DataFrame with a 'text' column (other columns are carried over).
            Any index is accepted; pacing is based on row position.
        delay: Seconds to wait after each review except the last one.

    Returns:
        A copy of df with v1/v2/v3 '_predicted_stars' and '_explanation'
        columns added. Predictions that failed validation have None stars.
    """
    result_df = df.copy()

    # Pre-create the result columns so they exist even when df is empty
    for prefix in ('v1', 'v2', 'v3'):
        result_df[f'{prefix}_predicted_stars'] = None
        result_df[f'{prefix}_explanation'] = None

    n_reviews = len(df)
    total_calls = n_reviews * 3
    estimated_time = (n_reviews * delay + total_calls * 0.5) / 60  # Rough estimate

    print(f"\n{'='*70}")
    print(f"PROCESSING WITH GROQ ‚ö°")
    print(f"{'='*70}")
    print(f"Reviews to process: {n_reviews}")
    print(f"Prompts per review: 3 (run in parallel)")
    print(f"Total API calls: {total_calls}")
    print(f"Model: {model}")
    print(f"Estimated time: ~{estimated_time:.1f} minutes")
    print(f"Delay between reviews: {delay}s")
    print(f"{'='*70}\n")

    start_time = time.time()

    progress = tqdm(df.iterrows(), total=n_reviews, desc="Processing Reviews")
    for position, (idx, row) in enumerate(progress):
        # Process all 3 prompts in parallel for this review
        results = process_single_review_all_prompts(row['text'])

        # Store results under the row's own index label
        for col, value in results.items():
            result_df.at[idx, col] = value

        # Pace requests, skipping the pause after the final review. The check
        # uses the row position: index labels need not be 0..n-1.
        if position < n_reviews - 1:
            time.sleep(delay)

    elapsed_time = time.time() - start_time
    # Guard the average against an empty frame
    seconds_per_review = elapsed_time / n_reviews if n_reviews else 0.0

    print(f"\n{'='*70}")
    print(f"‚úì Processing complete!")
    print(f"{'='*70}")
    print(f"Total time: {elapsed_time/60:.1f} minutes")
    print(f"Average time per review: {seconds_per_review:.1f} seconds")
    print(f"{'='*70}\n")

    return result_df


print("‚úì Parallel processing functions ready for Groq")

‚úì Parallel processing functions ready for Groq


In [9]:
# %%
# ============================================
# TEST GROQ API CONNECTION
# ============================================
# One small request to confirm the key, the model name and the reply format
print("\n" + "=" * 70, "TESTING GROQ API CONNECTION", "=" * 70, sep="\n")

test_review = "The food was amazing!"
test_prompt = "\n".join([
    "Rate this review from 1-5 stars:",
    f'Review: "{test_review}"',
    'Respond with JSON: {"predicted_stars": <number>, "explanation": "<reason>"}',
])

try:
    test_response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": test_prompt}],
        temperature=0,
        max_tokens=200
    )
    print("‚úì Groq API working!")
    reply_text = test_response.choices[0].message.content
    print(f"Response: {reply_text}")

    # Run the reply through the same validation used for the real evaluation
    validation = validate_prediction(reply_text)
    if not validation['valid']:
        print(f"‚ö† Response format issue: {validation['error']}")
    else:
        print(f"‚úì Valid JSON response: {validation['predicted_stars']}‚òÖ")
        print(f"  Explanation: {validation['explanation']}")

except Exception as e:
    print(f"‚ùå Groq API test failed: {e}")
    print("Check your API key and internet connection")
    print("Get your free API key at: https://console.groq.com/keys")



TESTING GROQ API CONNECTION
‚úì Groq API working!
Response: {"predicted_stars": 5, "explanation": "The review is extremely positive, using the superlative 'amazing' to describe the food, indicating a perfect or near-perfect experience."}
‚úì Valid JSON response: 5‚òÖ
  Explanation: The review is extremely positive, using the superlative 'amazing' to describe the food, indicating a perfect or near-perfect experience.


In [10]:
# %%
# ============================================
# STARTING EVALUATION WITH GROQ
# ============================================
print("\n" + "=" * 70, "STARTING EVALUATION WITH GROQ ‚ö°", "=" * 70, sep="\n")

# Run all three prompts over the balanced sample
predictions_df = process_reviews_with_all_prompts(sample_df, delay=0.5)

# Share of reviews for which each prompt produced a schema-valid prediction
print("\n" + "=" * 70, "JSON VALIDITY RATES:", "=" * 70, sep="\n")

validity_results = []
for version in ('v1', 'v2', 'v3'):
    stars_column = f'{version}_predicted_stars'
    success = predictions_df[stars_column].notna().sum()
    total = len(sample_df)
    percentage = (success/total*100)
    validity_results.append(percentage)
    print(f"{version.upper()}: {success}/{total} ({percentage:.1f}%)")

# Unweighted mean of the three per-prompt validity rates
avg_validity = sum(validity_results) / len(validity_results)
print(f"\nAVERAGE VALIDITY: {avg_validity:.1f}%")

# The first ceiling that avg_validity falls below decides the message
validity_bands = (
    (80, "‚ö† Low validity rate - check prompt formatting or model compatibility"),
    (95, "‚úì Good validity rate - minor improvements possible"),
)
for ceiling, verdict in validity_bands:
    if avg_validity < ceiling:
        print(verdict)
        break
else:
    print("‚úì Excellent validity rate!")

# Side-by-side view of the true rating and each prompt's prediction
print("\n" + "=" * 70, "SAMPLE PREDICTIONS (First 10 Reviews):", "=" * 70, sep="\n")
display_cols = ['stars'] + [f'{tag}_predicted_stars' for tag in ('v1', 'v2', 'v3')]
print(predictions_df[display_cols].head(10).to_string(index=True))

# Exact-match accuracy, computed over valid predictions only
print("\n" + "=" * 70, "QUICK ACCURACY CHECK:", "=" * 70, sep="\n")

for version in ('v1', 'v2', 'v3'):
    stars_column = f'{version}_predicted_stars'
    valid_mask = predictions_df[stars_column].notna()
    if valid_mask.sum() <= 0:
        print(f"{version.upper()}: No valid predictions")
        continue
    valid_df = predictions_df[valid_mask]
    accuracy = (valid_df['stars'] == valid_df[stars_column]).sum() / len(valid_df) * 100
    print(f"{version.upper()}: {accuracy:.1f}% accuracy ({valid_mask.sum()} valid samples)")



STARTING EVALUATION WITH GROQ ‚ö°

PROCESSING WITH GROQ ‚ö°
Reviews to process: 200
Prompts per review: 3 (run in parallel)
Total API calls: 600
Model: llama-3.1-8b-instant
Estimated time: ~6.7 minutes
Delay between reviews: 0.5s



Processing Reviews:   2%|‚ñé         | 5/200 [00:03<02:20,  1.39it/s]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   4%|‚ñé         | 7/200 [00:42<31:35,  9.82s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   4%|‚ñç         | 8/200 [00:55<34:57, 10.92s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   4%|‚ñç         | 9/200 [01:08<37:01, 11.63s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   5%|‚ñå         | 10/200 [01:23<40:19, 12.73s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   6%|‚ñå         | 11/200 [01:33<36:38, 11.63s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   6%|‚ñå         | 12/200 [01:46<37:50, 12.07s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   6%|‚ñã         | 13/200 [02:23<1:01:48, 19.83s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   7%|‚ñã         | 14/200 [02:35<54:12, 17.49s/it]  

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   8%|‚ñä         | 15/200 [03:03<1:03:03, 20.45s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   8%|‚ñä         | 16/200 [03:16<56:02, 18.27s/it]  

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   8%|‚ñä         | 17/200 [03:24<46:26, 15.23s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:   9%|‚ñâ         | 18/200 [03:33<40:34, 13.37s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  10%|‚ñâ         | 19/200 [03:42<36:28, 12.09s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  10%|‚ñà         | 20/200 [03:58<39:37, 13.21s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  10%|‚ñà         | 21/200 [04:05<33:55, 11.37s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  11%|‚ñà         | 22/200 [04:12<29:46, 10.04s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  12%|‚ñà‚ñè        | 23/200 [04:41<45:55, 15.57s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  12%|‚ñà‚ñè        | 24/200 [04:49<39:00, 13.30s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  12%|‚ñà‚ñé        | 25/200 [04:58<35:06, 12.04s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  13%|‚ñà‚ñé        | 26/200 [05:05<30:30, 10.52s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  14%|‚ñà‚ñé        | 27/200 [05:13<28:08,  9.76s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  14%|‚ñà‚ñç        | 29/200 [05:26<22:45,  7.99s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  16%|‚ñà‚ñå        | 31/200 [05:41<21:36,  7.67s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  16%|‚ñà‚ñå        | 32/200 [05:52<24:22,  8.70s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  16%|‚ñà‚ñã        | 33/200 [06:00<23:38,  8.50s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  18%|‚ñà‚ñä        | 35/200 [06:14<20:47,  7.56s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  18%|‚ñà‚ñä        | 36/200 [06:23<21:45,  7.96s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  18%|‚ñà‚ñä        | 37/200 [06:55<41:30, 15.28s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  19%|‚ñà‚ñâ        | 38/200 [07:04<36:12, 13.41s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  20%|‚ñà‚ñâ        | 39/200 [07:19<37:34, 14.00s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  20%|‚ñà‚ñà        | 40/200 [07:35<38:19, 14.37s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  20%|‚ñà‚ñà        | 41/200 [07:48<37:05, 14.00s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  22%|‚ñà‚ñà‚ñè       | 43/200 [08:15<34:59, 13.37s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  22%|‚ñà‚ñà‚ñè       | 44/200 [08:40<44:02, 16.94s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  22%|‚ñà‚ñà‚ñé       | 45/200 [09:02<47:03, 18.22s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  23%|‚ñà‚ñà‚ñé       | 46/200 [09:11<39:49, 15.52s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  24%|‚ñà‚ñà‚ñé       | 47/200 [09:32<43:52, 17.20s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  24%|‚ñà‚ñà‚ñç       | 49/200 [09:46<29:37, 11.77s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  26%|‚ñà‚ñà‚ñå       | 51/200 [09:59<22:49,  9.19s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  26%|‚ñà‚ñà‚ñå       | 52/200 [10:09<23:10,  9.40s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  26%|‚ñà‚ñà‚ñã       | 53/200 [10:20<24:17,  9.92s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  27%|‚ñà‚ñà‚ñã       | 54/200 [10:32<24:59, 10.27s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  28%|‚ñà‚ñà‚ñä       | 55/200 [10:43<25:27, 10.53s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  28%|‚ñà‚ñà‚ñä       | 57/200 [10:59<21:33,  9.04s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  29%|‚ñà‚ñà‚ñâ       | 58/200 [11:08<21:21,  9.02s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  30%|‚ñà‚ñà‚ñâ       | 59/200 [11:19<22:41,  9.66s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  30%|‚ñà‚ñà‚ñà       | 60/200 [11:27<21:24,  9.17s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  30%|‚ñà‚ñà‚ñà       | 61/200 [11:36<21:08,  9.13s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  31%|‚ñà‚ñà‚ñà       | 62/200 [11:45<20:56,  9.10s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  32%|‚ñà‚ñà‚ñà‚ñè      | 63/200 [12:10<31:29, 13.79s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  32%|‚ñà‚ñà‚ñà‚ñè      | 64/200 [12:18<27:28, 12.12s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  32%|‚ñà‚ñà‚ñà‚ñé      | 65/200 [12:28<26:03, 11.58s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  34%|‚ñà‚ñà‚ñà‚ñé      | 67/200 [12:43<20:28,  9.24s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  34%|‚ñà‚ñà‚ñà‚ñç      | 68/200 [12:52<20:22,  9.26s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  35%|‚ñà‚ñà‚ñà‚ñå      | 70/200 [13:12<20:09,  9.30s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  36%|‚ñà‚ñà‚ñà‚ñå      | 71/200 [13:24<21:50, 10.16s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  36%|‚ñà‚ñà‚ñà‚ñå      | 72/200 [13:42<26:49, 12.57s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  36%|‚ñà‚ñà‚ñà‚ñã      | 73/200 [13:49<23:03, 10.90s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  38%|‚ñà‚ñà‚ñà‚ñä      | 75/200 [14:18<25:00, 12.01s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  38%|‚ñà‚ñà‚ñà‚ñä      | 77/200 [14:33<19:25,  9.47s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  40%|‚ñà‚ñà‚ñà‚ñâ      | 79/200 [14:48<16:45,  8.31s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  40%|‚ñà‚ñà‚ñà‚ñà      | 80/200 [14:58<17:37,  8.82s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  40%|‚ñà‚ñà‚ñà‚ñà      | 81/200 [15:08<18:17,  9.22s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  41%|‚ñà‚ñà‚ñà‚ñà      | 82/200 [15:17<18:01,  9.16s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 83/200 [15:24<16:40,  8.55s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 84/200 [15:42<21:35, 11.17s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  42%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 85/200 [15:51<20:17, 10.59s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 86/200 [16:03<21:11, 11.15s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 87/200 [16:24<26:06, 13.86s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 88/200 [16:35<24:21, 13.05s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 89/200 [16:46<23:05, 12.48s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 90/200 [16:58<22:44, 12.40s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 91/200 [17:07<20:44, 11.41s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 92/200 [17:20<21:31, 11.95s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 94/200 [17:39<18:05, 10.24s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 95/200 [18:00<23:40, 13.53s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 97/200 [18:14<17:17, 10.07s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 98/200 [18:29<19:41, 11.59s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  50%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 99/200 [18:36<17:14, 10.24s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 100/200 [18:45<16:01,  9.61s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 101/200 [18:53<15:05,  9.15s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 102/200 [19:01<14:21,  8.79s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 104/200 [19:17<13:18,  8.32s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 105/200 [19:25<13:03,  8.24s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 106/200 [19:36<14:16,  9.11s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 107/200 [19:48<15:34, 10.04s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 108/200 [19:56<14:28,  9.44s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 109/200 [20:04<13:41,  9.03s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 110/200 [20:13<13:33,  9.04s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 111/200 [20:29<16:36, 11.19s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 113/200 [20:45<13:30,  9.31s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 114/200 [20:53<12:48,  8.94s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 115/200 [21:03<13:11,  9.32s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 116/200 [21:12<13:00,  9.29s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 117/200 [21:21<12:51,  9.29s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 118/200 [21:45<18:26, 13.50s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 119/200 [21:53<16:08, 11.96s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 120/200 [22:01<14:28, 10.85s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 121/200 [22:12<14:22, 10.92s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 122/200 [22:23<13:51, 10.67s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 123/200 [22:31<12:41,  9.89s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 124/200 [22:49<15:41, 12.39s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 125/200 [23:02<15:50, 12.68s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 126/200 [23:09<13:34, 11.01s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 127/200 [23:16<11:59,  9.85s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 128/200 [23:27<11:57,  9.96s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 130/200 [23:41<09:45,  8.36s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 131/200 [23:50<09:51,  8.57s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 132/200 [24:02<10:59,  9.70s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 133/200 [24:12<11:01,  9.87s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 134/200 [24:22<10:55,  9.94s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 135/200 [24:31<10:11,  9.41s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 136/200 [24:40<09:57,  9.34s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 137/200 [24:53<11:01, 10.50s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 138/200 [25:03<10:44, 10.39s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 139/200 [25:12<10:09,  9.99s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 140/200 [25:21<09:43,  9.73s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)
  ‚ö† Rate limited. Waiting 6s... (attempt 2/3)


Processing Reviews:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 142/200 [25:49<10:42, 11.09s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 143/200 [26:13<14:23, 15.14s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 145/200 [26:28<10:09, 11.08s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 146/200 [26:42<10:46, 11.97s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 147/200 [26:51<09:47, 11.08s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 148/200 [27:01<09:21, 10.80s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 149/200 [27:10<08:42, 10.25s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 150/200 [27:20<08:16,  9.92s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 151/200 [27:31<08:24, 10.29s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 152/200 [27:47<09:37, 12.03s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 153/200 [28:00<09:40, 12.35s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 155/200 [28:17<07:46, 10.37s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)
  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 157/200 [28:45<08:17, 11.56s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 158/200 [28:56<07:58, 11.39s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 159/200 [29:10<08:11, 12.00s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 160/200 [29:19<07:24, 11.10s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 161/200 [29:28<06:48, 10.48s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 162/200 [29:37<06:22, 10.06s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 163/200 [29:45<05:49,  9.45s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 165/200 [29:58<04:33,  7.80s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 166/200 [30:06<04:26,  7.84s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 168/200 [30:20<03:55,  7.36s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 170/200 [30:34<03:32,  7.09s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 171/200 [30:42<03:33,  7.37s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 173/200 [30:55<03:04,  6.83s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 174/200 [31:12<04:16,  9.88s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 177/200 [31:31<02:48,  7.33s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 178/200 [31:37<02:31,  6.90s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 179/200 [31:48<02:50,  8.10s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 180/200 [31:56<02:42,  8.14s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 181/200 [32:05<02:39,  8.41s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 182/200 [32:15<02:40,  8.91s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 184/200 [32:27<01:55,  7.23s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 185/200 [32:36<01:55,  7.72s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 186/200 [32:45<01:53,  8.14s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 187/200 [32:53<01:45,  8.10s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 188/200 [33:01<01:37,  8.10s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 189/200 [33:10<01:32,  8.38s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 190/200 [33:20<01:28,  8.87s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 191/200 [33:28<01:17,  8.61s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 192/200 [33:36<01:07,  8.45s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 193/200 [33:48<01:06,  9.56s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 194/200 [33:57<00:54,  9.11s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 195/200 [34:22<01:10, 14.09s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 196/200 [34:29<00:47, 11.96s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 197/200 [34:37<00:32, 10.79s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews:  99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 198/200 [34:45<00:19,  9.93s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 199/200 [34:52<00:09,  9.11s/it]

  ‚ö† Rate limited. Waiting 3s... (attempt 1/3)


Processing Reviews: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [35:05<00:00, 10.53s/it]


‚úì Processing complete!
Total time: 35.1 minutes
Average time per review: 10.5 seconds


JSON VALIDITY RATES:
V1: 200/200 (100.0%)
V2: 200/200 (100.0%)
V3: 200/200 (100.0%)

AVERAGE VALIDITY: 100.0%
‚úì Excellent validity rate!

SAMPLE PREDICTIONS (First 10 Reviews):
   stars v1_predicted_stars v2_predicted_stars v3_predicted_stars
0      1                  1                  1                  1
1      1                  1                  1                  1
2      1                  1                  1                  1
3      1                  1                  1                  1
4      1                  1                  1                  1
5      1                  1                  1                  1
6      1                  1                  1                  1
7      1                  1                  1                  1
8      1                  1                  2                  2
9      1                  1                  1                  1

QUI




In [11]:
print("\n" + "="*70)
print("EVALUATION METRICS")
print("="*70)

# Build one metrics row per prompt version. Metrics are computed only over rows
# whose predicted-stars value is present; 'Validity (%)' records how many rows that was.
results = []
for version, name in [('v1', 'V1_Simple'), ('v2', 'V2_Detailed_Criteria'),
                      ('v3', 'V3_Chain_of_Thought')]:

    valid_mask = predictions_df[f'{version}_predicted_stars'].notna()
    valid_df = predictions_df[valid_mask].copy()

    if len(valid_df) > 0:
        # Cast both sides to int so the comparison and the differences are exact.
        actual = valid_df['stars'].astype(int)
        predicted = valid_df[f'{version}_predicted_stars'].astype(int)

        accuracy = (actual == predicted).sum() / len(valid_df) * 100
        mae = mean_absolute_error(actual, predicted)

        # Bucket the absolute star differences.
        diff = abs(actual - predicted)
        exact = (diff == 0).sum()
        off_1 = (diff == 1).sum()
        off_2_plus = (diff >= 2).sum()

        validity = len(valid_df) / len(predictions_df) * 100

        results.append({
            'Prompt': name,
            'Accuracy (%)': round(accuracy, 2),
            'MAE': round(mae, 3),
            'Validity (%)': round(validity, 1),
            'Exact Match': exact,
            'Off by 1': off_1,
            'Off by 2+': off_2_plus,
            'Valid Samples': len(valid_df)
        })
    else:
        # Make the omission visible instead of silently dropping the prompt from the table.
        print(f"{name}: no valid predictions - excluded from metrics")

results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

# Winner
if len(results_df) > 0:
    # Highest accuracy wins; prompts tied on accuracy are separated by lower MAE,
    # then by higher validity, rather than by their position in the list above.
    ranked = results_df.sort_values(
        ['Accuracy (%)', 'MAE', 'Validity (%)'],
        ascending=[False, True, False],
    )
    best_idx = ranked.index[0]
    best = results_df.loc[best_idx]

    print("\n" + "="*70)
    print("üèÜ BEST PERFORMING PROMPT")
    print("="*70)
    print(f"Winner: {best['Prompt']}")
    print(f"  Accuracy: {best['Accuracy (%)']}%")
    print(f"  MAE: {best['MAE']}")
    print(f"  Validity: {best['Validity (%)']}%")
    print(f"  Exact Matches: {best['Exact Match']}/{best['Valid Samples']}")


EVALUATION METRICS
              Prompt  Accuracy (%)   MAE  Validity (%)  Exact Match  Off by 1  Off by 2+  Valid Samples
           V1_Simple          67.0 0.350         100.0          134        64          2            200
V2_Detailed_Criteria          67.0 0.350         100.0          134        64          2            200
 V3_Chain_of_Thought          68.5 0.335         100.0          137        61          2            200

üèÜ BEST PERFORMING PROMPT
Winner: V3_Chain_of_Thought
  Accuracy: 68.5%
  MAE: 0.335
  Validity: 100.0%
  Exact Matches: 137/200


In [12]:
# Write the per-prompt metrics table to disk (row index omitted), then print a banner.
results_df.to_csv(path_or_buf='prompt_evaluation_metrics.csv', index=False)

print("\n" + "=" * 70, "RESULTS SAVED", "=" * 70, sep="\n")


RESULTS SAVED


In [13]:
# Section banner for the error-analysis output: blank line, rule, title, rule.
print("\n" + "=" * 70, "ERROR ANALYSIS: PREDICTION DISCREPANCIES", "=" * 70, sep="\n")

def analyze_prediction_errors(df, version='v3', version_name='V3_Chain_of_Thought'):
    """Summarize and print the prediction errors of one prompt version.

    Rows whose ``{version}_predicted_stars`` value is missing are ignored. For the
    remaining rows the absolute star difference is computed after casting both the
    actual and the predicted rating to ``int``, so predictions stored as floats or
    numeric strings are compared as integers.

    Prints the error distribution, up to five off-by-1 examples and every
    off-by-2+ example (review text truncated to 150 / 200 characters).

    Args:
        df: DataFrame providing the columns ``stars``, ``text``,
            ``{version}_predicted_stars`` and ``{version}_explanation``.
        version: Column prefix identifying the prompt version (e.g. ``'v3'``).
        version_name: Label used in the printed headers.

    Returns:
        dict with three DataFrames:
            'off_by_1'      - rows whose absolute error is exactly 1
            'off_by_2_plus' - rows whose absolute error is 2 or more
            'analysis_df'   - all valid rows plus 'error' and 'error_type' columns
        All three are empty DataFrames when there are no valid predictions.
    """
    pred_col = f'{version}_predicted_stars'
    expl_col = f'{version}_explanation'

    valid_mask = df[pred_col].notna()
    analysis_df = df[valid_mask].copy()

    if len(analysis_df) == 0:
        print(f"\n{version_name}: No valid predictions to analyze")
        return {'off_by_1': pd.DataFrame(), 'off_by_2_plus': pd.DataFrame(), 'analysis_df': pd.DataFrame()}

    # Cast before subtracting: an object column holding numeric strings would
    # otherwise raise on `int - str`.
    actual_stars = analysis_df['stars'].astype(int)
    predicted_stars = analysis_df[pred_col].astype(int)
    analysis_df['error'] = (actual_stars - predicted_stars).abs()
    analysis_df['error_type'] = analysis_df['error'].apply(
        lambda x: 'Exact Match' if x == 0 else ('Off by 1' if x == 1 else 'Off by 2+')
    )

    def _print_examples(rows, text_limit, show_error):
        """Print one formatted entry per row; optionally tag the header with the error size."""
        for idx, row in rows.iterrows():
            actual = int(row['stars'])
            predicted = int(row[pred_col])
            direction = "‚Üë" if predicted > actual else "‚Üì"

            header = f"\nExample {idx}"
            if show_error:
                header += f" [ERROR: {int(row['error'])}]"
            print(f"{header}:")
            print(f"  Actual: {actual}‚òÖ  |  Predicted: {predicted}‚òÖ {direction}")
            # str() keeps a missing / non-string review from crashing the slice.
            print(f"  Review: {str(row['text'])[:text_limit]}...")
            print(f"  Explanation: {row[expl_col]}")

    print(f"\n{'='*80}")
    print(f"ANALYSIS FOR: {version_name}")
    print(f"{'='*80}")

    print(f"\nError Distribution:")
    print(analysis_df['error_type'].value_counts().sort_index())

    off_by_1 = analysis_df[analysis_df['error'] == 1].copy()
    off_by_2_plus = analysis_df[analysis_df['error'] >= 2].copy()

    print(f"\n{'-'*80}")
    print(f"OFF BY 1 CASES ({len(off_by_1)} total)")
    print(f"{'-'*80}")
    # Only a sample of the near misses is shown.
    _print_examples(off_by_1.head(5), text_limit=150, show_error=False)

    print(f"\n{'-'*80}")
    print(f"OFF BY 2+ CASES ({len(off_by_2_plus)} total) - MAJOR ERRORS")
    print(f"{'-'*80}")
    # Every major error is listed.
    _print_examples(off_by_2_plus, text_limit=200, show_error=True)

    return {
        'off_by_1': off_by_1,
        'off_by_2_plus': off_by_2_plus,
        'analysis_df': analysis_df
    }

# Run the error analysis once per prompt version and keep each result
# under the version's column prefix.
error_results = {}
for version, name in (
    ('v1', 'V1_Simple'),
    ('v2', 'V2_Detailed_Criteria'),
    ('v3', 'V3_Chain_of_Thought'),
):
    error_results.update({version: analyze_prediction_errors(predictions_df, version, name)})

print("\n" + "=" * 80, "ERROR ANALYSIS COMPLETE", "=" * 80, sep="\n")


ERROR ANALYSIS: PREDICTION DISCREPANCIES

ANALYSIS FOR: V1_Simple

Error Distribution:
error_type
Exact Match    134
Off by 1        64
Off by 2+        2
Name: count, dtype: int64

--------------------------------------------------------------------------------
OFF BY 1 CASES (64 total)
--------------------------------------------------------------------------------

Example 13:
  Actual: 1‚òÖ  |  Predicted: 2‚òÖ ‚Üë
  Review: I love Saddle Ranch for its casual night time festivities (including watching people ride the mechanical bull hehehe), but I cannot ignore the fact th...
  Explanation: The reviewer enjoyed the atmosphere and unique desserts, but was extremely disappointed with the food quality, describing it as 'disgusting', 'dry', and 'tasteless'.

Example 24:
  Actual: 1‚òÖ  |  Predicted: 2‚òÖ ‚Üë
  Review: I went today to meet my daughter for lunch.  We are both big sushi enthusiasts.  The interior and atmosphere was beautiful.  Well appointed.
The servi...
  Explanation: T