In [26]:
import pandas as pd
import numpy as np
import re
import ast

def analyze_specific_example():
    """Print a step-by-step classification of one hard-coded review.

    The example is a 1-star review of a transmission shop whose owner replied
    that the reviewer had never been a customer. The function prints the raw
    fields, the result of each pattern detector, and the final label chosen
    by ``analyze_single_review``.

    Returns:
        dict: The analysis returned by ``analyze_single_review`` for the
        example (``suggested_label``, ``confidence``, ``reason``,
        ``is_correct``).
    """

    # The specific example data
    gmap_id = "0x872b46a9afb11747:0xbab4f9c1c4625e5e"
    name = "jay Jaysam2415"
    rating = 1
    resp_raw = "{'text': 'Jay, we strive to provide excellent service to our customers. We do not show that we have ever done business with you and you have never been a customer at our shop. I do see that you have recently reviewed 3 other transmission shops around the valley. We would appreciate the opportunity to clear up any misunderstandings and ask that you please contact our shop at your convenience.', 'time': 1559153757964.0}"
    text = "All I can say is SMH about this place be careful"
    business_category = "Transmission shop, Auto machine shop, Auto repair shop, Auto tune up service, Brake shop, Car repair and maintenance"

    print("="*80)
    print("SPECIFIC EXAMPLE ANALYSIS")
    print("="*80)

    print(f"GMAP_ID: {gmap_id}")
    print(f"REVIEWER NAME: {name}")
    print(f"RATING: {rating}")
    print(f"REVIEW TEXT: {text}")
    print(f"BUSINESS CATEGORY: {business_category}")

    # Parse business response. Only the errors literal_eval is documented to
    # raise on malformed input are caught, so Ctrl-C and real bugs still
    # surface instead of silently producing an empty response.
    try:
        resp_dict = ast.literal_eval(resp_raw)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        resp_dict = None
    # A literal that parses to something other than a dict has no 'text' field.
    business_response = resp_dict.get('text', '') if isinstance(resp_dict, dict) else ""

    print(f"BUSINESS RESPONSE: {business_response}")

    print("\n" + "-"*60)
    print("ANALYSIS BREAKDOWN:")
    print("-"*60)

    # Run analysis using the main functions. The current label is passed as
    # "Unknown" because this example is not taken from the labelled dataset.
    analysis = analyze_single_review(text, "Unknown", business_category, rating, True, resp_raw)

    print(f"Review Length: {len(text)} characters")
    print(f"Rating: {rating}/5 (negative)")
    print("Has Business Response: Yes")

    print("\nDetailed Pattern Analysis:")
    print(f"- Advertisement patterns: {has_advertisement_patterns(text)}")
    print(f"- Irrelevant content patterns: {has_irrelevant_patterns(text, business_category)}")
    print(f"- Rant without visit patterns: {has_rant_no_visit_patterns(text, rating, True, business_response)}")
    print(f"- Is clean review: {is_actually_clean_review(text, business_category, business_response)}")

    print("\nFINAL CLASSIFICATION:")
    print(f"Suggested Label: {analysis['suggested_label']}")
    print(f"Confidence: {analysis['confidence']}")
    print(f"Reasoning: {analysis['reason']}")

    print("\nKEY EVIDENCE:")
    print("Business explicitly states: 'you have never been a customer at our shop'")
    print("This provides definitive proof that the reviewer did not visit/use the service")
    print("Combined with negative review = clear 'Rant without visit' violation")

    return analysis

def comprehensive_label_validation(df):
    """Re-check every row's label with the rule-based analyzer and report per-category accuracy.

    For each distinct value in ``df['classification']`` the function runs
    ``analyze_single_review`` on every row of that category, prints summary
    counts plus up to ten correct and ten mislabeled examples, and collects
    prompting issues via ``analyze_prompting_issues``. A consolidated report
    is printed at the end by ``create_consolidated_prompting_strategy``.

    Args:
        df: DataFrame with a ``classification`` column and a ``text`` column.
            ``category_str``, ``rating`` and ``resp`` are read when present.

    Returns:
        dict: Maps each category to a dict with ``accuracy`` (percent),
        ``correct_count``, ``mislabeled_count`` and ``total_samples``.
    """

    print("="*80)
    print("COMPREHENSIVE LABEL VALIDATION ANALYSIS")
    print("="*80)

    print("\n1. OVERALL LABEL DISTRIBUTION:")
    distribution = df['classification'].value_counts()
    print(distribution)

    validation_results = {}
    all_prompting_issues = []

    # Rows without a label cannot be validated: a NaN category never equals
    # itself, so it would select zero rows and divide by zero below.
    labels = df['classification']
    unlabeled_rows = int(labels.isna().sum())
    if unlabeled_rows:
        print(f"\nSkipping {unlabeled_rows} row(s) with a missing classification label")
    categories = labels.dropna().unique()

    for category in categories:
        # Labels are normally strings; str() keeps the header and the
        # analyzer call from crashing on an unexpected non-string label.
        category_name = str(category)

        print(f"\n{'='*60}")
        print(f"ANALYZING: {category_name.upper()}")
        print(f"{'='*60}")

        category_data = df[df['classification'] == category]
        total_in_category = len(category_data)
        sample_size = total_in_category  # Use ALL data, no sampling

        correct_count = 0
        mislabeled_count = 0
        correct_examples = []
        mislabeled_examples = []

        for idx, row in category_data.iterrows():
            text = str(row['text'])
            business_category = str(row.get('category_str', 'Unknown'))
            rating = row.get('rating', 'N/A')
            resp_text = row.get('resp', '')
            # Missing responses show up as '', NaN ('nan') or None ('None').
            has_response = str(resp_text).strip() not in ['', 'nan', 'None']

            analysis = analyze_single_review(text, category_name, business_category, rating, has_response, resp_text)

            if analysis['is_correct']:
                correct_count += 1
                correct_examples.append({
                    'text': text,
                    'business_category': business_category,
                    'rating': rating,
                    'has_response': has_response,
                    'reason': analysis['reason']
                })
            else:
                mislabeled_count += 1
                mislabeled_examples.append({
                    'text': text,
                    'business_category': business_category,
                    'rating': rating,
                    'has_response': has_response,
                    'suggested_label': analysis['suggested_label'],
                    'reason': analysis['reason']
                })

        # Guard against an empty category so the report never divides by zero.
        accuracy = (correct_count / sample_size) * 100 if sample_size else 0.0

        print(f"Total samples in category: {total_in_category}")
        print("CATEGORY SUMMARY:")
        print(f"Total samples in dataset: {total_in_category}")
        print(f"Analyzed sample: {sample_size}")
        print(f"Correct labels: {correct_count}/{sample_size} ({accuracy:.1f}%)")
        print(f"Mislabeled: {mislabeled_count}/{sample_size} ({100-accuracy:.1f}%)")

        # Only the first ten examples of each kind are printed; the full
        # lists are still handed to analyze_prompting_issues below.
        print("\n--- CORRECTLY LABELED EXAMPLES ---")
        for i, example in enumerate(correct_examples[:10], 1):
            print(f"\n{i}. CORRECT EXAMPLE:")
            print(f"   TEXT: {example['text']}")
            print(f"   BUSINESS CATEGORY: {example['business_category']}")
            print(f"   RATING: {example['rating']}")
            print(f"   HAS BUSINESS RESPONSE: {example['has_response']}")
            print(f"   WHY CORRECT: {example['reason']}")
            print("-" * 60)

        print("\n--- MISLABELED EXAMPLES ---")
        for i, example in enumerate(mislabeled_examples[:10], 1):
            print(f"\n{i}. MISLABELED EXAMPLE:")
            print(f"   TEXT: {example['text']}")
            print(f"   BUSINESS CATEGORY: {example['business_category']}")
            print(f"   RATING: {example['rating']}")
            print(f"   HAS BUSINESS RESPONSE: {example['has_response']}")
            print(f"   CURRENT LABEL: {category}")
            print(f"   SUGGESTED LABEL: {example['suggested_label']}")
            print(f"   WHY MISLABELED: {example['reason']}")
            print("-" * 60)

        prompting_issue = analyze_prompting_issues(category, accuracy, correct_examples, mislabeled_examples)
        all_prompting_issues.append(prompting_issue)

        validation_results[category] = {
            'accuracy': accuracy,
            'correct_count': correct_count,
            'mislabeled_count': mislabeled_count,
            'total_samples': total_in_category
        }

    create_consolidated_prompting_strategy(all_prompting_issues, validation_results)

    return validation_results

def _extract_response_text(resp_text):
    """Return the business reply as plain text from a raw ``resp`` value."""
    if isinstance(resp_text, dict):
        reply = resp_text.get('text', '')
    elif isinstance(resp_text, str) and resp_text.startswith('{'):
        # The dataset stores replies as the repr of a dict, e.g.
        # "{'text': '...', 'time': 1559153757964.0}".
        try:
            parsed = ast.literal_eval(resp_text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a valid literal: fall back to treating the raw string as the reply.
            return resp_text
        if not isinstance(parsed, dict):
            return resp_text
        reply = parsed.get('text', '')
    else:
        reply = resp_text
    # A reply stored as {'text': None} must not leak None to callers that call .lower().
    return "" if reply is None else str(reply)


def _normalize_label(label):
    """Lower-case a label and drop spaces/underscores so spelling variants compare equal."""
    return str(label).lower().replace(' ', '').replace('_', '')


def analyze_single_review(text, current_label, business_category, rating, has_response, resp_text=""):
    """Suggest a policy label for one review and compare it with the current label.

    The detectors are applied in priority order: advertisement, irrelevant
    content, rant without visit, then clean review. The first one that fires
    decides the suggested label.

    Args:
        text: Review text (a string).
        current_label: Label currently assigned to the review. Non-string
            values (e.g. NaN from pandas) are converted with ``str``.
        business_category: Category string of the reviewed business.
        rating: Star rating; non-numeric values are tolerated.
        has_response: Whether the business replied to the review.
        resp_text: Raw reply: a dict, the string repr of a dict with a
            ``'text'`` key, or plain text.

    Returns:
        dict: ``suggested_label``, ``confidence``, ``reason`` and
        ``is_correct`` (True when the suggested and current labels match,
        ignoring case, spaces and underscores).
    """

    # Extract response text if available
    business_response = ""
    if has_response and resp_text and str(resp_text) != 'nan':
        business_response = _extract_response_text(resp_text)

    has_ads = has_advertisement_patterns(text)
    has_irrelevant = has_irrelevant_patterns(text, business_category)
    has_rant = has_rant_no_visit_patterns(text, rating, has_response, business_response)
    is_clean = is_actually_clean_review(text, business_category, business_response)

    if has_ads:
        suggested = "Advertisement"
        confidence = "High"
        reason = "Contains promotional content or links"
    elif has_irrelevant:
        suggested = "Irrelevant content"
        confidence = "High"
        reason = f"Content not related to business type ({business_category})"
    elif has_rant:
        suggested = "Rant without visit"
        # An explicit denial from the business is stronger evidence than
        # wording in the review alone.
        business_denies = "never been a customer" in business_response.lower()
        confidence = "High" if business_denies else "Medium"
        reason = "Business confirms no customer relationship" if business_denies else "Appears to complain without visiting"
    elif is_clean:
        suggested = "Acceptable review"
        confidence = "Medium"
        reason = "Legitimate review about business experience"
    else:
        # NOTE(review): with the detectors as currently written in this file,
        # is_clean is False here only when the stripped text has 5 or fewer
        # characters, so the "Cannot determine" branch looks unreachable and
        # 6-9 character reviews are labelled acceptable — confirm the
        # intended threshold.
        if len(text.strip()) < 10:
            suggested = "Low quality review"
            confidence = "Low"
            reason = "Very short review with minimal content"
        else:
            suggested = "Acceptable review"
            confidence = "Low"
            reason = "Cannot determine clear policy violation"

    return {
        'suggested_label': suggested,
        'confidence': confidence,
        'reason': reason,
        'is_correct': _normalize_label(suggested) == _normalize_label(current_label)
    }

def has_advertisement_patterns(text):
    """Return True when the review contains clearly promotional content.

    Matching is case-insensitive (the text is lower-cased first) and is
    deliberately strict: only URLs, promo codes, "call/text/contact ... at"
    followed by digits, website plugs and percentage discounts count.
    """
    lowered = text.lower()
    promo_signatures = (
        r'www\.\w+|http[s]?://\w+',                              # Actual URLs
        r'\b(use code|promo code|coupon code)\s+\w+',            # Specific promo codes
        r'\b(call|text|contact)\s+(us\s+)?at\s*\d{3}',           # Contact with phone numbers
        r'\b(visit our|check our|go to our)\s+(website|site)',   # Direct website promotion
        r'\b(get|save)\s+\d+%\s+(off|discount)',                 # Specific discount offers
    )
    for signature in promo_signatures:
        if re.search(signature, lowered):
            return True
    return False

def has_irrelevant_patterns(text, business_category):
    """Return True when the review is clearly about something other than the business.

    Generic off-topic phrases are checked for every business. One extra
    pattern is checked only when the category string contains one of the
    food keywords ('restaurant', 'food', 'cafe', 'bar'); this is a substring
    test on the lower-cased category.
    """
    category_lower = business_category.lower() if business_category else ""
    review = text.lower()

    # Phrases that flag a review as off-topic regardless of business type.
    generic_off_topic = (
        r'\b(not about|unrelated to|off topic)\b',
        r'(wrong place|different location|thought this was)',
        r'(my new phone|bought a laptop|car problems).*but.*place',
    )
    for phrase in generic_off_topic:
        if re.search(phrase, review):
            return True

    # Food businesses - only flag if clearly about non-food as main topic
    food_keywords = ('restaurant', 'food', 'cafe', 'bar')
    is_food_business = False
    for keyword in food_keywords:
        if keyword in category_lower:
            is_food_business = True
            break
    if not is_food_business:
        return False

    non_food_topic = r'(this review is about my|talking about my|discussing my)\s+(phone|car|laptop)'
    return re.search(non_food_topic, review) is not None

def has_rant_no_visit_patterns(text, rating, has_response, business_response_text=""):
    """Return a truthy value when the reviewer appears never to have visited.

    Three signals are checked in order:
      1. the business reply explicitly denies any customer relationship;
      2. the review itself says the author never visited or is repeating hearsay;
      3. the review is very short (< 30 chars), rated 2 or lower, and the
         reply contains a (looser) denial phrase.

    ``has_response`` is accepted for interface compatibility but is not read.
    """
    reply = business_response_text.lower() if business_response_text else ""

    # 1. Check business response for confirmation of no visit
    if reply.strip():
        denial_phrases = (
            r'never been a customer',
            r'have not done business with you',
            r'you have never visited',
            r'no record of your visit',
            r'never provided service to you',
            r'do not show.*ever done business with you',
        )
        for phrase in denial_phrases:
            if re.search(phrase, reply):
                return True

    # 2. Original text-based detection
    review = text.lower()
    self_reported_no_visit = (
        r'\b(never been here|haven\'?t visited this place|didn\'?t go to this place)\b',
        r'\b(never went to this|haven\'?t been to this)\b',
        r'\b(heard from others|someone told me|people say)\b.*\b(terrible|awful|bad|horrible)\b',
        r'\b(based on what I heard|from what I\'ve heard)\b',
    )
    for phrase in self_reported_no_visit:
        if re.search(phrase, review):
            return True

    # 3. Additional signals: Very short negative review + low rating + business denial
    short_and_negative = (
        len(text.strip()) < 30
        and rating is not None
        and isinstance(rating, (int, float))
        and rating <= 2
    )
    loose_denials = (r'never been a customer', r'do not show.*ever done business')
    reply_denies = any(re.search(phrase, reply) for phrase in loose_denials)

    return short_and_negative and reply_denies

def is_actually_clean_review(text, business_category, business_response):
    """Return True when the review is longer than 5 characters and no detector fires.

    The rant check is run without a rating (``None``), so only the review
    wording and the business response can trigger it here.
    """
    # Too short to count as a real review.
    if len(text.strip()) <= 5:
        return False
    if has_advertisement_patterns(text):
        return False
    if has_irrelevant_patterns(text, business_category):
        return False
    return not has_rant_no_visit_patterns(text, None, None, business_response)

def analyze_prompting_issues(category, accuracy, correct_examples, mislabeled_examples):
    """Build the prompting-issue record for one category.

    A main problem and its fixes are filled in only when ``category`` is one
    of the known labels and ``accuracy`` is below that label's threshold;
    otherwise ``main_problem`` stays '' and ``specific_fixes`` stays [].
    ``correct_examples`` and ``mislabeled_examples`` are accepted but not read.
    """
    # (label, accuracy threshold, main problem, fixes)
    rules = (
        (
            "Advertisement", 80,
            "Confusing customer recommendations with advertisements",
            (
                "Only flag reviews with actual URLs, promo codes, or phone numbers",
                "Don't flag positive customer experiences as ads",
                "Look for business self-promotion, not customer enthusiasm",
            ),
        ),
        (
            "Low quality review", 50,
            "Flagging legitimate detailed reviews as low quality",
            (
                "Only flag extremely short reviews (under 10 characters)",
                "Don't flag reviews with specific experiences as low quality",
                "Focus on meaningless reviews like single words",
            ),
        ),
        (
            "Irrelevant content", 80,
            "Flagging relevant content as irrelevant",
            (
                "Food/kitchen/service content IS relevant to restaurants",
                "Only flag if review explicitly discusses unrelated topics",
                "Context matters - enthusiastic food reviews are relevant",
            ),
        ),
        (
            "Rant without visit", 80,
            "Flagging reviews with clear visit evidence as non-visitor rants",
            (
                "Look for explicit 'never been here' language",
                "Reviews mentioning staff interaction show visits",
                "Detailed service descriptions indicate actual experience",
                "Use business responses that deny customer relationship as strong evidence",
            ),
        ),
    )

    main_problem = ''
    specific_fixes = []
    for label, threshold, problem, fixes in rules:
        if category == label and accuracy < threshold:
            main_problem = problem
            specific_fixes = list(fixes)  # fresh list per call
            break

    return {
        'category': category,
        'accuracy': accuracy,
        'main_problem': main_problem,
        'specific_fixes': specific_fixes,
    }

def create_consolidated_prompting_strategy(all_issues, validation_results):
    """Print the end-of-run report: accuracy table, issues found, and the revised prompt.

    Args:
        all_issues: Issue records with ``category``, ``main_problem`` and
            ``specific_fixes`` keys; records with an empty ``main_problem``
            are skipped.
        validation_results: Maps each category to a dict holding at least
            an ``accuracy`` percentage.
    """
    banner = "=" * 80

    corrected_prompt = """
TASK: Classify this review for policy violations using STRICT criteria.

CONTEXT:
Business: [Business Name]
Category: [Business Category] 
Rating: [1-5 stars]
Review: "[Review Text]"
Business Response: "[If available]"

STRICT CLASSIFICATION RULES:

1. ADVERTISEMENT - Only if contains:
   - Actual website URLs (www.site.com, http://)
   - Specific promo codes ("use code SAVE20")
   - Phone numbers with "call us at"
   - Direct business promotion language
   NOT customer enthusiasm or recommendations

2. IRRELEVANT CONTENT - Only if review explicitly discusses:
   - Topics completely unrelated to business type
   - Wrong business entirely
   - Personal matters unrelated to the service
   NOT food reviews for restaurants or service reviews for services

3. RANT WITHOUT VISIT - Only if explicitly states OR business confirms:
   - "Never been here but..."
   - "Haven't visited but heard..."
   - "Based on what others told me..."
   - Business responds "never been a customer" or similar
   NOT detailed negative experiences (these show actual visits)

4. ACCEPTABLE REVIEW - Default for legitimate customer experiences:
   - Any review discussing actual business experience
   - Positive, negative, or neutral customer feedback
   - Reviews matching business category appropriately

DEFAULT: When in doubt, classify as ACCEPTABLE REVIEW
Most customer reviews should be acceptable unless clear violations exist.

ANSWER: [Category]
REASONING: [Why this specific classification]
"""

    print("\n" + banner)
    print("CONSOLIDATED PROMPTING IMPROVEMENT STRATEGY")
    print(banner)

    print("\nACCURACY SUMMARY BY CATEGORY:")
    for label, stats in validation_results.items():
        print("{:25}: {:5.1f}% accurate".format(label, stats['accuracy']))

    print("\nMAJOR ISSUES IDENTIFIED:")
    for record in all_issues:
        # Categories that met their accuracy threshold have no problem text.
        if not record['main_problem']:
            continue
        print("\n" + f"{record['category']}" + ":")
        print("  Problem: " + f"{record['main_problem']}")
        for suggestion in record['specific_fixes']:
            print("  Fix: " + f"{suggestion}")

    print("\nCORRECTED PROMPT STRATEGY:")
    print(corrected_prompt)

# Main execution: run the hard-coded worked example first, then validate
# every label in the classified-reviews CSV.
if __name__ == "__main__":
    # First analyze the specific example
    print("STEP 1: ANALYZING SPECIFIC EXAMPLE")
    analyze_specific_example()
    
    print("\n\nSTEP 2: FULL DATASET VALIDATION")
    # The path is relative to the current working directory. `df` is bound at
    # module level here and is reused by the later cell that calls
    # display_examples_by_suggested_label(df, ...).
    df = pd.read_csv('classified_reviews_cleaned.csv')
    # Per-category accuracy and counts, keyed by classification label.
    validation_results = comprehensive_label_validation(df)
    
    print(f"\nValidation complete. The specific example demonstrates how business responses can confirm 'Rant without visit' violations.")

STEP 1: ANALYZING SPECIFIC EXAMPLE
SPECIFIC EXAMPLE ANALYSIS
GMAP_ID: 0x872b46a9afb11747:0xbab4f9c1c4625e5e
REVIEWER NAME: jay Jaysam2415
RATING: 1
REVIEW TEXT: All I can say is SMH about this place be careful
BUSINESS CATEGORY: Transmission shop, Auto machine shop, Auto repair shop, Auto tune up service, Brake shop, Car repair and maintenance
BUSINESS RESPONSE: Jay, we strive to provide excellent service to our customers. We do not show that we have ever done business with you and you have never been a customer at our shop. I do see that you have recently reviewed 3 other transmission shops around the valley. We would appreciate the opportunity to clear up any misunderstandings and ask that you please contact our shop at your convenience.

------------------------------------------------------------
ANALYSIS BREAKDOWN:
------------------------------------------------------------
Review Length: 48 characters
Rating: 1/5 (negative)
Has Business Response: Yes

Detailed Pattern Analysis:


# TODO: needs improvement

In [27]:
def display_examples_by_suggested_label(df, max_examples_per_category=10):
    """Print sample reviews grouped by the label the rule-based analyzer suggests.

    Rows are scanned in DataFrame order and each is classified with
    ``analyze_single_review``. Up to ``max_examples_per_category`` examples
    are kept per suggested label, and scanning stops early once every label
    has reached that cap.

    Args:
        df: DataFrame with a ``text`` column; ``category_str``, ``rating``,
            ``resp`` and ``classification`` are read when present.
        max_examples_per_category: Cap on stored examples per label.
    """
    wide_rule = "=" * 80
    narrow_rule = "=" * 60

    print("\n" + wide_rule)
    print("EXAMPLES BY SUGGESTED LABEL CATEGORIES")
    print(wide_rule)

    # Target categories to find examples for
    target_categories = [
        'Acceptable review',
        'Low quality review',
        'Rant without visit',
        'Irrelevant content',
        'Advertisement',
    ]

    # One bucket of collected examples per target label.
    examples_by_category = {}
    for label in target_categories:
        examples_by_category[label] = []

    print("Analyzing entire dataset to find examples for each suggested category...")

    for _, row in df.iterrows():
        review_text = str(row['text'])
        biz_category = str(row.get('category_str', 'Unknown'))
        stars = row.get('rating', 'N/A')
        raw_resp = row.get('resp', '')
        # Missing responses show up as '', NaN ('nan') or None ('None').
        responded = str(raw_resp).strip() not in ['', 'nan', 'None']
        existing_label = row.get('classification', 'Unknown')

        verdict = analyze_single_review(review_text, existing_label, biz_category, stars, responded, raw_resp)

        # Keep the row only if its suggested label is a target with room left.
        bucket = examples_by_category.get(verdict['suggested_label'])
        if bucket is not None and len(bucket) < max_examples_per_category:
            bucket.append({
                'text': review_text,
                'current_label': existing_label,
                'business_category': biz_category,
                'rating': stars,
                'has_response': responded,
                'confidence': verdict['confidence'],
                'reason': verdict['reason'],
            })

        # Stop scanning as soon as every bucket is full.
        if all(len(found) >= max_examples_per_category for found in examples_by_category.values()):
            break

    # Display examples for each category
    for label in target_categories:
        collected = examples_by_category[label]
        print("\n" + narrow_rule)
        print(f"SUGGESTED LABEL: {label.upper()}")
        print(narrow_rule)
        print(f"Found {len(collected)} examples")

        if not collected:
            print("No examples found for this category")
            continue

        for position, item in enumerate(collected, 1):
            print(f"\n{position}. EXAMPLE:")
            print(f"   TEXT: {item['text']}")
            print(f"   BUSINESS CATEGORY: {item['business_category']}")
            print(f"   RATING: {item['rating']}")
            print(f"   CURRENT LABEL: {item['current_label']}")
            print(f"   HAS BUSINESS RESPONSE: {item['has_response']}")
            print(f"   CONFIDENCE: {item['confidence']}")
            print(f"   REASONING: {item['reason']}")
            print("-" * 60)

    # Summary statistics
    print("\n" + narrow_rule)
    print("SUMMARY OF SUGGESTED LABELS")
    print(narrow_rule)

    total_examples = 0
    for label in target_categories:
        found_count = len(examples_by_category[label])
        total_examples += found_count
        print(f"{label:25}: {found_count:3d} examples found")

    print(f"{'Total examples':25}: {total_examples:3d}")

# Run the example display function
if __name__ == "__main__":
    # Relies on `df` having been loaded by the earlier main-execution block
    # (pd.read_csv); shows at most 10 examples per suggested label.
    display_examples_by_suggested_label(df, max_examples_per_category=10)


EXAMPLES BY SUGGESTED LABEL CATEGORIES
Analyzing entire dataset to find examples for each suggested category...

SUGGESTED LABEL: ACCEPTABLE REVIEW
Found 10 examples

1. EXAMPLE:
   TEXT: Great people. A one stop store for anything you might need.
   BUSINESS CATEGORY: Grocery store, Propane supplier
   RATING: 5
   CURRENT LABEL: Acceptable review
   HAS BUSINESS RESPONSE: False
   CONFIDENCE: Medium
   REASONING: Legitimate review about business experience
------------------------------------------------------------

2. EXAMPLE:
   TEXT: Always spend too much!
   BUSINESS CATEGORY: Warehouse store, Department store
   RATING: 5
   CURRENT LABEL: Acceptable review
   HAS BUSINESS RESPONSE: False
   CONFIDENCE: Medium
   REASONING: Legitimate review about business experience
------------------------------------------------------------

3. EXAMPLE:
   TEXT: Bring your appetite. Best tortas in town.
   BUSINESS CATEGORY: Mexican restaurant, Breakfast restaurant, Burrito restaurant, Rest