# Evolver Loop 4 Analysis: Understanding exp_003 Failure

**Objective**: Analyze why the enhanced keyword features experiment (exp_003) degraded performance from 0.6253 to 0.6196.

**Key questions**:
1. Why did count-based keywords perform worse than binary indicators?
2. What can we learn about feature engineering effectiveness?
3. What should we try next based on this failure?

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# Load the training data. The file is opened with an explicit UTF-8 encoding
# so that JSON parsing does not depend on the platform's default locale
# encoding.
train_path = '/home/data/train.json'
with open(train_path, 'r', encoding='utf-8') as f:
    train_data = json.load(f)
train_df = pd.DataFrame(train_data)

# Sanity check: sample count, mean of the target column, and a preview of the
# text columns next to the target.
print(f"Training samples: {len(train_df)}")
print(f"Positive rate: {train_df['requester_received_pizza'].mean():.3f}")
print("\nFirst few rows:")
print(train_df[['request_title', 'request_text', 'requester_received_pizza']].head())

In [None]:
# Analyze keyword patterns in successful vs failed requests
def extract_keywords(text, keywords):
    """Count whole-word, case-insensitive occurrences of each keyword in text.

    Args:
        text: String to search. A missing value (anything ``pd.isna`` reports
            as NA, such as ``None`` or ``NaN``) yields a count of 0 for every
            keyword.
        keywords: Iterable of keyword strings to look for.

    Returns:
        Dict mapping each keyword, exactly as given, to its number of
        whole-word matches in ``text``.
    """
    # Imported once per call rather than once per keyword; kept local because
    # ``re`` is not among the imports visible at the top of this notebook.
    import re

    if pd.isna(text):
        return {kw: 0 for kw in keywords}

    text_lower = text.lower()
    # The text is lower-cased, so the keyword must be lower-cased as well or a
    # keyword containing uppercase letters could never match. ``\b`` anchors
    # restrict matches to whole words (e.g. "thank" does not match "thankful").
    return {
        kw: len(re.findall(r'\b' + re.escape(kw.lower()) + r'\b', text_lower))
        for kw in keywords
    }

# Keywords from exp_003 (original + new)
original_keywords = ['thanks', 'thank', 'please', 'because', 'pay', 'forward']
new_keywords = ['appreciate', 'grateful', 'children', 'family', 'need', 'help', 'desperate', 'hungry']
all_keywords = original_keywords + new_keywords

print("Analyzing keyword patterns...")

# Combine title and text for analysis; missing values become empty strings so
# the concatenation always yields a string.
train_df['full_text'] = train_df['request_title'].fillna('') + ' ' + train_df['request_text'].fillna('')

# Build one record per sample: the keyword counts plus the target label.
# Iterating the two needed columns directly avoids iterrows(), which
# constructs a full Series for every row.
keyword_data = [
    {**extract_keywords(full_text, all_keywords), 'requester_received_pizza': received}
    for full_text, received in zip(train_df['full_text'], train_df['requester_received_pizza'])
]

keyword_df = pd.DataFrame(keyword_data)

# Calculate success rates by keyword presence
print("\n" + "="*60)
print("KEYWORD ANALYSIS: Success rates by presence/absence")
print("="*60)

# A keyword is only analyzed when strictly more than this many samples
# contain it, so that its success rate is not based on a handful of rows.
MIN_PRESENT_SAMPLES = 20

# Overall mean of the target; identical for every keyword, so compute it once.
baseline_rate = train_df['requester_received_pizza'].mean()

# Explicit column order so the frame has the expected columns (and can be
# sorted by 'lift') even when no keyword passes the sample threshold.
result_columns = [
    'keyword',
    'present_count',
    'absent_count',
    'success_rate_present',
    'success_rate_absent',
    'lift',
    'baseline',
]

results = []
for kw in all_keywords:
    present = keyword_df[keyword_df[kw] > 0]
    absent = keyword_df[keyword_df[kw] == 0]

    if len(present) <= MIN_PRESENT_SAMPLES:
        continue

    success_rate_present = present['requester_received_pizza'].mean()
    success_rate_absent = absent['requester_received_pizza'].mean()

    results.append({
        'keyword': kw,
        'present_count': len(present),
        'absent_count': len(absent),
        'success_rate_present': success_rate_present,
        'success_rate_absent': success_rate_absent,
        # Lift: difference in success rate between samples that contain the
        # keyword and samples that do not.
        'lift': success_rate_present - success_rate_absent,
        'baseline': baseline_rate,
    })

results_df = pd.DataFrame(results, columns=result_columns).sort_values('lift', ascending=False)
print(results_df.round(4))