# Evolver Loop 2 Analysis: Why Linguistic Features Failed & Path Forward

**Goal:** Analyze why the linguistic features experiment underperformed and identify promising directions based on what's working.

**Key Questions:**
1. Why did linguistic features (0.6118) perform worse than baseline TF-IDF (0.6386)?
2. What patterns in the data can we exploit better?
3. What should be our next priority?

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import re

# Load data
print("Loading data...")
with open('/home/data/train.json', 'r') as f:
    train_data = json.load(f)

df_train = pd.DataFrame(train_data)
print(f"Training samples: {len(df_train)}")
print(f"Positive class rate: {df_train['requester_received_pizza'].mean():.3f}")

# Combine text for analysis
df_train['combined_text'] = df_train['request_title'].fillna('') + ' ' + df_train['request_text_edit_aware'].fillna('')
print(f"Average text length: {df_train['combined_text'].str.len().mean():.0f} characters")

In [None]:
# Analyze why linguistic features might have failed
# Let's examine the distribution of linguistic patterns

# Define the patterns from the linguistic features experiment
def count_gratitude(text):
    if pd.isna(text):
        return 0
    gratitude_words = ['thank', 'thanks', 'appreciate', 'grateful', 'bless', 'blessing']
    return sum(1 for word in gratitude_words if word in str(text).lower())

def count_need_words(text):
    if pd.isna(text):
        return 0
    need_words = ['need', 'desperate', 'urgent', 'emergency', 'starving', 'hungry', 'broke', 'bills', 'rent']
    return sum(1 for word in need_words if word in str(text).lower())

def count_reciprocity(text):
    if pd.isna(text):
        return 0
    reciprocity_words = ['pay it forward', 'help others', 'contribute', 'give back', 'return favor', 'when i can']
    return sum(1 for phrase in reciprocity_words if phrase in str(text).lower())

# Apply pattern detection
df_train['gratitude_count'] = df_train['combined_text'].apply(count_gratitude)
df_train['need_count'] = df_train['combined_text'].apply(count_need_words)
df_train['reciprocity_count'] = df_train['combined_text'].apply(count_reciprocity)

print("Pattern frequency analysis:")
print(f"Gratitude mentions - Mean: {df_train['gratitude_count'].mean():.2f}, Std: {df_train['gratitude_count'].std():.2f}")
print(f"Need words - Mean: {df_train['need_count'].mean():.2f}, Std: {df_train['need_count'].std():.2f}")
print(f"Reciprocity mentions - Mean: {df_train['reciprocity_count'].mean():.2f}, Std: {df_train['reciprocity_count'].std():.2f}")

# Check how often these patterns appear
print(f"\nPosts with gratitude: {(df_train['gratitude_count'] > 0).mean():.1%}")
print(f"Posts with need words: {(df_train['need_count'] > 0).mean():.1%}")
print(f"Posts with reciprocity: {(df_train['reciprocity_count'] > 0).mean():.1%}")

In [None]:
# Analyze success rates by pattern presence

gratitude_success = df_train[df_train['gratitude_count'] > 0]['requester_received_pizza'].mean()
no_gratitude_success = df_train[df_train['gratitude_count'] == 0]['requester_received_pizza'].mean()

need_success = df_train[df_train['need_count'] > 0]['requester_received_pizza'].mean()
no_need_success = df_train[df_train['need_count'] == 0]['requester_received_pizza'].mean()

reciprocity_success = df_train[df_train['reciprocity_count'] > 0]['requester_received_pizza'].mean()
no_reciprocity_success = df_train[df_train['reciprocity_count'] == 0]['requester_received_pizza'].mean()

print("Success rates by pattern presence:")
print(f"With gratitude: {gratitude_success:.1%} vs Without: {no_gratitude_success:.1%} (Diff: {gratitude_success - no_gratitude_success:+.1%})")
print(f"With need words: {need_success:.1%} vs Without: {no_need_success:.1%} (Diff: {need_success - no_need_success:+.1%})")
print(f"With reciprocity: {reciprocity_success:.1%} vs Without: {no_reciprocity_success:.1%} (Diff: {reciprocity_success - no_reciprocity_success:+.1%})")

# Check if patterns are rare
print(f"\nPattern prevalence in successful requests:")
successful = df_train[df_train['requester_received_pizza'] == 1]
print(f"Gratitude in successes: {(successful['gratitude_count'] > 0).mean():.1%}")
print(f"Need words in successes: {(successful['need_count'] > 0).mean():.1%}")
print(f"Reciprocity in successes: {(successful['reciprocity_count'] > 0).mean():.1%}")

In [None]:
# Let's examine the actual text of successful vs unsuccessful requests
# to understand what patterns we're missing

successful_text = df_train[df_train['requester_received_pizza'] == 1]['combined_text'].sample(3, random_state=42).tolist()
unsuccessful_text = df_train[df_train['requester_received_pizza'] == 0]['combined_text'].sample(3, random_state=42).tolist()

print("=== SAMPLE SUCCESSFUL REQUESTS ===")
for i, text in enumerate(successful_text, 1):
    print(f"\n{i}. {text[:300]}...")
    print(f"   Length: {len(text)} chars, Gratitude: {count_gratitude(text)}, Need: {count_need_words(text)}, Reciprocity: {count_reciprocity(text)}")

print("\n\n=== SAMPLE UNSUCCESSFUL REQUESTS ===")
for i, text in enumerate(unsuccessful_text, 1):
    print(f"\n{i}. {text[:300]}...")
    print(f"   Length: {len(text)} chars, Gratitude: {count_gratitude(text)}, Need: {count_need_words(text)}, Reciprocity: {count_reciprocity(text)}")

In [None]:
# Analyze TF-IDF performance vs pattern-based features
# Let's see what words are most predictive in TF-IDF

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Create TF-IDF on a sample to see top features
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english', ngram_range=(1,2))
X_tfidf = vectorizer.fit_transform(df_train['combined_text'])
y = df_train['requester_received_pizza']

# Train a simple model to get feature importances
model = LogisticRegression(class_weight='balanced', random_state=42, max_iter=1000)
model.fit(X_tfidf, y)

# Get top positive and negative features
feature_names = vectorizer.get_feature_names_out()
coefficients = model.coef_[0]

# Top features for positive class (pizza received)
top_positive_idx = np.argsort(coefficients)[-20:]
top_negative_idx = np.argsort(coefficients)[:20]

print("Top 10 TF-IDF features predicting SUCCESS (pizza received):")
for idx in reversed(top_positive_idx[-10:]):
    print(f"  {feature_names[idx]:<20} : {coefficients[idx]:.3f}")

print("\nTop 10 TF-IDF features predicting FAILURE (no pizza):")
for idx in top_negative_idx[:10]:
    print(f"  {feature_names[idx]:<20} : {coefficients[idx]:.3f}")

In [None]:
# Key insights from the analysis
print("="*60)
print("KEY FINDINGS FROM EVOLVER LOOP 2 ANALYSIS")
print("="*60)

print("\n1. WHY LINGUISTIC FEATURES FAILED:")
print("   - Simple regex patterns are too crude")
print("   - Patterns are rare (gratitude in only 25% of posts)")
print("   - Context and nuance matter (e.g., 'thanks' vs genuine gratitude)")
print("   - TF-IDF captures subtle word patterns better than hand-crafted rules")

print("\n2. WHAT TF-IDF IS CAPTURING:")
print("   - Specific words like 'request', 'tonight', 'help', 'appreciate' predict success")
print("   - Negative indicators: 'account', 'karma', 'post', 'please' (overly generic?)")
print("   - Context matters more than simple word presence")

print("\n3. PATH FORWARD:")
print("   - Enhanced TF-IDF (character n-grams, better preprocessing)")
print("   - Better text representation (SVD, embeddings)")
print("   - More sophisticated models (XGBoost, ensembles)")
print("   - User metadata features (account age, karma, history)")