In [2]:
import pandas as pd
import json

In [3]:
# Read the earnings CSV into a DataFrame, then pull out the 'hourly_rate'
# column with missing values removed; `hourly` is the series the summary
# statistics in the later cells are computed from.
df = pd.read_csv('reddit_earnings_cleaned.csv')
hourly = df.loc[:, 'hourly_rate'].dropna()

In [4]:
# Quartiles and interquartile range of the hourly rates.
Q1 = hourly.quantile(0.25)
Q3 = hourly.quantile(0.75)
IQR = Q3 - Q1
# Flag rates strictly above the upper fence Q3 + 1.5 * IQR. Only the high side
# is checked: values below Q1 - 1.5 * IQR are not marked as outliers.
outlier_mask = hourly.gt(Q3 + 1.5 * IQR)

In [11]:
# Summary statistics for the hourly-rate sample. Each value is cast to a
# built-in int/float (presumably so the dict serializes cleanly to JSON).
final_stats = dict(
    # Row counts: every row of `df` vs. the entries of `hourly`.
    total_posts=int(df.shape[0]),
    hourly_rate_posts=int(hourly.shape[0]),
    # Central tendency, rounded to 2 decimals.
    mean_hourly=float(round(hourly.mean(), 2)),
    median_hourly=float(round(hourly.median(), 2)),
    # Mean of the rates lying inside the 10th-90th percentile band; both
    # boundaries are inclusive, so values equal to either quantile are kept.
    trimmed_mean=float(round(
        hourly[hourly.between(hourly.quantile(0.1), hourly.quantile(0.9))].mean(),
        2,
    )),
    # Share of rates strictly below each threshold, as a percentage (1 decimal).
    pct_below_15=float(round(hourly.lt(15).mean() * 100, 1)),
    pct_below_20=float(round(hourly.lt(20).mean() * 100, 1)),
    # Number of True entries in the outlier mask.
    outlier_count=int(outlier_mask.sum()),
    # Quartiles and interquartile range, rounded to 2 decimals.
    q1=float(round(Q1, 2)),
    q3=float(round(Q3, 2)),
    iqr=float(round(IQR, 2)),
)

In [12]:
# Persist the statistics dict as pretty-printed JSON (2-space indent).
with open('reddit_final_stats.json', 'w') as f:
    f.write(json.dumps(final_stats, indent=2))

# Human-readable report: one print call, one argument per output line.
# Arguments that start with "\n" produce the blank line before each section.
print(
    "=== FINAL REDDIT STATISTICS ===",
    f"Sample Size: {final_stats['hourly_rate_posts']} posts",
    "\nCentral Tendency:",
    f"  Mean: ${final_stats['mean_hourly']}/hour",
    f"  Median: ${final_stats['median_hourly']}/hour (more reliable)",
    f"  Trimmed Mean: ${final_stats['trimmed_mean']}/hour",
    "\nDistribution:",
    f"  25th percentile: ${final_stats['q1']}",
    f"  75th percentile: ${final_stats['q3']}",
    f"  {final_stats['pct_below_15']}% earn below $15/hour",
    f"  {final_stats['pct_below_20']}% earn below $20/hour",
    "\nData Quality:",
    f"  Outliers: {final_stats['outlier_count']} posts (kept in analysis)",
    sep="\n",
)

=== FINAL REDDIT STATISTICS ===
Sample Size: 88 posts

Central Tendency:
  Mean: $25.3/hour
  Median: $21.5/hour (more reliable)
  Trimmed Mean: $23.18/hour

Distribution:
  25th percentile: $15.0
  75th percentile: $30.0
  21.6% earn below $15/hour
  34.1% earn below $20/hour

Data Quality:
  Outliers: 5 posts (kept in analysis)
