In [None]:
# Load Models & Data
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json

# Model artifacts for the two churn horizons (7-day / 30-day, per the file
# names), stored one directory above the notebook.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only point these paths at artifacts from a trusted source.
model_7day = joblib.load('../churn_model_7day.pkl')
model_30day = joblib.load('../churn_model_30day.pkl')

# Read the metadata JSON, then pull out the feature-name list once the file
# has been closed. Later cells pair this list positionally with each model's
# `feature_importances_`.
with open('../feature_metadata.json') as metadata_file:
    metadata = json.load(metadata_file)

feature_names = metadata['feature_names']

In [None]:
# Feature Importance Comparison
# One row, two side-by-side panels: axes[0] and axes[1] are filled in by the
# cells that follow (7-day model on the left, 30-day model on the right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))

In [None]:
# 7-day model
# Pair each feature name with the 7-day model's importance, order the table
# from least to most important, and keep the last 15 rows (the 15 largest).
ranked_7day = pd.DataFrame(
    {
        'feature': feature_names,
        'importance': model_7day.feature_importances_,
    }
).sort_values(by='importance')
importance_7day = ranked_7day.tail(15)

# Left-hand panel. barh draws rows bottom-to-top, so the ascending order
# above places the most important feature at the top of the chart.
ax_7day = axes[0]
ax_7day.barh(
    importance_7day['feature'],
    importance_7day['importance'],
    color='#8b5cf6',
)
ax_7day.set_xlabel('Importance')
ax_7day.set_title('Top 15 Features - 7-Day Model', fontsize=14, fontweight='bold')


In [None]:
# 30-day model
# Pair each feature name with the 30-day model's importance, sort ascending,
# and keep the last 15 rows (the 15 largest). barh draws rows bottom-to-top,
# so the most important feature ends up at the top of the chart.
importance_30day = pd.DataFrame({
    'feature': feature_names,
    'importance': model_30day.feature_importances_
}).sort_values('importance', ascending=True).tail(15)

# Right-hand panel of the comparison figure created in an earlier cell.
axes[1].barh(importance_30day['feature'], importance_30day['importance'], color='#10b981')
axes[1].set_title('Top 15 Features - 30-Day Model', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Importance')

# Operate on the Figure object itself rather than on pyplot's "current
# figure". `fig` was created in a different cell, and the inline backend
# closes figures at the end of each cell by default, so here
# plt.tight_layout() / plt.savefig() would act on a brand-new empty figure
# and write a blank PNG.
fig.tight_layout()
fig.savefig('../visualizations/feature_importance_comparison.png', dpi=300)
plt.show()

# Trailing expression: renders the finished two-panel figure as this cell's
# output even when pyplot no longer tracks it (plt.show() then has nothing
# to draw).
fig

In [None]:
# Feature Groups Analysis
# Hand-picked buckets of feature names; each bucket is scored below by the
# summed 7-day-model importance of whichever members the model actually has.
feature_groups = {
    'Engagement': [
        'engagement_score', 'session_frequency_per_week',
        'total_playtime_hours', 'play_intensity',
    ],
    'Inactivity': [
        'days_since_last_login', 'recency_score',
        'inactivity_risk', 'activity_decay',
    ],
    'Spending': [
        'total_spending_usd', 'spending_per_hour',
        'is_spender', 'purchase_frequency',
    ],
    'Social': [
        'friend_count', 'chat_messages_sent',
        'social_score', 'is_social',
    ],
}

importances_7day = model_7day.feature_importances_
group_importance = {}
for group_name, members in feature_groups.items():
    # Positions of the members that appear in feature_names; names the
    # metadata does not list are silently left out of the total.
    positions = [
        feature_names.index(name) for name in members if name in feature_names
    ]
    if not positions:
        # No member of this group is a known feature: omit the group
        # entirely rather than recording a zero.
        continue
    group_importance[group_name] = importances_7day[positions].sum()

# One bar per group, in the order the groups were declared above.
plt.figure(figsize=(10, 6))
plt.bar(group_importance.keys(), group_importance.values(), color='#f59e0b')
plt.ylabel('Total Importance')
plt.title('Feature Importance by Category', fontsize=14, fontweight='bold')
plt.xticks(rotation=45)
plt.tight_layout()
plt.savefig('../visualizations/group_importance.png', dpi=300)
plt.show()

# Text summary of the same totals, largest group first.
print("\nFeature Group Importance:")
ranked_groups = sorted(
    group_importance.items(), key=lambda entry: entry[1], reverse=True
)
for label, total in ranked_groups:
    print(f"{label}: {total:.4f}")