In [None]:
"""
# Feature Analysis & Engineering
## Deep Dive into Customer Behavior Patterns

### Feature Importance Analysis
"""

# Import modules
from feature_engineering import FeatureEngineer
from statistical_tests import StatisticalAnalyzer
from visualization import AdvancedVisualizer

# Load the cleaned dataset produced by the earlier processing step.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that this project generated itself.
df = pd.read_pickle('data/processed/cleaned_data.pkl')

print("🔧 Starting feature engineering pipeline...")

# Initialize feature engineer
engineer = FeatureEngineer(df)

# The steps below are invoked in a fixed order on the same FeatureEngineer
# instance; the progress messages use a real newline ("\n") so the first step
# is visually separated from the banner instead of printing a literal "\n".

# Step 1: Create temporal features
print("\n1. Creating temporal features...")
engineer.create_temporal_features()

# Step 2: Create behavioral features
print("2. Creating behavioral features...")
engineer.create_behavioral_features()

# Step 3: Create interaction features
print("3. Creating interaction features...")
engineer.create_interaction_features()

# Step 4: Encode categorical variables
print("4. Encoding categorical features...")
engineer.encode_categorical_features()

# Step 5: Feature selection
# Returns the reduced frame plus a score table; later cells read the
# 'feature' column of feature_scores and the 'churn' column of final_df.
print("5. Performing feature selection...")
final_df, feature_scores = engineer.select_best_features(target_column='churn', k=15)

print("\n🎯 Top 10 Most Predictive Features:")
display(feature_scores.head(10))

"""
### Statistical Validation of Features
"""

# Run the project's statistical analyzer over the selected features.
analyzer = StatisticalAnalyzer(final_df)
stat_report = analyzer.generate_statistical_report()

# Header uses a real newline ("\n") rather than a literal backslash-n.
print("\n📊 Statistical Significance Summary:")

# Each entry of stat_report['hypothesis_tests'] is read as a mapping with
# 'test', 'p_value' and 'significant' keys; only entries flagged significant
# are kept, and at most the first five are reported.
significant_tests = [t for t in stat_report['hypothesis_tests'] if t['significant']]
for test in significant_tests[:5]:
    print(f"✅ {test['test']}: p-value = {test['p_value']:.4f}")

"""
### Feature Distribution Analysis
"""

# Analyze distributions of the six highest-scoring features, one subplot each.
top_features = feature_scores.head(6)['feature'].tolist()

fig, axes = plt.subplots(2, 3, figsize=(15, 10))
axes = axes.ravel()

# Split the frame by churn status once instead of re-filtering per feature.
retained_rows = final_df[final_df['churn'] == 0]
churned_rows = final_df[final_df['churn'] == 1]

for i, feature in enumerate(top_features):
    if feature not in final_df.columns:
        # Nothing to draw for this slot; hide the empty frame.
        axes[i].set_visible(False)
        continue

    # Overlay the two histograms so the class distributions can be compared.
    retained_rows[feature].hist(ax=axes[i], alpha=0.7, label='Retained', bins=20)
    churned_rows[feature].hist(ax=axes[i], alpha=0.7, label='Churned', bins=20)
    # Real newline so the title wraps onto two lines instead of showing "\n".
    axes[i].set_title(f'{feature}\nDistribution by Churn')
    axes[i].legend()
    axes[i].set_xlabel(feature.replace('_', ' ').title())

# Hide any grid cells left over when fewer than six features were scored.
for unused_ax in axes[len(top_features):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

"""
### Correlation Network Analysis
"""

# Create correlation network for top features
from itertools import combinations

import networkx as nx

# Use only top features that exist in final_df (the distribution plots apply
# the same guard) and make sure 'churn' is listed exactly once, at the end;
# duplicate labels would make the .loc lookups below return frames, not scalars.
top_features_network = [
    name for name in dict.fromkeys(top_features)
    if name in final_df.columns and name != 'churn'
] + ['churn']
corr_network = final_df[top_features_network].corr()

# Create graph
G = nx.Graph()

# Add an edge for every distinct pair whose absolute correlation exceeds 0.3.
# Pairs are taken from the correlation matrix's own labels and looked up by
# label, so the result stays correct even if corr() returns fewer columns than
# were requested. Nodes enter the graph only through these edges.
for feature1, feature2 in combinations(corr_network.columns, 2):
    strength = abs(corr_network.loc[feature1, feature2])
    if strength > 0.3:
        G.add_edge(feature1, feature2, weight=strength)

# Plot network
plt.figure(figsize=(12, 8))
pos = nx.spring_layout(G)

# Node colors based on (signed) correlation with churn
node_colors = [corr_network.loc[node, 'churn'] for node in G.nodes()]

nx.draw_networkx_nodes(G, pos, node_color=node_colors,
                       node_size=800, cmap='coolwarm',
                       vmin=-1, vmax=1)
# Edge thickness scales with the absolute correlation stored as 'weight'.
nx.draw_networkx_edges(G, pos, alpha=0.5,
                       width=[G[u][v]['weight'] * 3 for u, v in G.edges()])
nx.draw_networkx_labels(G, pos, font_size=8)

plt.title('Feature Correlation Network\nRelationship Strength Visualization')

# The ScalarMappable is not attached to any Axes, so the target Axes must be
# passed explicitly; otherwise recent Matplotlib versions raise ValueError
# because they cannot tell where to place the colorbar.
color_scale = plt.cm.ScalarMappable(cmap='coolwarm', norm=plt.Normalize(-1, 1))
plt.colorbar(color_scale, ax=plt.gca(), label='Correlation with Churn')
plt.axis('off')
plt.show()

print("\n💡 Feature analysis completed!")