# In [None]:

"""
# Customer Churn Analysis - Exploratory Data Analysis
## GlobalStream Inc. - STANDARD Package Delivery

### 1. Project Overview
- **Client**: GlobalStream Inc.
- **Objective**: Identify key drivers of customer churn
- **Dataset**: 5,000 customers, 15 features
- **Timeline**: 2-week analysis

### 2. Initial Data Assessment
"""

# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from data_processing import DataProcessor
from feature_engineering import FeatureEngineer
from exploratory_analysis import EDA

# Load the raw customer export and take a first look at its structure.
# The rest of the notebook reads the loaded frame from `processor.df`
# (presumably populated by `load_data()` -- confirm in data_processing).
print("📁 Loading customer data...")
processor = DataProcessor('data/raw/customer_data.csv')
processor.load_data()

# Use "\n" (not "\\n") so the headers are preceded by a blank line rather
# than a literal backslash-n.
print("\n📋 Data Overview:")
print(f"Shape: {processor.df.shape}")
print(f"Columns: {list(processor.df.columns)}")

print("\n📊 First look at the data:")
display(processor.df.head())

print("\n🔍 Data types and missing values:")
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() only added a stray "None" to the cell output.
processor.df.info()

"""
### 3. Data Quality Assessment
"""

# Count missing values per column and express them as a share of all rows.
missing_data = processor.df.isnull().sum()
missing_percent = (missing_data / len(processor.df)) * 100

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n📉 Missing Value Analysis:")
missing_report = pd.DataFrame({
    'missing_count': missing_data,
    'missing_percent': missing_percent,
}).sort_values('missing_percent', ascending=False)

# Only show columns that actually have gaps; fully populated columns are noise.
display(missing_report[missing_report['missing_count'] > 0])

"""
### 4. Target Variable Analysis
"""

# Analyze churn distribution.
# NOTE(review): assumes `churn` is encoded as 0 (retained) / 1 (churned),
# as the label lookups below imply -- confirm against the data dictionary.
if 'churn' in processor.df.columns:
    # value_counts() orders by frequency, not by label. Pinning the order to
    # [0, 1] keeps the 'Retained'/'Churned' pie labels attached to the right
    # wedges even when churners are the majority, and fill_value=0 avoids a
    # KeyError when one of the two classes is absent from the data.
    churn_distribution = (
        processor.df['churn'].value_counts().reindex([0, 1], fill_value=0)
    )
    churn_rate = processor.df['churn'].mean()
    total_customers = len(processor.df)

    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\n🎯 Churn Distribution:")
    print(f"Retained: {churn_distribution[0]:,} customers ({churn_distribution[0]/total_customers:.1%})")
    print(f"Churned: {churn_distribution[1]:,} customers ({churn_distribution[1]/total_customers:.1%})")
    print(f"Overall Churn Rate: {churn_rate:.1%}")

    # Visualize churn distribution (wedge order matches the [0, 1] index above).
    plt.figure(figsize=(8, 6))
    plt.pie(churn_distribution, labels=['Retained', 'Churned'], autopct='%1.1f%%',
            colors=['lightblue', 'lightcoral'], startangle=90)
    plt.title('Customer Churn Distribution')
    plt.show()

"""
### 5. Feature Distributions
"""

# Summarize every numeric column; `numerical_cols` is reused by the
# correlation section further down.
numerical_cols = processor.df.select_dtypes(include=[np.number]).columns

# "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
print("\n📈 Numerical Features Summary:")
display(processor.df[numerical_cols].describe())

# Histogram grid for the key numerical features (one panel per feature).
key_numerical = ['monthly_charges', 'tenure_days', 'viewing_hours', 'devices_connected']

fig, axes = plt.subplots(2, 2, figsize=(12, 10))
axes = axes.ravel()

for ax, col in zip(axes, key_numerical):
    if col not in processor.df.columns:
        # Hide the panel instead of leaving an empty, axis-only frame in the
        # grid when an expected feature is missing from this dataset.
        ax.set_visible(False)
        continue
    processor.df[col].hist(bins=30, ax=ax, alpha=0.7, color='steelblue')
    ax.set_title(f'Distribution of {col}')
    ax.set_xlabel(col)
    ax.set_ylabel('Frequency')

plt.tight_layout()
plt.show()

"""
### 6. Initial Correlation Analysis
"""

# Calculate correlations with churn
if 'churn' in processor.df.columns:
    # `numerical_cols` already contains 'churn' when the target is numeric.
    # Appending it unconditionally selected the column twice, which made
    # correlation_matrix['churn'] a two-column DataFrame and broke the
    # sort_values() call below. Add it only when it is not already present.
    numerical_with_churn = numerical_cols.tolist()
    if 'churn' not in numerical_with_churn:
        numerical_with_churn.append('churn')
    correlation_matrix = processor.df[numerical_with_churn].corr()

    # Focus on churn correlations, strongest positive first.
    churn_correlations = correlation_matrix['churn'].sort_values(ascending=False)

    # "\n" (not "\\n") so a blank line is printed instead of a literal backslash-n.
    print("\n🔗 Correlation with Churn:")
    # Transpose to a single wide row so the values read left to right.
    display(pd.DataFrame(churn_correlations).T)

"""
### 7. Next Steps
Based on initial analysis, we will proceed with:
1. Data cleaning and preprocessing
2. Advanced feature engineering
3. Deep-dive statistical analysis
4. Customer segmentation
5. Insight generation and recommendations
"""

# :