# Customer Churn Analysis - Interactive Notebook

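This notebook demonstrates how to use the Customer Churn Analytics project interactively. All paths are relative (`../src`, `../data`, `../models`), so run it from the directory that contains this notebook.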

## 1. Load Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys

# Add src to path so the project modules imported in later cells can be found.
# The membership check keeps this cell idempotent: re-running it does not
# append a duplicate '../src' entry each time.
if '../src' not in sys.path:
    sys.path.append('../src')

# Plot defaults for every figure below: seaborn's white grid theme and a
# 12x6 inch default figure size (individual cells may override the size).
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Libraries loaded successfully!")

## 2. Load and Explore Data

In [None]:
# Read the customer churn CSV (path is relative to the notebook directory)
csv_path = '../data/customer_churn_data.csv'
df = pd.read_csv(csv_path)

# Report the (rows, columns) shape, then preview the first five rows as the cell output
print(f"Dataset shape: {df.shape}")
df.head(n=5)

In [None]:
# pandas' default describe() summary of the loaded frame, shown as the cell output
summary_stats = df.describe()
summary_stats

In [None]:
# Number of customers per value of the 'Churn' column
churn_dist = df['Churn'].value_counts()
print("\nChurn Distribution:", churn_dist, sep="\n")

# Overall churn rate: share of rows whose 'Churn' value is 'Yes', as a percentage
print(f"\nChurn Rate: {(df['Churn'] == 'Yes').sum() / len(df) * 100:.2f}%")

## 3. Visualize Churn Patterns

In [None]:
# Churn by Contract Type: absolute counts (left panel) and churn rate (right panel)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
count_ax, rate_ax = axes

# Left panel: customers per contract type, split by the 'Churn' column
contract_churn_counts = pd.crosstab(df['Contract'], df['Churn'])
contract_churn_counts.plot.bar(ax=count_ax, color=['#2ecc71', '#e74c3c'])
count_ax.set_title('Churn Count by Contract Type', fontweight='bold')
count_ax.set_xlabel('Contract Type')
count_ax.set_ylabel('Count')
# NOTE(review): the labels and colors assume the crosstab columns are ordered 'No', 'Yes' — confirm
count_ax.legend(['No Churn', 'Churn'])


def _pct_churned(flags):
    """Return the percentage of entries in ``flags`` that equal 'Yes'."""
    return (flags == 'Yes').sum() / len(flags) * 100


# Right panel: percentage of each contract group that churned
churn_by_contract = df.groupby('Contract')['Churn'].apply(_pct_churned)
churn_by_contract.plot.bar(ax=rate_ax, color='#e74c3c')
rate_ax.set_title('Churn Rate by Contract Type', fontweight='bold')
rate_ax.set_xlabel('Contract Type')
rate_ax.set_ylabel('Churn Rate (%)')

plt.tight_layout()
plt.show()

In [None]:
# Tenure vs Monthly Charges: one scatter layer per churn class on a shared Axes,
# so the two groups can be compared directly
tenure_fig, tenure_ax = plt.subplots(figsize=(12, 6))

for churn_status in ('No', 'Yes'):
    in_group = df['Churn'] == churn_status
    tenure_ax.scatter(
        df.loc[in_group, 'Tenure'],
        df.loc[in_group, 'MonthlyCharges'],
        s=30,
        alpha=0.5,
        label=f'Churn: {churn_status}',
    )

tenure_ax.set_xlabel('Tenure (months)', fontsize=12)
tenure_ax.set_ylabel('Monthly Charges ($)', fontsize=12)
tenure_ax.set_title('Tenure vs Monthly Charges by Churn Status', fontweight='bold', fontsize=14)
tenure_ax.legend()
tenure_ax.grid(alpha=0.3)
plt.show()

## 4. Run Full EDA Pipeline

In [None]:
# Project-local module; presumably resolved through the '../src' entry that the
# first code cell adds to sys.path — run that cell first.
from exploratory_analysis import ChurnEDA

# Create EDA instance, pointing it at the same CSV path used for `df` earlier
eda = ChurnEDA('../data/customer_churn_data.csv')

# Generate summary statistics. As the last expression of the cell, any non-None
# return value is also rendered as the cell output.
eda.generate_summary_statistics()

## 5. Train Models

In [None]:
# Project-local modules; presumably resolved through the '../src' sys.path entry
# added in the first code cell.
from preprocess_data import ChurnDataPreprocessor
from train_models import ChurnModelTrainer  # not used here; needed by the training cell below

# Preprocess data. The steps are called in this fixed order on one instance.
# NOTE(review): each step presumably updates the preprocessor's internal state,
# so re-run the whole cell rather than individual lines — confirm against the class.
preprocessor = ChurnDataPreprocessor('../data/customer_churn_data.csv')
preprocessor.handle_missing_values()
preprocessor.engineer_features()
preprocessor.encode_categorical_features()
# The four returned values are reused by the training and prediction cells
X_train, X_test, y_train, y_test = preprocessor.prepare_for_modeling()

print("Data preprocessing completed!")

In [None]:
# Train models: build a trainer from the four splits produced by the preprocessing cell
trainer = ChurnModelTrainer(X_train, X_test, y_train, y_test)

# Train individual models, one call per model family
# NOTE(review): presumably each call keeps its fitted model and metrics on `trainer`
# for the later compare_models() call — confirm against ChurnModelTrainer.
trainer.train_logistic_regression()
trainer.train_random_forest()
trainer.train_gradient_boosting()

In [None]:
# Compare models using the trainer built in the previous cell; keep the result
# in `comparison` so it can be inspected again later
comparison = trainer.compare_models()
# Bare expression so the notebook renders the comparison as the cell output
comparison

## 6. Make Predictions on the Test Set

In [None]:
import pickle

# Load the persisted best model from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserializing —
# only unpickle model files that come from a trusted source.
with open('../models/best_model.pkl', 'rb') as model_file:
    best_model = pickle.load(model_file)

# Score the test split: hard class labels, plus column 1 of predict_proba
# (presumably the probability of the positive/churn class — confirm class order)
predictions = best_model.predict(X_test)
class_probabilities = best_model.predict_proba(X_test)
prediction_proba = class_probabilities[:, 1]

# Put actual labels, predicted labels and probabilities side by side
result_columns = {
    'Actual': y_test.values,
    'Predicted': predictions,
    'Churn_Probability': prediction_proba,
}
results = pd.DataFrame(result_columns)

print("Sample predictions:")
results.head(10)

## 7. Key Insights

Based on the analysis:

1. **Contract Type**: Month-to-month contracts have significantly higher churn rates
2. **Tenure**: Customers with shorter tenure are more likely to churn
3. **Monthly Charges**: Higher monthly charges correlate with increased churn probability
4. **Support Services**: Tech support and online security services reduce churn
5. **Payment Method**: Electronic check users tend to have higher churn rates

### Recommendations:
- Focus retention efforts on month-to-month customers
- Engage new customers early (first 12 months)
- Promote value-added services (tech support, security)
- Review pricing strategy for high-charge customers
- Encourage customers who pay by electronic check to switch to payment methods with lower churn rates