# Customer Churn EDA — Insurance Dataset
**Author:** Nagul Meera Shaik | Data Analyst

Exploratory analysis to understand churn patterns, demographics, and policy behavior.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Load the raw insurance churn dataset (relative path, resolved against the
# current working directory).
df = pd.read_csv('../data/raw/insurance_churn.csv')
print('Shape:', df.shape)
print('\nColumns:', df.columns.tolist())

# Churn rate as a percentage. Scale to percent first and fix the precision at
# print time: rounding the fraction and *then* multiplying by 100 re-introduces
# binary floating-point noise (e.g. 0.07 * 100 -> 7.000000000000001).
# NOTE(review): assumes 'churn' is a 0/1 (or boolean) flag so that its mean is
# the churned fraction — confirm against the data dictionary.
churn_rate_pct = df['churn'].mean() * 100
print(f'\nChurn Rate: {churn_rate_pct:.2f} %')

# Preview the first rows (shown as the cell output when run in a notebook).
df.head()

In [None]:
# Count null entries per column and report only the columns that have any
missing = df.isna().sum()
print('Missing Values:\n', missing.loc[missing.gt(0)])

# Report the dtype of every column
print('\nData Types:\n', df.dtypes)

In [None]:
# Churn distribution: class counts (left panel) and churn rate per policy
# type (right panel, only when the column is available).
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Churn count.
# value_counts() orders rows by frequency, so the positional tick labels and
# colors below would be swapped whenever churned rows outnumber retained ones.
# Sorting by index pins the bar order to the label values instead
# (0/False first, then 1/True).
# NOTE(review): assumes 'churn' is a 0/1 (or boolean) flag and that both
# classes are present in the data — confirm.
churn_counts = df['churn'].value_counts().sort_index()
churn_counts.plot(kind='bar', ax=axes[0], color=['#63D2FF', '#FF7B7B'], edgecolor='black')
axes[0].set_title('Churn Distribution', fontweight='bold')
axes[0].set_xticklabels(['Retained', 'Churned'], rotation=0)
axes[0].set_ylabel('Count')

# Churn rate by policy type (if column exists)
if 'policy_type' in df.columns:
    # Mean of the churn flag per group, sorted ascending so the policy type
    # with the highest churn rate is drawn last (top of the horizontal chart).
    df.groupby('policy_type')['churn'].mean().sort_values().plot(
        kind='barh', ax=axes[1], color='#7BF5A0')
    axes[1].set_title('Churn Rate by Policy Type', fontweight='bold')
    axes[1].set_xlabel('Churn Rate')
else:
    # Nothing to draw: hide the panel rather than saving an empty frame.
    axes[1].set_visible(False)

plt.tight_layout()
plt.savefig('../reports/churn_distribution.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Correlation heatmap over every numeric column of the dataset
numeric_cols = df.select_dtypes(include=[np.number]).columns
corr_matrix = df[numeric_cols].corr()

plt.figure(figsize=(10, 8))
# Diverging palette centered on zero so the color shows the sign of each
# correlation; each cell is annotated with its coefficient to two decimals.
heatmap_ax = sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
)
heatmap_ax.set_title('Feature Correlation Matrix', fontweight='bold', fontsize=13)

plt.tight_layout()
plt.savefig('../reports/correlation_heatmap.png', dpi=150, bbox_inches='tight')
plt.show()