# 📊 Telco Customer Churn Analysis: Customer Retention Insights

A comprehensive analysis of telecom customer churn patterns, aimed at identifying at-risk customers and informing retention strategies.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling is independent of the data, so configure it up front.
sns.set_theme(style='whitegrid')

# Kaggle environment: the dataset is read from the /kaggle/input mount.
df = pd.read_csv('/kaggle/input/telco-customer-churn/WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Local environment: uncomment the line below (and comment out the read_csv
# call above) to load the spreadsheet copy instead.
# df = pd.read_excel('Telco_customer_churn.xlsx')

def data_inv(df):
    """Print a quick inventory of ``df`` to stdout.

    Sections are printed in this order, separated by a ten-dash rule:
    row and column counts, the column index, per-column dtypes, the
    null count of every column that has at least one null, the first
    rows (``df.head()``) and the last rows (``df.tail()``).

    Args:
        df: Object exposing the pandas DataFrame attributes used here
            (``shape`` with two entries, ``columns``, ``dtypes``,
            ``isnull``, ``head``, ``tail``).

    Returns:
        None. Everything is written with ``print``.
    """
    rule = "-" * 10

    print("Number of rows:", df.shape[0])
    print("Dataset variables:", df.shape[1])
    print(rule)

    print("Dataset columns:\n")
    print(df.columns)
    print(rule)

    print('data-type of each column: \n')
    print(df.dtypes)
    print(rule)

    print('missing rows in each column: \n')
    null_counts = df.isnull().sum()
    # Only columns with at least one null are listed.
    print(null_counts[null_counts > 0])

    # Preview both ends of the frame, each preceded by a rule.
    for preview in (df.head, df.tail):
        print(rule)
        print(preview())

## Data Exploration and Feature Selection

In [None]:
# Column names may differ between copies of this dataset, so list what was
# actually loaded before relying on any particular name.
print("Dataset columns:", df.columns.tolist(), sep="\n")
print("\nDataset shape:", df.shape)
print("\nFirst few rows:")
# Keep this as the last expression so the notebook renders the preview.
df.head()

In [None]:
# Locate the churn column(s) with a case-insensitive substring match, since
# the exact name may vary.
churn_cols = [name for name in df.columns if 'churn' in name.lower()]
print("Churn-related columns:", churn_cols)

# Summary of the loaded frame as reported by DataFrame.info()
df.info()

## Churn Analysis

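**Note:** The analysis below assumes the standard Telco dataset column names (`Churn`, `InternetService`, `MonthlyCharges`, `tenure`, `Contract`). Each cell first checks that its columns exist; apart from the churn-distribution cell, a cell whose columns are missing is skipped without any message. If the column names are different in the Kaggle dataset, they will need to be adjusted accordingly.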

In [None]:
# Distribution of churn
if 'Churn' not in df.columns:
    # Nothing to plot; show what is available so the name can be corrected.
    print("Churn column not found. Available columns:")
    print(df.columns.tolist())
else:
    churn = df['Churn']

    plt.figure(figsize=(8, 6))
    # One bar per churn value, ordered by descending count.
    churn.value_counts().plot(kind='bar', color=['skyblue', 'orange'])
    plt.title('Customer Churn Distribution')
    plt.xlabel('Churn Status')
    plt.ylabel('Number of Customers')
    plt.xticks(rotation=0)
    plt.show()

    # Same counts expressed as fractions of all customers.
    print("Churn Rate:", churn.value_counts(normalize=True))

In [None]:
# Internet Service vs Churn
if 'InternetService' in df.columns and 'Churn' in df.columns:
    # Row-normalised crosstab: every internet-service row sums to 1, so each
    # cell is the share of that service's customers with a given churn value.
    # Computed once and reused for both the chart and the printed table.
    churn_by_service = pd.crosstab(
        df['InternetService'], df['Churn'], normalize='index'
    )

    # DataFrame.plot opens its own figure unless it is handed an Axes, so a
    # preceding plt.figure(figsize=...) would stay empty and its size would
    # be ignored. Create the Axes explicitly and draw onto it instead.
    _, ax = plt.subplots(figsize=(10, 6))
    churn_by_service.plot(kind='bar', ax=ax, rot=45)
    ax.set_title('Churn Rate by Internet Service Type')
    ax.set_xlabel('Internet Service')
    ax.set_ylabel('Churn Rate')
    # NOTE(review): these labels assume the crosstab columns are ordered
    # non-churned first, churned second -- confirm against the data.
    ax.legend(['No Churn', 'Churn'])
    plt.show()

    print("\nChurn rate by Internet Service:")
    print(churn_by_service)

In [None]:
# Monthly Charges vs Churn
if {'MonthlyCharges', 'Churn'}.issubset(df.columns):
    plt.figure(figsize=(10, 6))
    # One box per churn status showing the spread of monthly charges.
    sns.boxplot(data=df, x='Churn', y='MonthlyCharges')
    plt.title('Monthly Charges Distribution by Churn Status')
    plt.xlabel('Churn Status')
    plt.ylabel('Monthly Charges')
    plt.show()

    print("\nAverage Monthly Charges by Churn Status:")
    charges_by_churn = df.groupby('Churn')['MonthlyCharges']
    print(charges_by_churn.mean())

In [None]:
# Tenure vs Churn
if {'tenure', 'Churn'}.issubset(df.columns):
    plt.figure(figsize=(12, 6))
    # Histogram of tenure in 30 bins, coloured by churn status.
    sns.histplot(df, x='tenure', hue='Churn', bins=30, alpha=0.7)
    plt.title('Customer Tenure Distribution by Churn Status')
    plt.xlabel('Tenure (Months)')
    plt.ylabel('Number of Customers')
    plt.show()

    print("\nAverage Tenure by Churn Status:")
    tenure_by_churn = df.groupby('Churn')['tenure']
    print(tenure_by_churn.mean())

In [None]:
# Contract Type vs Churn
if 'Contract' in df.columns and 'Churn' in df.columns:
    # Row-normalised crosstab: every contract-type row sums to 1, so each
    # cell is the share of that contract's customers with a given churn value.
    # Computed once and reused for both the chart and the printed table.
    churn_by_contract = pd.crosstab(
        df['Contract'], df['Churn'], normalize='index'
    )

    # DataFrame.plot opens its own figure unless it is handed an Axes, so a
    # preceding plt.figure(figsize=...) would stay empty and its size would
    # be ignored. Create the Axes explicitly and draw onto it instead.
    _, ax = plt.subplots(figsize=(10, 6))
    churn_by_contract.plot(kind='bar', ax=ax, rot=45)
    ax.set_title('Churn Rate by Contract Type')
    ax.set_xlabel('Contract Type')
    ax.set_ylabel('Churn Rate')
    # NOTE(review): these labels assume the crosstab columns are ordered
    # non-churned first, churned second -- confirm against the data.
    ax.legend(['No Churn', 'Churn'])
    plt.show()

    print("\nChurn rate by Contract Type:")
    print(churn_by_contract)

## Key Insights

Based on the analysis above, here are the key findings:

1. **Internet Service Impact**: Fiber optic customers typically show higher churn rates compared to DSL customers
2. **Contract Duration**: Month-to-month contracts have significantly higher churn rates
3. **Tenure Effect**: Customers with longer tenure are less likely to churn
4. **Pricing Sensitivity**: Higher monthly charges correlate with increased churn probability

## Business Recommendations

1. **Retention Strategy**: Focus on fiber optic customers with targeted retention campaigns
2. **Contract Incentives**: Encourage longer-term contracts with discounts or additional services
3. **Early Intervention**: Implement early warning systems for new customers (first 6 months)
4. **Value Proposition**: Enhance service value for high-paying customers to justify pricing