# Telecom Churn Exploratory Data Analysis

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' style to the plots drawn after this call.
sns.set_style('whitegrid')

In [None]:
# Load the Telco customer-churn CSV (relative path) into `df`, report its
# (rows, columns) shape, and preview the first rows.
csv_path = '../data/WA_Fn-UseC_-Telco-Customer-Churn.csv'
df = pd.read_csv(csv_path)
print(f"Dataset shape: {df.shape}")
df.head()

In [None]:
# Print a concise summary of `df`: column names, non-null counts and dtypes.
df.info()

In [None]:
# Report missing data per column.
nan_counts = df.isnull().sum()

# isnull() only flags NaN/None. Empty or whitespace-only strings (a common
# "missing" placeholder in CSV exports) are NOT counted, so tally them
# separately to keep an all-zero NaN report from hiding them.
blank_counts = df.apply(lambda col: col.astype(str).str.strip().eq('').sum())
blank_counts = blank_counts[blank_counts > 0]
if not blank_counts.empty:
    print("Blank (whitespace-only) strings, not counted as NaN:")
    print(blank_counts.to_string())
    print()

print("Missing values:")
nan_counts

In [None]:
# Tally customers per Churn label, then express the 'Yes' group as a
# percentage of all rows in the dataset.
churn_counts = df['Churn'].value_counts()
print(churn_counts)

yes_count = churn_counts['Yes']
total_rows = len(df)
print(f"Churn rate: {yes_count / total_rows * 100:.2f}%")

In [None]:
# Bar chart of how many customers fall under each Churn label.
plt.figure(figsize=(8, 6))
label_counts = df['Churn'].value_counts()

# value_counts() orders labels by frequency, so a positional color list would
# swap green/red whenever 'Yes' outnumbers 'No'. Pick each bar's color from
# its label instead; any unexpected label falls back to gray.
label_colors = {'No': 'green', 'Yes': 'red'}
bar_colors = [label_colors.get(label, 'gray') for label in label_counts.index]

label_counts.plot(kind='bar', color=bar_colors)
plt.title('Churn Distribution')
plt.xlabel('Churn')
plt.ylabel('Count')
plt.xticks(rotation=0)  # keep the category labels horizontal
plt.show()

In [None]:
# Show descriptive statistics (count, mean, std, quartiles, ...) for `df`.
df.describe()

In [None]:
# Churn breakdown per contract type: each row (contract) is normalized to sum
# to 1 and then scaled to percentages.
(
    pd.crosstab(index=df['Contract'], columns=df['Churn'], normalize='index')
    .mul(100)
)

In [None]:
# Overlay the tenure histograms of retained vs. churned customers.
plt.figure(figsize=(12, 6))

# Derive ONE set of 30 bin edges from the whole tenure column so both groups
# are binned identically. Letting each hist() call pick its own 30 bins
# misaligns the bars whenever the two groups span different tenure ranges,
# which makes the overlaid frequencies not directly comparable.
tenure_bins = np.histogram_bin_edges(df['tenure'].dropna(), bins=30)

tenure_stayed = df.loc[df['Churn'] == 'No', 'tenure']
tenure_churned = df.loc[df['Churn'] == 'Yes', 'tenure']

# alpha=0.5 keeps both distributions visible where they overlap.
tenure_stayed.hist(bins=tenure_bins, alpha=0.5, label='No Churn', color='green')
tenure_churned.hist(bins=tenure_bins, alpha=0.5, label='Churn', color='red')

plt.xlabel('Tenure (months)')
plt.ylabel('Frequency')
plt.title('Tenure Distribution by Churn')
plt.legend()
plt.show()