# Exploratory Data Analysis (EDA)
## Telco Customer Churn Prediction

Analysis of customer data to identify factors contributing to churn.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from pathlib import Path

# Apply seaborn's "whitegrid" theme to the plots drawn below.
sns.set_style("whitegrid")
# IPython magic (not plain Python): render matplotlib figures inline in the
# notebook output.
%matplotlib inline

### 1. Load Data

In [None]:
# Locate the raw CSV with pathlib.
# Primary assumption: the kernel's working directory is one level below the
# project root (e.g. a 'notebooks' folder), so the data lives under '../data/raw'.
CSV_RELATIVE_PATH = Path("data") / "raw" / "Telco-Customer-Churn.csv"
PROJECT_ROOT = Path("..").resolve()
DATA_PATH = PROJECT_ROOT / CSV_RELATIVE_PATH

if not DATA_PATH.exists():
    # Fallback: the notebook was started from the project root itself, so the
    # data directory sits directly under the working directory. (Retrying with
    # an unresolved ".." would point at the same location as the first attempt.)
    cwd_candidate = Path.cwd() / CSV_RELATIVE_PATH
    if cwd_candidate.exists():
        PROJECT_ROOT = Path.cwd()
        DATA_PATH = cwd_candidate

# If neither location exists, read_csv raises FileNotFoundError for the
# primary path.
df = pd.read_csv(DATA_PATH)
print(f"Dataset Shape: {df.shape}")
# Last expression of the cell: the notebook displays the first rows.
df.head()

### 2. Data Cleaning Check

In [None]:
# Per-column count of missing values
missing_per_column = df.isnull().sum()
print(missing_per_column)

# dtype pandas assigned to each column
column_dtypes = df.dtypes
print(column_dtypes)

# Coerce TotalCharges to a numeric dtype; entries that cannot be parsed
# become NaN (errors='coerce'), and the resulting null count is reported.
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges_numeric
print(f"Missing TotalCharges after conversion: {df['TotalCharges'].isnull().sum()}")

### 3. Target Distribution (Churn)

In [None]:
# Count how many rows fall under each Churn label and show the split as a pie.
target_count = df['Churn'].value_counts()
fig = px.pie(
    names=target_count.index,
    values=target_count.values,
    title='Churn Distribution',
)
fig.show()

### 4. Categorical Feature Analysis

In [None]:
def plot_categorical(feature):
    """Draw a count plot of ``feature`` with bars split by the 'Churn' column.

    Reads the module-level DataFrame ``df`` and shows the figure immediately.

    Args:
        feature: Name of the column in ``df`` to place on the x-axis.
    """
    _, axis = plt.subplots(figsize=(10, 5))
    sns.countplot(data=df, x=feature, hue='Churn', palette='viridis', ax=axis)
    axis.set_title(f'Churn by {feature}')
    plt.show()

# One churn-split count plot per categorical feature, in this order.
for categorical_feature in ('Contract', 'PaymentMethod', 'InternetService'):
    plot_categorical(categorical_feature)

### 5. Numerical Feature Analysis

In [None]:
# Stacked histograms of the numerical features, coloured by churn outcome.
#
# NOTE(review): the public IBM/Kaggle Telco churn CSV spells the tenure column
# in lowercase ('tenure'); seaborn raises if asked for a column that is not
# present. Resolve the name case-insensitively so either spelling works —
# confirm against the actual file.
tenure_column = next(
    (col for col in df.columns if str(col).lower() == 'tenure'),
    'Tenure',  # nothing matched: keep the original name so the error names it
)

for column_name, title_label in (
    (tenure_column, 'Tenure'),
    ('MonthlyCharges', 'Monthly Charges'),
):
    plt.figure(figsize=(10, 5))
    sns.histplot(x=column_name, hue='Churn', data=df, multiple='stack', palette='coolwarm')
    plt.title(f'Churn by {title_label}')
    plt.show()