# Classification Algorithm Comparison

## 1. Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

%matplotlib inline

## 2. Load Dataset

In [None]:
# Read the churn CSV into a DataFrame, report its (rows, columns) shape,
# and preview the first rows as the cell output.
dataset_path = 'data/telco_churn.csv'
df = pd.read_csv(dataset_path)
dataset_shape = df.shape
print(f'Dataset Shape: {dataset_shape}')
df.head()

## 3. Exploratory Data Analysis (EDA)

In [None]:
# Count the missing (NaN/None) entries in every column
df.isna().sum()

In [None]:
# Check data types
# Prints a per-column summary (dtype and non-null count) so columns that were
# read with an unexpected type stand out.
# NOTE(review): TotalCharges presumably shows up as `object` at this point,
# since a later cell converts it with pd.to_numeric — confirm on the real data.
df.info()

In [None]:
# Bar chart of how many rows fall into each value of the target column `Churn`
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='Churn', ax=ax)
ax.set_title('Churn Distribution')
plt.show()

In [None]:
# TotalCharges needs an explicit numeric conversion (the column is noted to
# contain some empty strings). errors='coerce' turns every unparseable entry
# into NaN instead of raising.
total_charges_numeric = pd.to_numeric(df['TotalCharges'], errors='coerce')
df['TotalCharges'] = total_charges_numeric

# Re-check the missing-value counts so the newly coerced NaNs are visible
df.isna().sum()

In [None]:
# Draw one 30-bin histogram per numerical feature on a single 10x7 figure
numerical_features = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]
df.hist(column=numerical_features, bins=30, figsize=(10, 7))
plt.show()