### Data Loading and Cleaning

In [2]:
import pandas as pd
import numpy as np

# Load the Telco churn CSV and clean it into the frame `df` used below.

# 1. Load the dataset
try:
    df = pd.read_csv('data/Telco-Customer-Churn.csv')
    print("Dataset loaded successfully.")
except FileNotFoundError:
    print("Error: File not found. Check if the CSV is in the 'data' folder.")
    # Stop the cell here. Falling through would either hit a confusing
    # NameError on `df` (fresh kernel) or silently clean a stale `df`
    # left over from an earlier run (hidden state).
    raise

# 2. Fix 'TotalCharges'
# errors='coerce' turns every entry that cannot be parsed as a number into NaN
# instead of raising, so the column ends up numeric.
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# 3. Handle Missing Values
# NOTE(review): the NaNs produced by step 2 are replaced with 0 — presumably
# these are customers who have not been billed yet; confirm against `tenure`.
df['TotalCharges'] = df['TotalCharges'].fillna(0)

# 4. Remove Useless Columns
# errors='ignore' keeps this a no-op when 'customerID' is absent.
df = df.drop(columns=['customerID'], errors='ignore')

# 5. Fix Target Variable
# Series.map leaves NaN for any value other than 'Yes'/'No'; such rows would
# show up in the missing-value count printed below.
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})

print(f"Remaining Missing Values: {df.isnull().sum().sum()}")
df.head()

Dataset loaded successfully.
Remaining Missing Values: 0


Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,0
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,0
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,1
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,0
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,1
