# In [None]:
import pandas as pd
import numpy as np
import os

# ------------------------------
# File paths (relative paths)
# ------------------------------
# Input CSV as downloaded, and the destination for the cleaned copy.
# Both are relative to the current working directory.
RAW_PATH = "data/raw/WA_Fn-UseC_-Telco-Customer-Churn.csv"
PROCESSED_PATH = "data/processed/Telco-Customer-Churn-Clean.csv"

# Make sure the output folder exists. It is derived from PROCESSED_PATH so
# that editing the path above cannot leave this pointing at a stale folder.
_processed_dir = os.path.dirname(PROCESSED_PATH)
if _processed_dir:  # dirname is "" for a bare filename; makedirs("") raises
    os.makedirs(_processed_dir, exist_ok=True)

# ------------------------------
# Load data
# ------------------------------
# Read the raw CSV into the working frame used by the rest of the script.
df = pd.read_csv(RAW_PATH)

# Gather the per-column summaries first, then report them in a fixed order:
# overall shape, column dtypes, and the number of missing cells per column.
column_types = df.dtypes
missing_per_column = df.isnull().sum()

print("Shape:", df.shape)
print("\nData types:\n")
print(column_types)
print("\nMissing values:\n")
print(missing_per_column)

# ------------------------------
# Convert TotalCharges → numeric
# ------------------------------
# Parse TotalCharges as numbers; entries that cannot be parsed become NaN
# (errors="coerce") instead of raising.
total_charges = pd.to_numeric(df["TotalCharges"], errors="coerce")

# Replace every NaN with the median of the successfully parsed values.
df["TotalCharges"] = total_charges.fillna(total_charges.median())

# ------------------------------
# Add ServicesCount feature
# ------------------------------
# Columns that each describe one subscribable service.
service_cols = [
    'PhoneService', 'MultipleLines', 'InternetService', 'OnlineSecurity',
    'OnlineBackup', 'DeviceProtection', 'TechSupport',
    'StreamingTV', 'StreamingMovies'
]

# Boolean mask of "customer has this service", built column-wise rather than
# with a row-wise apply(axis=1), which calls a Python lambda once per row.
subscribed = df[service_cols].eq("Yes")

# NOTE(review): in the IBM Telco churn data InternetService appears to hold
# the connection type (e.g. "DSL", "Fiber optic") or "No" rather than "Yes",
# so an == "Yes" test would never count it -- confirm against the raw CSV.
# Treat any non-missing value other than "No" as subscribed; this also gives
# the right answer if the column is plain "Yes"/"No".
internet = df["InternetService"]
subscribed["InternetService"] = internet.notna() & internet.ne("No")

# Number of services each customer is subscribed to.
df["ServicesCount"] = subscribed.sum(axis=1)

# ------------------------------
# Final sanity check
# ------------------------------
# Re-check the per-column missing counts on the cleaned frame.
print("\nMissing values after cleaning:\n")
print(df.isna().sum())

# A bare expression is only rendered by a notebook when it is the last
# expression of a cell (and never by a plain script). More statements follow
# here, so the preview and the summary statistics are printed explicitly.
print(df.head())
print(df.describe(include='all'))

# ------------------------------
# Save cleaned dataset
# ------------------------------
# Write the cleaned frame to disk without the positional index column,
# then confirm where it went.
df.to_csv(path_or_buf=PROCESSED_PATH, index=False)
print("Saved cleaned data to:", PROCESSED_PATH, sep=" ")
