# Telco Customer Churn – Multi‑Model + E‑waste Notebook

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier

from imblearn.over_sampling import SMOTE
import xgboost as xgb
import warnings

# Global notebook configuration: warning filter, seaborn theme, default figure size.
# NOTE(review): this ignores every warning category for the rest of the session,
# which also hides anything the estimators or pandas would report further down.
warnings.filterwarnings(action="ignore")
sns.set_style(style="darkgrid")

plt.rcParams.update({"figure.figsize": (6, 4)})

## 1. Load dataset

In [None]:
# Location of the churn CSV, relative to the notebook's working directory.
DATA_PATH = "Dataset/WA_Fn-UseC_-Telco-Customer-Churn.csv"
# Read the whole file into the frame every later cell works on, then preview it.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)
df.head(n=5)

### Target distribution (Churn vs No Churn)

In [None]:
# Count of each Churn label in the data frame as loaded.
fig = plt.figure()
ax = sns.countplot(data=df, x="Churn", palette="crest")
ax.set_title("Original Churn Distribution")
fig.tight_layout()
plt.show()

## 2. Basic cleaning

In [None]:
# Coerce TotalCharges to numeric; entries that cannot be parsed become NaN.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")
# Mean-impute the NaNs. The result is assigned back explicitly: calling
# fillna(inplace=True) on a df[...] selection is chained assignment, which
# newer pandas versions warn about and which does not update df under
# copy-on-write.
df["TotalCharges"] = df["TotalCharges"].fillna(df["TotalCharges"].mean())
# Collapse the "no service" variants into a plain "No" across all columns.
df.replace(["No internet service", "No phone service"], "No", inplace=True)
# Binary target: 1 where Churn == "Yes", 0 otherwise.
df["Churn"] = (df["Churn"] == "Yes").astype(int)

## 3. Feature engineering

In [None]:
# Bucket tenure into six ordinal ranges:
#   0: <=12, 1: (12, 24], 2: (24, 36], 3: (36, 48], 4: (48, 60], 5: >60
tenure = df["tenure"]
upper_edges = [12, 24, 36, 48, 60]

conditions = [tenure <= upper_edges[0]]
conditions.extend(
    (tenure > low) & (tenure <= high)
    for low, high in zip(upper_edges[:-1], upper_edges[1:])
)
conditions.append(tenure > upper_edges[-1])

choices = list(range(len(conditions)))
df["tenure_range"] = np.select(conditions, choices)

# Replace both charge columns with log(1 + x) of their values.
for charge_col in ("MonthlyCharges", "TotalCharges"):
    df[charge_col] = np.log1p(df[charge_col])

# Churn counts within each tenure bucket.
fig = plt.figure()
ax = sns.countplot(data=df, x="tenure_range", hue="Churn", palette="crest")
ax.set_title("Churn by Tenure Range")
fig.tight_layout()
plt.show()

## 4. One-hot encoding

In [None]:
# Every object-dtype column except the customerID identifier gets one-hot encoded.
cat_cols = [
    col
    for col in df.select_dtypes(include=["object"]).columns
    if col != "customerID"
]

# drop_first=False keeps one indicator column per category level.
df_encoded = pd.get_dummies(df, columns=cat_cols, drop_first=False)
df_encoded.head()

## 5. Train / Test split

In [None]:
# Target vector, and the feature matrix without the identifier and the target.
y = df_encoded["Churn"]
X = df_encoded.drop(columns=["customerID", "Churn"])

# 80/20 hold-out split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

X_train.shape, X_test.shape

## 6. Handle imbalance with SMOTE

In [None]:
# Oversample the training split only; the test split is left untouched.
sm = SMOTE(random_state=0)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

# Class counts of the resampled training labels.
fig = plt.figure()
ax = sns.countplot(x=y_train_res, palette="crest")
ax.set_title("Class distribution after SMOTE")
ax.set_xticks([0, 1])
ax.set_xticklabels(["No churn", "Churn"])
fig.tight_layout()
plt.show()

## 7. E‑waste functions

In [None]:
# Default parameters for the e-waste estimate; used as the keyword defaults of
# ewaste_from_churn_predictions.
DEVICE_MASS_KG = 0.8  # mass of one device (kg, per the name)
DEVICES_PER_CUSTOMER = 1  # devices counted for each predicted churner
DEVICE_LIFETIME_YEARS = 5  # device lifetime (years, per the name)

def estimate_ewaste_mn_over_l(device_weight, num_devices, lifetime_years):
    """Return the e-waste rate E = (M * N) / L.

    Args:
        device_weight: Mass M of a single device (the notebook passes kg).
        num_devices: Number N of devices.
        lifetime_years: Device lifetime L; must be greater than zero.

    Returns:
        The combined device mass divided by the lifetime.

    Raises:
        ValueError: If ``lifetime_years`` is zero or negative.
    """
    if lifetime_years <= 0:
        raise ValueError("Lifetime (L) must be > 0.")
    total_mass = device_weight * num_devices
    return total_mass / lifetime_years

def ewaste_from_churn_predictions(y_pred, device_mass_kg=DEVICE_MASS_KG,
                                  devices_per_customer=DEVICES_PER_CUSTOMER,
                                  lifetime_years=DEVICE_LIFETIME_YEARS):
    """Turn churn predictions into an e-waste estimate.

    The number of churners is taken as the sum of ``y_pred``, so the
    predictions are assumed to be 0/1 labels with 1 meaning churn.

    Args:
        y_pred: Predicted labels, one per customer.
        device_mass_kg: Mass of one device.
        devices_per_customer: Devices counted for each predicted churner.
        lifetime_years: Device lifetime; must be greater than zero.

    Returns:
        A tuple ``(ewaste_kg_per_year, num_churn_customers, num_devices)``.

    Raises:
        ValueError: Propagated from ``estimate_ewaste_mn_over_l`` when
            ``lifetime_years`` is not positive.
    """
    churners = int(np.sum(y_pred))
    devices = churners * devices_per_customer
    return (
        estimate_ewaste_mn_over_l(device_mass_kg, devices, lifetime_years),
        churners,
        devices,
    )

# Worked example: 1000 devices of 0.8 each over a 5-year lifetime -> 160.0.
example_E = estimate_ewaste_mn_over_l(
    device_weight=0.8,
    num_devices=1000,
    lifetime_years=5,
)
example_E

### Confusion matrix helper

In [None]:
def plot_confusion_matrix(y_true, y_pred, title):
    """Draw the confusion matrix of ``y_pred`` against ``y_true`` as a heatmap.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        title: Text placed above the plot.

    Rows of the heatmap are labelled "True" and columns "Predicted"; each
    cell is annotated with its integer count. The figure is shown
    immediately and nothing is returned.
    """
    counts = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots()
    sns.heatmap(counts, annot=True, fmt="d", cmap="crest", ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_title(title)
    fig.tight_layout()
    plt.show()

# Model name -> e-waste estimate from ewaste_from_churn_predictions; each
# model cell adds its own entry and the final cell plots them.
ewaste_results = {}


## 8. Logistic Regression

In [None]:
# Fit on the SMOTE-resampled training data, evaluate on the untouched test split.
log_model = LogisticRegression(C=200, max_iter=1000).fit(X_train_res, y_train_res)
log_pred = log_model.predict(X_test)

# Convert the predicted churners into an e-waste figure and record it.
log_ewaste, log_n, log_d = ewaste_from_churn_predictions(log_pred)
ewaste_results["Logistic Regression"] = log_ewaste

print(classification_report(y_test, log_pred))
plot_confusion_matrix(y_test, log_pred, "Logistic Regression")

# (e-waste, predicted churners, devices)
(log_ewaste, log_n, log_d)

## 9. SVM (Linear)

In [None]:
# Linear-kernel SVM trained on the resampled data, scored on the test split.
svm_model = SVC(kernel="linear", C=20).fit(X_train_res, y_train_res)
svm_pred = svm_model.predict(X_test)

# Convert the predicted churners into an e-waste figure and record it.
svm_ewaste, svm_n, svm_d = ewaste_from_churn_predictions(svm_pred)
ewaste_results["SVM (Linear)"] = svm_ewaste

print(classification_report(y_test, svm_pred))
plot_confusion_matrix(y_test, svm_pred, "SVM (Linear)")

# (e-waste, predicted churners, devices)
(svm_ewaste, svm_n, svm_d)

## 10. XGBoost

In [None]:
# imblearn's Pipeline (unlike sklearn's) accepts samplers as steps and applies
# them only while fitting, never when predicting.
from imblearn.pipeline import Pipeline as ImbPipeline

# Candidate values sampled by the randomized search.
params = {
    "min_child_weight": [1, 5, 10],
    "gamma": [1.5, 2, 5, 7],
    "subsample": [0.6, 0.8, 1.0],
    "colsample_bytree": [0.6, 0.8, 1.0],
    "max_depth": [4, 5, 8],
}

# NOTE(review): `use_label_encoder` is deprecated in recent xgboost releases;
# it is kept here so the cell behaves the same on older installs - confirm the
# pinned version and drop it if possible.
xgb_base = xgb.XGBClassifier(
    learning_rate=0.08,
    n_estimators=100,
    objective="binary:logistic",
    eval_metric="logloss",
    use_label_encoder=False,
)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)

# Oversample INSIDE each CV training fold. Cross-validating on data that was
# already SMOTE-resampled lets synthetic points interpolated from a validation
# fold's rows leak into the training folds, which inflates the CV ROC-AUC and
# biases the parameter choice.
xgb_pipeline = ImbPipeline(
    steps=[
        ("smote", SMOTE(random_state=0)),
        ("xgb", xgb_base),
    ]
)

random_search = RandomizedSearchCV(
    xgb_pipeline,
    # Route every search parameter to the "xgb" step of the pipeline.
    param_distributions={f"xgb__{name}": values for name, values in params.items()},
    n_iter=5,
    scoring="roc_auc",
    # Pass the splitter itself rather than a one-shot generator of splits.
    cv=skf,
    random_state=0,
    verbose=0,
)

# Fit on the un-resampled training split; the pipeline applies SMOTE per fold
# and again on the full training split for the final refit.
random_search.fit(X_train, y_train)
xgb_pred = random_search.predict(X_test)

print(classification_report(y_test, xgb_pred))

plot_confusion_matrix(y_test, xgb_pred, "XGBoost")

# Convert the predicted churners into an e-waste figure and record it.
xgb_ewaste, xgb_n, xgb_d = ewaste_from_churn_predictions(xgb_pred)
ewaste_results["XGBoost"] = xgb_ewaste
xgb_ewaste, xgb_n, xgb_d

## 11. MLP (Neural Network)

In [None]:
# MLP with a fixed seed, trained on the resampled data, scored on the test split.
mlp_model = MLPClassifier(random_state=1, max_iter=500).fit(X_train_res, y_train_res)
mlp_pred = mlp_model.predict(X_test)

# Convert the predicted churners into an e-waste figure and record it.
mlp_ewaste, mlp_n, mlp_d = ewaste_from_churn_predictions(mlp_pred)
ewaste_results["MLP"] = mlp_ewaste

print(classification_report(y_test, mlp_pred))
plot_confusion_matrix(y_test, mlp_pred, "MLP Neural Network")

# (e-waste, predicted churners, devices)
(mlp_ewaste, mlp_n, mlp_d)

## 12. Compare E‑waste across models

In [None]:
# Model names and their e-waste estimates, in insertion order.
models = list(ewaste_results)
ewaste_values = list(ewaste_results.values())

# One bar per model.
fig, ax = plt.subplots(figsize=(7, 4))
sns.barplot(x=models, y=ewaste_values, palette="crest", ax=ax)
ax.set_ylabel("E-waste (kg/year)")
ax.set_title("E-waste estimate per model")
plt.xticks(rotation=20)
fig.tight_layout()
plt.show()

ewaste_results