In [1]:
# Attach Google Drive to the Colab runtime so files under /content/drive are reachable.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)


Mounted at /content/drive


In [2]:
# Data handling
import pandas as pd
import numpy as np

# Train-test split and models
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Evaluation
from sklearn.metrics import roc_auc_score, classification_report

# Model save/load
import joblib


In [19]:
# Location of the cleaned churn dataset on the mounted Drive.
file_path = "/content/drive/MyDrive/Customer_churn_Telecom/data/processed/churn_cleaned.csv"

# Read the CSV into a DataFrame, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)


Unnamed: 0,Tenure Months,Monthly Charges,Senior Citizen,Churn Value,Contract_Month-to-month,Contract_One year,Contract_Two year,Internet Service_DSL,Internet Service_Fiber optic,Internet Service_No,Payment Method_Bank transfer (automatic),Payment Method_Credit card (automatic),Payment Method_Electronic check,Payment Method_Mailed check
0,2,53.85,0,1,True,False,False,True,False,False,False,False,False,True
1,2,70.7,0,1,True,False,False,False,True,False,False,False,True,False
2,8,99.65,0,1,True,False,False,False,True,False,False,False,True,False
3,28,104.8,0,1,True,False,False,False,True,False,False,False,True,False
4,49,103.7,0,1,True,False,False,False,True,False,True,False,False,False


In [20]:
# TARGET: name of the label column.
target = "Churn Value"

# FINAL FEATURES (dashboard-compatible), grouped by the kind of column.
base_features = ["Tenure Months", "Monthly Charges", "Senior Citizen"]
contract_features = [
    "Contract_Month-to-month",
    "Contract_One year",
    "Contract_Two year",
]
internet_features = [
    "Internet Service_DSL",
    "Internet Service_Fiber optic",
    "Internet Service_No",
]
payment_features = [
    "Payment Method_Bank transfer (automatic)",
    "Payment Method_Credit card (automatic)",
    "Payment Method_Electronic check",
    "Payment Method_Mailed check",
]
features = base_features + contract_features + internet_features + payment_features

# Keep only the feature columns followed by the target column.
df = df.loc[:, [*features, target]]

df.head()


Unnamed: 0,Tenure Months,Monthly Charges,Senior Citizen,Contract_Month-to-month,Contract_One year,Contract_Two year,Internet Service_DSL,Internet Service_Fiber optic,Internet Service_No,Payment Method_Bank transfer (automatic),Payment Method_Credit card (automatic),Payment Method_Electronic check,Payment Method_Mailed check,Churn Value
0,2,53.85,0,True,False,False,True,False,False,False,False,False,True,1
1,2,70.7,0,True,False,False,False,True,False,False,False,True,False,1
2,8,99.65,0,True,False,False,False,True,False,False,False,True,False,1
3,28,104.8,0,True,False,False,False,True,False,False,False,True,False,1
4,49,103.7,0,True,False,False,False,True,False,True,False,False,False,1


In [21]:
# Separate the label column (y) from the model inputs (X).
y = df.loc[:, target]
X = df.drop(target, axis=1)

X.head()


Unnamed: 0,Tenure Months,Monthly Charges,Senior Citizen,Contract_Month-to-month,Contract_One year,Contract_Two year,Internet Service_DSL,Internet Service_Fiber optic,Internet Service_No,Payment Method_Bank transfer (automatic),Payment Method_Credit card (automatic),Payment Method_Electronic check,Payment Method_Mailed check
0,2,53.85,0,True,False,False,True,False,False,False,False,False,True
1,2,70.7,0,True,False,False,False,True,False,False,False,True,False
2,8,99.65,0,True,False,False,False,True,False,False,False,True,False
3,28,104.8,0,True,False,False,False,True,False,False,False,True,False
4,49,103.7,0,True,False,False,False,True,False,True,False,False,False


In [22]:
# Hold out 20% of the rows for testing. The fixed seed keeps the split repeatable,
# and stratifying on y is requested so both partitions keep a similar class balance.
split_options = {
    "test_size": 0.2,
    "random_state": 42,
    "stratify": y,
}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [23]:
# Sanity-check the existing train/test split.
# This cell previously repeated the identical train_test_split call (same inputs,
# same seed), which only redid the same work; it now validates the split instead.

# Every row of X must land in exactly one partition.
assert len(X_train) + len(X_test) == len(X), "split lost or duplicated rows"
assert X_train.index.intersection(X_test.index).empty, "train and test rows overlap"

# Features and labels must stay aligned row-for-row within each partition.
assert X_train.index.equals(y_train.index), "train features/labels misaligned"
assert X_test.index.equals(y_test.index), "test features/labels misaligned"

# With stratify=y the churn rate should be close to equal in both partitions.
pd.DataFrame(
    {
        "rows": [len(y_train), len(y_test)],
        "churn_rate": [y_train.mean(), y_test.mean()],
    },
    index=["train", "test"],
)


In [24]:
# Random forest settings: 200 trees, depth capped at 8 to limit overfitting,
# and a fixed seed so the fit is repeatable.
rf_params = dict(n_estimators=200, max_depth=8, random_state=42)

rf_model = RandomForestClassifier(**rf_params)

# Fit on the training partition; the fitted estimator is the cell's output.
rf_model.fit(X_train, y_train)


In [25]:
# Logistic regression; the solver is allowed up to 1000 iterations.
lr_params = {"max_iter": 1000}

lr_model = LogisticRegression(**lr_params)

# Fit on the training partition; the fitted estimator is the cell's output.
lr_model.fit(X_train, y_train)


In [26]:
# Second column of predict_proba, taken for every test row, from each model.
rf_test_scores = rf_model.predict_proba(X_test)[:, 1]
lr_test_scores = lr_model.predict_proba(X_test)[:, 1]

# ROC AUC of each model on the held-out test partition.
rf_auc = roc_auc_score(y_test, rf_test_scores)
lr_auc = roc_auc_score(y_test, lr_test_scores)

print("Random Forest AUC:", rf_auc)
print("Logistic Regression AUC:", lr_auc)


Random Forest AUC: 0.8318523484373949
Logistic Regression AUC: 0.8314627972107613


In [27]:
# Per-class precision/recall/F1 of the random forest on the test partition.
rf_test_predictions = rf_model.predict(X_test)
rf_report = classification_report(y_test, rf_test_predictions)

print("Random Forest Classification Report:\n")
print(rf_report)


Random Forest Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.90      0.86      1033
           1       0.64      0.49      0.55       374

    accuracy                           0.79      1407
   macro avg       0.73      0.69      0.71      1407
weighted avg       0.78      0.79      0.78      1407



In [28]:
# Show the feature names the fitted random forest recorded from its training data.
rf_model.feature_names_in_


array(['Tenure Months', 'Monthly Charges', 'Senior Citizen',
       'Contract_Month-to-month', 'Contract_One year',
       'Contract_Two year', 'Internet Service_DSL',
       'Internet Service_Fiber optic', 'Internet Service_No',
       'Payment Method_Bank transfer (automatic)',
       'Payment Method_Credit card (automatic)',
       'Payment Method_Electronic check', 'Payment Method_Mailed check'],
      dtype=object)

In [30]:
# Persist both trained models to the project's models folder on Drive.
import os

MODEL_DIR = "/content/drive/MyDrive/Customer_churn_Telecom/models"

# joblib.dump does not create missing folders, so make sure the target exists first.
os.makedirs(MODEL_DIR, exist_ok=True)

joblib.dump(rf_model, os.path.join(MODEL_DIR, "churn_model.pkl"))
# Last expression: its return value (the list of written file names) is the cell output.
joblib.dump(lr_model, os.path.join(MODEL_DIR, "churn_logistic.pkl"))


['/content/drive/MyDrive/Customer_churn_Telecom/models/churn_logistic.pkl']