In [1]:
# Import libraries

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [2]:
# Load the data

# NOTE(review): this is an absolute, machine-specific location. Point DATA_DIR
# at a project-relative folder to make the notebook runnable elsewhere.
DATA_DIR = 'C:/Users/xpert/Downloads'

train_df = pd.read_csv(f'{DATA_DIR}/churn-bigml-80.csv')
test_df = pd.read_csv(f'{DATA_DIR}/churn-bigml-20.csv')

print(train_df.head())
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() would emit a stray "None" line after the summary.
train_df.info()

  State  Account length  Area code International plan Voice mail plan  \
0    KS             128        415                 No             Yes   
1    OH             107        415                 No             Yes   
2    NJ             137        415                 No              No   
3    OH              84        408                Yes              No   
4    OK              75        415                Yes              No   

   Number vmail messages  Total day minutes  Total day calls  \
0                     25              265.1              110   
1                     26              161.6              123   
2                      0              243.4              114   
3                      0              299.4               71   
4                      0              166.7              113   

   Total day charge  Total eve minutes  Total eve calls  Total eve charge  \
0             45.07              197.4               99             16.78   
1             27.47   

In [3]:
# Split each frame into the feature matrix (everything except "Churn")
# and the target vector (the "Churn" column itself).

x_train, y_train = train_df.drop(columns="Churn"), train_df["Churn"]

x_test, y_test = test_df.drop(columns="Churn"), test_df["Churn"]

In [4]:
# Encode categorical features using One-Hot Encoding

# Pin every categorical column to the levels observed in the TRAINING data
# before encoding. Running get_dummies(drop_first=True) independently on each
# frame lets each frame choose its own "first" level to drop: if the test set
# happens to lack the training baseline level, a different level is dropped
# there, and the subsequent align(fill_value=0) would zero-fill it, silently
# encoding those test rows as the baseline.
# The dtype list mirrors the dtypes get_dummies encodes by default.
categorical_cols = x_train.select_dtypes(include=['object', 'string', 'category']).columns
category_dtypes = {
    col: pd.CategoricalDtype(pd.Categorical(x_train[col]).categories)
    for col in categorical_cols
}

# Test values never seen in training become NaN under the pinned dtype and
# therefore get all-zero dummies (same effect the left-join alignment had).
x_train = pd.get_dummies(x_train.astype(category_dtypes), drop_first=True)
x_test = pd.get_dummies(x_test.astype(category_dtypes), drop_first=True)

# Align train and test columns (important after One-Hot encoding).
# With shared category dtypes the columns already match; this is kept as a
# safeguard so x_test always has exactly the training columns, in order.
x_train, x_test = x_train.align(x_test, join='left', axis=1, fill_value=0)

In [5]:
# Standardise the features: learn the scaling statistics from the training
# matrix only, then apply that same fitted scaler to both matrices.

scale = StandardScaler().fit(x_train)
x_train_scaled = scale.transform(x_train)
x_test_scaled = scale.transform(x_test)

In [6]:
# Fit three classifiers and collect their test-set predictions.
# fit() returns the fitted estimator, so construction and fitting are chained.

# Logistic Regression -- trained and evaluated on the standardised features
log_model = LogisticRegression(max_iter=1000).fit(x_train_scaled, y_train)
y_pred_log = log_model.predict(x_test_scaled)

# Decision Tree -- trained and evaluated on the unscaled encoded features
dt = DecisionTreeClassifier(random_state=42).fit(x_train, y_train)
y_pred_dt = dt.predict(x_test)

# Random Forest -- trained and evaluated on the unscaled encoded features
rf = RandomForestClassifier(random_state=42).fit(x_train, y_train)
y_pred_rf = rf.predict(x_test)

In [7]:
# Report accuracy, precision, recall and F1 on the test set for every model.
# Headings after the first carry a leading newline so the reports are
# separated by a blank line.

metric_table = (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score:", f1_score),
)

model_predictions = (
    ("Logistic Regression", y_pred_log),
    ("\nDecision Tree", y_pred_dt),
    ("\nRandom Forest", y_pred_rf),
)

for heading, predictions in model_predictions:
    print(heading)
    for metric_label, metric_fn in metric_table:
        print(metric_label, metric_fn(y_test, predictions))

Logistic Regression
Accuracy: 0.8590704647676162
Precision: 0.5106382978723404
Recall: 0.25263157894736843
F1-score: 0.3380281690140845

Decision Tree
Accuracy: 0.9340329835082459
Precision: 0.7802197802197802
Recall: 0.7473684210526316
F1-score: 0.7634408602150538

Random Forest
Accuracy: 0.9415292353823088
Precision: 0.9827586206896551
Recall: 0.6
F1-score: 0.7450980392156863


In [10]:
# Tune the Random Forest with an exhaustive grid search, selecting the
# candidate with the best cross-validated F1-score.

# Candidate values for each hyperparameter
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[None, 20],
    min_samples_split=[2, 5],
    min_samples_leaf=[1, 2],
)

# 3-fold cross-validated search, scored on F1, using all available cores
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    scoring='f1',
    cv=3,
    n_jobs=-1,
)

# Run the search on the training data only
grid_search.fit(x_train, y_train)

# Keep the refitted best model and report the winning parameter combination
best_rf = grid_search.best_estimator_
print("Best parameters found:", grid_search.best_params_)

Best parameters found: {'max_depth': 20, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}


In [11]:
# Score the tuned Random Forest on the held-out test set
y_pred_best = best_rf.predict(x_test)

print("\nTuned Random Forest Performance")
for metric_label, metric_fn in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1-score:", f1_score),
):
    print(metric_label, metric_fn(y_test, y_pred_best))



Tuned Random Forest Performance
Accuracy: 0.9445277361319341
Precision: 1.0
Recall: 0.6105263157894737
F1-score: 0.7581699346405228


## Interpretation

The perfect precision (1.0) indicates that all customers predicted as churn actually churned — there were no false positives.

The recall (61%) shows that while the model is very cautious, it does not capture all churn cases.

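The F1-score rose slightly relative to the untuned Random Forest (from about 0.745 to 0.758), indicating a marginally better balance between precision and recall; note, however, that the untuned Decision Tree still has the highest F1-score (about 0.763) of the models evaluated.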

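High accuracy (about 94%) reflects strong overall classification performance, but it should be read with caution given the class imbalance: the metrics above imply that only 95 of the 667 test customers churned, so a model that always predicts "no churn" would already reach roughly 86% accuracy. Precision, recall and F1-score are therefore the more informative measures here.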