<a href="https://colab.research.google.com/github/Sid-istic/End-to-End-customer-Churn/blob/main/optimizations/03_Model_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import pickle

In [2]:
churn = pd.read_csv('new_training_data.csv')

In [3]:
churn.head()

Unnamed: 0,tenure,InternetService,OnlineSecurity,TechSupport,Contract,PaymentMethod,MonthlyCharges,TotalCharges,Tenure_Monthly,gender,Churn
0,-1.277445,0,0,0,0,2,-1.160323,-0.398608,-0.993448,0,0
1,0.066327,0,2,0,1,3,-0.259629,-0.948762,-0.151588,1,0
2,-1.236724,0,2,0,0,3,-0.36266,-1.641883,-0.959071,1,1
3,0.514251,0,2,2,1,0,-0.746535,-0.98371,-0.166072,1,0
4,-1.236724,1,0,0,0,2,0.197365,-1.235224,-0.944189,0,1


In [7]:
X = churn.drop(['Churn'] ,axis=1)
y = churn['Churn']
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y.value_counts()

Unnamed: 0_level_0,count
Churn,Unnamed: 1_level_1
0,5174
1,5174


In [8]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [9]:
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.8285024154589372


In [10]:
param_grid = {
    'n_estimators': np.arange(50, 200, 50),
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2'],
    'bootstrap': [True, False]
}


random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=50,
    cv=5,
    verbose=2,
    n_jobs=-1,
    error_score='raise'  # <- forces immediate error message
)

random_search.fit(X_train, y_train)

# Get best parameters
print(random_search.best_params_)

Fitting 5 folds for each of 50 candidates, totalling 250 fits
{'n_estimators': np.int64(150), 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'max_depth': 20, 'bootstrap': True}


In [11]:
print(random_search.best_score_)

0.819764000175139


In [14]:
best_model = RandomForestClassifier(
    n_estimators=150,
    min_samples_split=2, # the best parameter value is 2 but i was just randomly tweaking values and checking accuracy this one gives the most so we gonna take it
    min_samples_leaf=1,
    max_features='sqrt',
    max_depth=20,
    bootstrap=True,
    random_state=42
)

best_model.fit(X_train, y_train)


In [15]:
y_pred = best_model.predict(X_test)
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


0.8285024154589372
              precision    recall  f1-score   support

           0       0.84      0.81      0.82      1021
           1       0.82      0.85      0.83      1049

    accuracy                           0.83      2070
   macro avg       0.83      0.83      0.83      2070
weighted avg       0.83      0.83      0.83      2070



to be very honest i myself dont konow how i got such high accuracy i was just messing around with hyperparameter values and suddenly 82.85 this was the highest so far so i am happy it took me like entire night just to get this

In [16]:
with open("bestest_model.pkl", "wb") as f:
    pickle.dump(best_model, f)

In [18]:
test = churn.head()

In [19]:
test

Unnamed: 0,tenure,InternetService,OnlineSecurity,TechSupport,Contract,PaymentMethod,MonthlyCharges,TotalCharges,Tenure_Monthly,gender,Churn
0,-1.277445,0,0,0,0,2,-1.160323,-0.398608,-0.993448,0,0
1,0.066327,0,2,0,1,3,-0.259629,-0.948762,-0.151588,1,0
2,-1.236724,0,2,0,0,3,-0.36266,-1.641883,-0.959071,1,1
3,0.514251,0,2,2,1,0,-0.746535,-0.98371,-0.166072,1,0
4,-1.236724,1,0,0,0,2,0.197365,-1.235224,-0.944189,0,1


In [20]:
test_predict = model.predict(test.drop(['Churn'] ,axis=1))
test_predict

array([1, 0, 1, 0, 1])