In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings("ignore")

In [2]:
df=pd.read_csv("cust_churn_preprocessed_data.csv")

In [3]:
# splitting the features and target
x = df[['tenure', 'MonthlyCharges','TotalCharges','OnlineSecurity_Yes','Contract_Two year']]
y = df["Churn_Yes"]

In [4]:
sd=StandardScaler()
X_t=sd.fit_transform(x)

param_grid = {'criterion':['gini', 'entropy', 'log_loss'],
              'max_features': [None,'sqrt','log2'],
              'n_estimators':[10,100]} 
grid=GridSearchCV(RandomForestClassifier(), param_grid, refit=True, verbose=3,n_jobs=-1)
grid.fit(X_t,y)

Fitting 5 folds for each of 18 candidates, totalling 90 fits


In [5]:
result=grid.cv_results_
y_test_pred = grid.predict(X_t)
print("Accuracy Score:\n", accuracy_score(y, y_test_pred))
print("Confusion Matrix:\n", confusion_matrix(y, y_test_pred))
print("Classification Report:\n", classification_report(y, y_test_pred))
table=pd.DataFrame.from_dict(result)
table

Accuracy Score:
 0.9879292757565454
Confusion Matrix:
 [[4451   39]
 [  32 1360]]
Classification Report:
               precision    recall  f1-score   support

           0       0.99      0.99      0.99      4490
           1       0.97      0.98      0.97      1392

    accuracy                           0.99      5882
   macro avg       0.98      0.98      0.98      5882
weighted avg       0.99      0.99      0.99      5882



Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_criterion,param_max_features,param_n_estimators,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.150335,0.004416,0.004134,0.000584,gini,,10,"{'criterion': 'gini', 'max_features': None, 'n...",0.790994,0.785896,0.779762,0.786565,0.789966,0.786637,0.003947,12
1,2.122059,0.217221,0.027037,0.003352,gini,,100,"{'criterion': 'gini', 'max_features': None, 'n...",0.780799,0.790994,0.779762,0.79932,0.788265,0.787828,0.007166,10
2,0.088717,0.010917,0.004922,0.001216,gini,sqrt,10,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.783347,0.788445,0.789966,0.777211,0.788265,0.785447,0.004682,14
3,1.156775,0.099376,0.04281,0.013009,gini,sqrt,100,"{'criterion': 'gini', 'max_features': 'sqrt', ...",0.792693,0.784197,0.787415,0.788265,0.791667,0.788847,0.003058,8
4,0.101005,0.018079,0.005005,0.001709,gini,log2,10,"{'criterion': 'gini', 'max_features': 'log2', ...",0.789295,0.779949,0.77551,0.785714,0.789966,0.784087,0.005568,16
5,1.068678,0.082312,0.032258,0.00942,gini,log2,100,"{'criterion': 'gini', 'max_features': 'log2', ...",0.795242,0.795242,0.795068,0.791667,0.794218,0.794287,0.001364,1
6,0.270449,0.032907,0.008186,0.006844,entropy,,10,"{'criterion': 'entropy', 'max_features': None,...",0.779949,0.794393,0.784864,0.784014,0.784864,0.785617,0.004751,13
7,2.843322,0.241332,0.029312,0.00633,entropy,,100,"{'criterion': 'entropy', 'max_features': None,...",0.787596,0.792693,0.789116,0.787415,0.796769,0.790718,0.00357,4
8,0.144308,0.023382,0.006296,0.003294,entropy,sqrt,10,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.783347,0.788445,0.788265,0.77551,0.791667,0.785447,0.005635,14
9,1.644384,0.152632,0.039418,0.01605,entropy,sqrt,100,"{'criterion': 'entropy', 'max_features': 'sqrt...",0.787596,0.790994,0.789966,0.787415,0.791667,0.789527,0.001739,7


In [6]:
filename="final_model_sales.sav"
pickle.dump(grid,open(filename,'wb'))

In [7]:
input_data = {
    'tenure':2, 
    'MonthlyCharges':29.5,
    'TotalCharges':28.5, 
    'OnlineSecurity_Yes':1, 
    'Contract_Two year':0
}

In [8]:
input_data_df = pd.DataFrame([input_data])
preinput=sd.transform(input_data_df)
loaded_model=pickle.load(open("final_model_sales.sav",'rb'))
prediction=loaded_model.predict(preinput)
pred_prob = loaded_model.predict_proba(preinput)
print(prediction)
print(f"Prediction: {'Churn' if prediction[0] == 1 else 'No Churn'}")
print(f"Prediciton Probability: {pred_prob}")



[1]
Prediction: Churn
Prediciton Probability: [[0.35 0.65]]
