In [3]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
import optuna 
from sklearn.ensemble import AdaBoostClassifier,RandomForestClassifier,GradientBoostingClassifier
from sklearn.metrics import accuracy_score,classification_report,f1_score,recall_score,precision_score

In [2]:
data=pd.read_csv('../Dataset/preprocessed_dataset.csv')
df=data.copy()

In [3]:
df.sample(5)

Unnamed: 0,number_of_dependents,city,contract,total_charges,total_long_distance_charges,total_revenue,tenure,number_of_referrals,customer_status,churn_value
5992,-0.485996,79,1,0.860135,0.556265,0.828248,0.326387,-0.31346,Stayed,0
5293,-0.485996,511,0,0.452353,-0.067548,0.339393,0.244421,-0.647645,Churned,1
6773,0.552958,346,1,-0.691146,-0.578937,-0.720279,-0.780162,1.02328,Stayed,0
4014,2.630867,463,1,-0.569545,-0.892725,-0.699859,-0.165412,0.35491,Stayed,0
4625,1.591913,306,1,0.125333,0.389122,0.217856,0.039504,0.020725,Stayed,0


In [4]:
df.columns

Index(['number_of_dependents', 'city', 'contract', 'total_charges',
       'total_long_distance_charges', 'total_revenue', 'tenure',
       'number_of_referrals', 'customer_status', 'churn_value'],
      dtype='object')

In [5]:
x=df[['number_of_dependents', 'city', 'contract', 'total_charges',
       'total_long_distance_charges', 'total_revenue', 'tenure',
       'number_of_referrals']]
y=df['customer_status']

In [6]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)

In [16]:
models={"Gradient Boosting":GradientBoostingClassifier(),"AdaBoosting":AdaBoostClassifier(),"Random Forest":RandomForestClassifier()}
for i in models.items():
    model=i[1]
    model.fit(x_train,y_train)
    print(model.get_params())
    pred=model.predict(x_test)
    print(i[0],end="\n")
    print("Accuracy:",accuracy_score(y_test,pred),end="\n")
    print("Precision:",precision_score(y_test,pred,average='weighted'),end="\n")
    print("Precision:",recall_score(y_test,pred,average='weighted'),end="\n")
    print("F1-Score:",f1_score(y_test,pred,average='weighted'),end="\n")
    print("------"*10,end="\n")


{'ccp_alpha': 0.0, 'criterion': 'friedman_mse', 'init': None, 'learning_rate': 0.1, 'loss': 'log_loss', 'max_depth': 3, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_iter_no_change': None, 'random_state': None, 'subsample': 1.0, 'tol': 0.0001, 'validation_fraction': 0.1, 'verbose': 0, 'warm_start': False}
Gradient Boosting
Accuracy: 0.8124552612741589
Precision: 0.8027373917661468
Precision: 0.8124552612741589
F1-Score: 0.8052368453093047
------------------------------------------------------------
{'algorithm': 'deprecated', 'estimator': None, 'learning_rate': 1.0, 'n_estimators': 50, 'random_state': None}
AdaBoosting
Accuracy: 0.7465998568360773
Precision: 0.7139640792515666
Precision: 0.7465998568360773
F1-Score: 0.7038752318368289
------------------------------------------------------------
{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 

In [1]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
def objective(trail,data=x,target=y):
    param={
        "subsample":trail.suggest_float('subsample',0.5,1),
        "learning_rate":trail.suggest_float("learning_rate",.0001,.1),
        "n_estimators":trail.suggest_int('n_estimators',50,500),
        "max_depth":trail.suggest_int('max_depth',2,30),
        "min_samples_split":trail.suggest_int('min_samples_split',2,20),
        "min_samples_leaf":trail.suggest_int('min_samples_leaf',1,20)
    }
    model=GradientBoostingClassifier(**param)
    model.fit(x_train,y_train)
    y_pred=model.predict(x_test)
    return accuracy_score(y_test,y_pred)

trials=optuna.create_study(direction='maximize')
trials.optimize(objective,n_trials=5)

NameError: name 'train_test_split' is not defined

In [77]:
params=trials.best_params
model=GradientBoostingClassifier(**params)
model.fit(x_train,y_train)

In [78]:
pred=model.predict(x_test)
print("Training Score:",model.score(x_train,y_train),"\n")
print("Testing Accuracy:",accuracy_score(y_test,pred),"\n")
print("Classification report:\n",classification_report(y_test,pred),"\n")
print("F1-Score:",f1_score(y_test,pred,average='weighted'))

Training Score: 0.8444046553267681 

Testing Accuracy: 0.8346456692913385 

Classification report:
               precision    recall  f1-score   support

     Churned       0.73      0.57      0.64       364
      Joined       0.71      0.56      0.62        98
      Stayed       0.87      0.97      0.92       935

    accuracy                           0.83      1397
   macro avg       0.77      0.70      0.73      1397
weighted avg       0.82      0.83      0.82      1397
 

F1-Score: 0.8246368897198869


In [79]:
import pickle 
pickle.dump(model,open('../Models/Predictor_Model.sav','wb'))