# GradientBoost Classifier

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings 
warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
df=pd.read_csv('Travel.csv')
df.head()

Unnamed: 0,CustomerID,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfPersonVisiting,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,NumberOfChildrenVisiting,Designation,MonthlyIncome
0,200000,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3,3.0,Deluxe,3.0,Single,1.0,1,2,1,0.0,Manager,20993.0
1,200001,0,49.0,Company Invited,1,14.0,Salaried,Male,3,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,2.0,Manager,20130.0
2,200002,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,3,4.0,Basic,3.0,Single,7.0,1,3,0,0.0,Executive,17090.0
3,200003,0,33.0,Company Invited,1,9.0,Salaried,Female,2,3.0,Basic,3.0,Divorced,2.0,1,5,1,1.0,Executive,17909.0
4,200004,0,,Self Enquiry,1,8.0,Small Business,Male,2,3.0,Basic,4.0,Divorced,1.0,0,5,1,0.0,Executive,18468.0


In [3]:
# Age
df.Age.fillna(df.Age.median(),inplace=True)

# TypeofContact
df.TypeofContact.fillna(df.TypeofContact.mode()[0],inplace=True)

#DurationofPitch
df.DurationOfPitch.fillna(df.DurationOfPitch.median(),inplace=True)

#NumberOfFollowups
df.NumberOfFollowups.fillna(df.NumberOfFollowups.mode()[0],inplace=True)

#PreferredPropertyStar
df.PreferredPropertyStar.fillna(df.PreferredPropertyStar.mode()[0],inplace=True)

#NumberOfTrips
df.NumberOfTrips.fillna(df.NumberOfTrips.median(0),inplace=True)

#NumberOfChildrenVisiting
df.NumberOfChildrenVisiting.fillna(df.NumberOfChildrenVisiting.mode()[0],inplace=True)

#MonthlyIncome
df.MonthlyIncome.fillna(df.MonthlyIncome.median(),inplace=True)





In [4]:
df.drop('CustomerID',inplace=True,axis=1)

In [5]:
# Merging two columns
df['TotalVisiting']=df['NumberOfChildrenVisiting']+df['NumberOfPersonVisiting']
df.drop(columns=['NumberOfPersonVisiting','NumberOfChildrenVisiting'],axis=1)

Unnamed: 0,ProdTaken,Age,TypeofContact,CityTier,DurationOfPitch,Occupation,Gender,NumberOfFollowups,ProductPitched,PreferredPropertyStar,MaritalStatus,NumberOfTrips,Passport,PitchSatisfactionScore,OwnCar,Designation,MonthlyIncome,TotalVisiting
0,1,41.0,Self Enquiry,3,6.0,Salaried,Female,3.0,Deluxe,3.0,Single,1.0,1,2,1,Manager,20993.0,3.0
1,0,49.0,Company Invited,1,14.0,Salaried,Male,4.0,Deluxe,4.0,Divorced,2.0,0,3,1,Manager,20130.0,5.0
2,1,37.0,Self Enquiry,1,8.0,Free Lancer,Male,4.0,Basic,3.0,Single,7.0,1,3,0,Executive,17090.0,3.0
3,0,33.0,Company Invited,1,9.0,Salaried,Female,3.0,Basic,3.0,Divorced,2.0,1,5,1,Executive,17909.0,3.0
4,0,36.0,Self Enquiry,1,8.0,Small Business,Male,3.0,Basic,4.0,Divorced,1.0,0,5,1,Executive,18468.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4883,1,49.0,Self Enquiry,3,9.0,Small Business,Male,5.0,Deluxe,4.0,Unmarried,2.0,1,1,1,Manager,26576.0,4.0
4884,1,28.0,Company Invited,1,31.0,Salaried,Male,5.0,Basic,3.0,Single,3.0,1,3,1,Executive,21212.0,6.0
4885,1,52.0,Self Enquiry,3,17.0,Salaried,Female,4.0,Standard,4.0,Married,7.0,0,1,1,Senior Manager,31820.0,7.0
4886,1,19.0,Self Enquiry,3,16.0,Small Business,Male,4.0,Basic,3.0,Single,3.0,0,5,0,Executive,20289.0,5.0


In [6]:
## train test split
from sklearn.model_selection import train_test_split
X=df.drop(['ProdTaken'],axis=1)
y=df['ProdTaken']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.20,random_state=10)

In [7]:
# Creating Column Transformer with 3 types of Transformer
cat_features=X.select_dtypes(include="object").columns
num_features = X.select_dtypes(exclude="object").columns
from sklearn.preprocessing import OneHotEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
numeric_transformer=StandardScaler()
ohe_transformer=OneHotEncoder(drop='first')
preprocessor=ColumnTransformer([
    ("OneHotEncoder",ohe_transformer,cat_features),
    ("StandardScaler",numeric_transformer,num_features)
])

In [8]:
X_train=preprocessor.fit_transform(X_train)

In [9]:
X_test=preprocessor.transform(X_test)

In [12]:

from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier,GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix,precision_score,recall_score,f1_score,roc_auc_score

In [13]:
models={
    "Decisiontree":DecisionTreeClassifier(),
    "AdaBoostClassifier":AdaBoostClassifier(),
    "GradientBoostingClassifier":GradientBoostingClassifier(),
    "Random Forest":RandomForestClassifier()}
for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train) ## Training Model

    ## Making Prediction
    y_train_pred=model.predict(X_train)
    y_test_pred=model.predict(X_test)

    ## Training set performance
    model_train_accuracy=accuracy_score(y_train_pred,y_train)
    model_train_f1score=f1_score(y_train_pred,y_train,average='weighted')
    model_train_cr=classification_report(y_train_pred,y_train)
    model_train_recall=recall_score(y_train_pred,y_train)

    
    # ## Testing set performance
    model_test_accuracy=accuracy_score(y_test_pred,y_test)
    model_test_f1score=f1_score(y_test_pred,y_test,average='weighted')
    model_test_cr=classification_report(y_test_pred,y_test)
    model_test_recall=recall_score(y_test_pred,y_test)

    print(list(models.keys())[i])
    print("Model performance for training set")
    print('Accuracy:{:.4f}'.format(model_train_accuracy))
    print('f1score:{:.4f}'.format(model_train_f1score))
    print('recallscore:{:.4f}'.format(model_train_recall))
    
    print('************************')

    print("Model performance for test set")
    print('Accuracy:{:.4f}'.format(model_test_accuracy))
    print('f1score:{:.4f}'.format(model_test_f1score))
    print('recallscore:{:.4f}'.format(model_test_recall))
    #print('='*35)
    print('\n')

Decisiontree
Model performance for training set
Accuracy:1.0000
f1score:1.0000
recallscore:1.0000
************************
Model performance for test set
Accuracy:0.8896
f1score:0.8857
recallscore:0.6373


AdaBoostClassifier
Model performance for training set
Accuracy:0.8432
f1score:0.8752
recallscore:0.7744
************************
Model performance for test set
Accuracy:0.8579
f1score:0.8898
recallscore:0.7115


GradientBoostingClassifier
Model performance for training set
Accuracy:0.8864
f1score:0.8985
recallscore:0.8523
************************
Model performance for test set
Accuracy:0.8793
f1score:0.8977
recallscore:0.7792


Random Forest
Model performance for training set
Accuracy:1.0000
f1score:1.0000
recallscore:1.0000
************************
Model performance for test set
Accuracy:0.9284
f1score:0.9354
recallscore:0.9505


