In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the bank marketing dataset from bank.csv in the working directory
df = pd.read_csv("bank.csv")
# Preview the first five records
df.head(n=5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [3]:
#Show the size of the dataset as a (rows, columns) tuple
df.shape

(11162, 17)

In [4]:
#Target column: deposit. We have to predict whether the customer will
#subscribe to a term deposit at the bank or not.

In [5]:
# Count the missing values in every column
df.isna().sum()

age          0
job          0
marital      0
education    0
default      0
balance      0
housing      0
loan         0
contact      0
day          0
month        0
duration     0
campaign     0
pdays        0
previous     0
poutcome     0
deposit      0
dtype: int64

In [6]:
#Inspect the data type of every column
df.dtypes

age          int64
job          int64
marital      int64
education    int64
default      int64
balance      int64
housing      int64
loan         int64
contact      int64
day          int64
month        int64
duration     int64
campaign     int64
pdays        int64
previous     int64
poutcome     int64
deposit      int64
dtype: object

In [7]:
#Check whether the target classes are balanced by counting the rows per class
df['deposit'].value_counts()

0    5873
1    5289
Name: deposit, dtype: int64

In [8]:
# Separate the dataset into input features and the output column
X = df.drop(columns=["deposit"])  # input: every column except the target
Y = df["deposit"]  # output: the target column

In [9]:
# Hold out 30% of the rows for testing; random_state makes the split repeatable
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1,
)

In [10]:
# Standardise the input features before any model is trained.
# The scaler learns its statistics from the training split only and the same
# fitted scaler is then applied to both the training and the testing inputs.
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
ss.fit(X_train)
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

In [11]:
#Display the scaled training inputs (the scaler returned them as an array)
X_train

array([[ 2.76552511,  0.15596991, -0.32335267, ..., -0.48203214,
        -0.35894691,  0.52287872],
       [ 1.16824122, -1.0830671 , -0.32335267, ..., -0.48203214,
        -0.35894691,  0.52287872],
       [ 1.33637636,  1.39500692, -0.32335267, ..., -0.48203214,
        -0.35894691,  0.52287872],
       ...,
       [-1.18565083, -0.77330785, -0.32335267, ..., -0.48203214,
        -0.35894691,  0.52287872],
       [-0.00870481, -1.39282635,  1.27358436, ...,  3.07264515,
         4.3350541 , -0.47109973],
       [-0.42904267, -1.0830671 , -0.32335267, ..., -0.48203214,
        -0.35894691,  0.52287872]])

In [12]:
#create a function
def create_model(model):
    """Fit ``model`` on the training split and report how it does on the test split.

    Trains on the notebook-level ``X_train``/``Y_train``, predicts on ``X_test``,
    then prints the classification report followed by the confusion matrix,
    both computed against ``Y_test``.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    object
        The same ``model`` instance, after fitting.
    """
    # Imported here so the function does not depend on a later cell having run
    from sklearn.metrics import classification_report, confusion_matrix

    model.fit(X_train, Y_train)  # train the model
    Y_pred = model.predict(X_test)  # predict on the held-out inputs
    print(classification_report(Y_test, Y_pred))
    print(confusion_matrix(Y_test, Y_pred))
    return model

In [13]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [14]:
#Use baseline model : LogisticRegression 
from sklearn.linear_model import LogisticRegression 

In [15]:
# Instantiate the baseline logistic regression with its default settings
lr = LogisticRegression()

In [16]:
#Train and evaluate the baseline; create_model prints the report and returns the fitted model
lr=create_model(lr)

              precision    recall  f1-score   support

           0       0.80      0.82      0.81      1760
           1       0.79      0.77      0.78      1589

    accuracy                           0.80      3349
   macro avg       0.80      0.80      0.80      3349
weighted avg       0.80      0.80      0.80      3349

[[1441  319]
 [ 359 1230]]


In [17]:
#Apply the DecisionTreeClassifier class:
#model the dataset with the help of DecisionTreeClassifier
#import the DecisionTreeClassifier class
from sklearn.tree import DecisionTreeClassifier

In [18]:
# Instantiate an unpruned decision tree; gini is the default split criterion,
# spelled out here for clarity
dt = DecisionTreeClassifier(criterion="gini", random_state=1)

In [19]:
#Train and evaluate the unpruned decision tree; the fitted model is returned
dt=create_model(dt)

              precision    recall  f1-score   support

           0       0.79      0.80      0.80      1760
           1       0.78      0.77      0.77      1589

    accuracy                           0.78      3349
   macro avg       0.78      0.78      0.78      3349
weighted avg       0.78      0.78      0.78      3349

[[1407  353]
 [ 372 1217]]


In [20]:
# Tabulate the importance score the fitted tree assigns to each input feature.
# NOTE: feature_importances_ holds the normalised impurity reduction per
# feature; the column keeps the notebook's 'Information Gain' label.
I = X.columns
IG = dt.feature_importances_
d = {
    'Input Column': I,
    'Information Gain': IG,
}
# Turn the mapping into a two-column dataframe and display it
F = pd.DataFrame(data=d)
F

Unnamed: 0,Input Column,Information Gain
0,age,0.077136
1,job,0.03422
2,marital,0.016968
3,education,0.014356
4,default,0.000456
5,balance,0.084437
6,housing,0.04262
7,loan,0.006934
8,contact,0.061793
9,day,0.076581


In [21]:
# Rank the features from most to least important (descending order)
F.sort_values(by='Information Gain', ascending=False, ignore_index=True)

Unnamed: 0,Input Column,Information Gain
0,duration,0.354008
1,month,0.098357
2,balance,0.084437
3,age,0.077136
4,day,0.076581
5,contact,0.061793
6,pdays,0.047856
7,housing,0.04262
8,poutcome,0.03611
9,job,0.03422


In [22]:
#create the tree
from sklearn import tree
features=X.columns #input features
fig=plt.figure(figsize=(20,20))
#Draw the tree on the figure; with the call disabled the cell only showed an
#empty figure. dt is unpruned, so only its top levels are rendered (max_depth
#here limits the drawing, not the fitted model) to keep the plot readable.
tree.plot_tree(dt,max_depth=3,feature_names=list(features),filled=True)
plt.show()

<Figure size 1440x1440 with 0 Axes>

In [23]:
#But we got a low score: about 0.77 (77%) for class 1. It is good but not excellent.
#The reason behind the low score is overfitting:
#the model is overfit, so to reduce the overfitting
#we use a pruning technique.

#How to reduce overfitting by using the pruning technique:
#There are 2 types of pruning technique:
#1. max_depth : inbuilt parameter
#2. min_samples_leaf  : inbuilt parameter

# pruning technique

In [24]:
#1. max_depth parameter
#Note: as a rule of thumb, max_depth should not be more than 8
#Create an object of the DecisionTreeClassifier class and pass the
#max_depth parameter

In [25]:
# Instantiate a gini-based decision tree pruned with max_depth:
# the tree is not allowed to grow deeper than 5 levels
dt1 = DecisionTreeClassifier(criterion="gini", max_depth=5, random_state=1)

In [26]:
#Train and evaluate the depth-limited tree; the fitted model is returned
dt1=create_model(dt1)

              precision    recall  f1-score   support

           0       0.85      0.75      0.80      1760
           1       0.75      0.86      0.80      1589

    accuracy                           0.80      3349
   macro avg       0.80      0.80      0.80      3349
weighted avg       0.81      0.80      0.80      3349

[[1316  444]
 [ 228 1361]]


In [27]:
# Tabulate the importance score the depth-limited tree assigns to each feature.
# NOTE: feature_importances_ holds the normalised impurity reduction per
# feature; the column keeps the notebook's 'Information Gain' label.
I = X.columns
IG = dt1.feature_importances_
d = {
    'Input Column': I,
    'Information Gain': IG,
}
# Turn the mapping into a dataframe and show it in descending order of importance
F = pd.DataFrame(data=d)
F.sort_values(by='Information Gain', ascending=False, ignore_index=True)

Unnamed: 0,Input Column,Information Gain
0,duration,0.576413
1,contact,0.119728
2,housing,0.066337
3,month,0.052438
4,poutcome,0.051127
5,pdays,0.050013
6,previous,0.04495
7,age,0.026846
8,loan,0.003762
9,day,0.003512


In [28]:
#create the tree
from sklearn import tree
features=X.columns #input features
fig=plt.figure(figsize=(20,20))
#Draw the pruned tree on the figure; with the call disabled the cell only
#showed an empty figure. dt1 is limited to 5 levels, so it is drawn in full.
tree.plot_tree(dt1,feature_names=list(features),filled=True)
plt.show()

<Figure size 1440x1440 with 0 Axes>

In [29]:
# 2. Second pruning technique: min_samples_leaf
# Instantiate a gini-based decision tree in which every leaf must keep
# at least 50 training samples (a leaf may hold more than 50, never fewer)
dt2 = DecisionTreeClassifier(criterion="gini", min_samples_leaf=50, random_state=1)

In [30]:
#Train and evaluate the min_samples_leaf-pruned tree; the fitted model is returned
dt2=create_model(dt2)

              precision    recall  f1-score   support

           0       0.87      0.78      0.82      1760
           1       0.78      0.87      0.83      1589

    accuracy                           0.83      3349
   macro avg       0.83      0.83      0.83      3349
weighted avg       0.83      0.83      0.83      3349

[[1378  382]
 [ 204 1385]]


In [31]:
#Note: the best decision tree here uses the pruning technique min_samples_leaf=50
#with criterion = gini index
#recall 0.87 for class 1 means 87% of the actual class-1 customers are predicted correctly

In [32]:
#Random forest: it is an ensembling technique. It is also used for classification
#problems.
#Use Random Forest
'''
Train the dataset on multiple decisiontreeclassifier algorithm 
'''
#Random Forest Tree 
#call Random Forest Tree from package
from sklearn.ensemble import RandomForestClassifier
#ensemble means combining the predictions of multiple models trained on the same dataset

In [33]:
#Check the size of the input features as (rows, columns) before choosing max_features
X.shape

(11162, 16)

In [34]:
# Instantiate the random forest classifier
# n_estimators: how many decision trees the forest contains
# max_features: how many input features are considered when searching for each split
rfc = RandomForestClassifier(n_estimators=100, max_features=4, random_state=1)

In [35]:
#Train and evaluate the random forest; the fitted model is returned
rfc=create_model(rfc)

              precision    recall  f1-score   support

           0       0.88      0.81      0.85      1760
           1       0.81      0.88      0.84      1589

    accuracy                           0.85      3349
   macro avg       0.85      0.85      0.85      3349
weighted avg       0.85      0.85      0.85      3349

[[1434  326]
 [ 193 1396]]


In [36]:
# Tabulate the importance score the random forest assigns to each input feature.
# NOTE: feature_importances_ holds the normalised impurity reduction per
# feature; the column keeps the notebook's 'Information Gain' label.
I = X.columns
IG = rfc.feature_importances_
d = {
    'Input Column': I,
    'Information Gain': IG,
}

# Turn the mapping into a dataframe and show it in descending order of importance
F = pd.DataFrame(data=d)
F.sort_values(by='Information Gain', ascending=False, ignore_index=True)

Unnamed: 0,Input Column,Information Gain
0,duration,0.362295
1,balance,0.089021
2,age,0.088193
3,month,0.083337
4,day,0.071265
5,pdays,0.049338
6,job,0.039578
7,contact,0.038811
8,poutcome,0.03568
9,campaign,0.033898


In [37]:
#Boosting techniques:
#1. AdaBoost : Adaptive Boosting
#2. Gradient Boost
#3. Extreme Gradient Boost

In [38]:
#Boosting technique
#1. AdaBoost means Adaptive Boosting
from sklearn.ensemble import AdaBoostClassifier

In [39]:
# Instantiate the AdaBoost classifier with 25 boosting rounds
# By default AdaBoost boosts decision stumps, i.e. a DecisionTreeClassifier
# restricted to a single split (max_depth=1)
# n_estimators: maximum number of weak learners fitted one after another;
# it is a tuning parameter and is not tied to the number of input features
ada = AdaBoostClassifier(random_state=1, n_estimators=25)

In [40]:
#Train and evaluate the AdaBoost model; the fitted model is returned
ada=create_model(ada)

              precision    recall  f1-score   support

           0       0.82      0.83      0.82      1760
           1       0.81      0.80      0.80      1589

    accuracy                           0.81      3349
   macro avg       0.81      0.81      0.81      3349
weighted avg       0.81      0.81      0.81      3349

[[1459  301]
 [ 325 1264]]


In [41]:
#Gradient Boosting:
#2. Gradient Boosting (GB):
#It basically focuses on the shortcomings (errors) of the trees built so far.
#error means residual = actual output - predicted output

#2. Gradient Boost technique: the second boosting ensembling technique
# GB: unlike AdaBoost's stumps it grows deeper trees (scikit-learn's default
# max_depth is 3, not a fully grown tree). Each new tree focuses on the shortcomings;
#shortcomings means the errors left by the previous trees

In [42]:
#call GradientBoostingClassifier class from following package
from sklearn.ensemble import GradientBoostingClassifier

In [43]:
# Instantiate the gradient boosting classifier with 75 boosting stages
# n_estimators: number of boosting stages; each stage adds one small tree
gbc = GradientBoostingClassifier(random_state=1, n_estimators=75)

In [44]:
#Train and evaluate the gradient boosting model; the fitted model is returned
gbc=create_model(gbc)

              precision    recall  f1-score   support

           0       0.86      0.82      0.84      1760
           1       0.81      0.86      0.83      1589

    accuracy                           0.84      3349
   macro avg       0.84      0.84      0.84      3349
weighted avg       0.84      0.84      0.84      3349

[[1437  323]
 [ 228 1361]]


In [45]:
# Tabulate the importance score the gradient boosting model assigns to each feature.
# NOTE: feature_importances_ holds the normalised impurity reduction per
# feature; the column keeps the notebook's 'Information Gain' label.
I = X.columns
IG = gbc.feature_importances_
d = {
    'Input Column': I,
    'Information Gain': IG,
}

# Turn the mapping into a dataframe and show it in descending order of importance
F = pd.DataFrame(data=d)
F.sort_values(by='Information Gain', ascending=False, ignore_index=True)

Unnamed: 0,Input Column,Information Gain
0,duration,0.533194
1,month,0.092246
2,pdays,0.089386
3,contact,0.08568
4,housing,0.061025
5,poutcome,0.046646
6,age,0.042787
7,balance,0.017926
8,day,0.010389
9,loan,0.006428


In [46]:
#3. Extreme Gradient Boosting (XGB): the 3rd boosting technique
#This is a better version of gradient boosting. Short form: XGB
#Why it is called a better version of gradient boosting:
#1. XGBoost uses multithreading
#2. It takes less memory space and is faster
#3. It is very useful for handling huge amounts of data
#4. It has built-in capability to handle outliers
#5. It handles null values
#6. It reduces overfitting automatically through built-in regularization

In [47]:
#install the external package from python's community first for XGBoost
#!pip install xgboost 

In [48]:
from xgboost import XGBClassifier

In [49]:
# Instantiate the XGBoost classifier with 30 boosting rounds
# reg_alpha: L1 regularisation strength (a hyperparameter) used to limit overfitting
# n_estimators: number of boosting rounds, i.e. how many trees are added
xgc = XGBClassifier(random_state=1, n_estimators=30, reg_alpha=1)

In [50]:
#Train and evaluate the XGBoost model; the fitted model is returned
xgc=create_model(xgc)

              precision    recall  f1-score   support

           0       0.89      0.83      0.86      1760
           1       0.82      0.88      0.85      1589

    accuracy                           0.85      3349
   macro avg       0.86      0.86      0.85      3349
weighted avg       0.86      0.85      0.85      3349

[[1460  300]
 [ 186 1403]]


In [51]:
# Tabulate the importance score the fitted XGBoost model reports for each feature;
# the column keeps the notebook's 'Information Gain' label.
I = X.columns
IG = xgc.feature_importances_
d = {
    'Input Column': I,
    'Information Gain': IG,
}

# Turn the mapping into a dataframe and show it in descending order of importance
F = pd.DataFrame(data=d)
F.sort_values(by='Information Gain', ascending=False, ignore_index=True)

Unnamed: 0,Input Column,Information Gain
0,poutcome,0.181971
1,contact,0.17694
2,duration,0.150279
3,housing,0.144867
4,pdays,0.065897
5,month,0.058938
6,loan,0.040036
7,age,0.031114
8,education,0.026892
9,day,0.025787


In [52]:
#Stacking Classifier: it is a technique of the ensembling method
#We install the external package mlxtend for stacking (only the first time)
#!pip install mlxtend

In [53]:
#call mlxtend package : StackingClassifier inbuilt class
from mlxtend.classifier import StackingClassifier

In [54]:
#create object of LogisticRegression
lr=LogisticRegression(random_state=1)
#create object of DecisionTreeClassifier with gini index
dt1=DecisionTreeClassifier(random_state=1) #befault gini index
#create object of DecisionTreeClassifier with entropy
dt2=DecisionTreeClassifier(criterion="entropy",random_state=1)

In [55]:
#Create the model list 
model_list=[lr,dt1,dt2]

In [56]:
# Define the meta classifier: a LogisticRegression with default settings
# that is handed to the stacking classifier as its meta_classifier
meta = LogisticRegression()

In [57]:
# Instantiate the stacking classifier from the base models and the meta classifier
sc = StackingClassifier(
    classifiers=model_list,
    meta_classifier=meta,
)

In [58]:
#Train and evaluate the stacking classifier; the fitted model is returned
sc=create_model(sc)

              precision    recall  f1-score   support

           0       0.83      0.83      0.83      1760
           1       0.81      0.81      0.81      1589

    accuracy                           0.82      3349
   macro avg       0.82      0.82      0.82      3349
weighted avg       0.82      0.82      0.82      3349

[[1458  302]
 [ 296 1293]]


In [None]:
#AutoML : -
#required package tpot 
#!pip install tpot

In [59]:
#call 
from tpot import TPOTClassifier

In [60]:
# Instantiate the TPOT AutoML classifier: 5 generations, fixed seed
tpc = TPOTClassifier(random_state=1, generations=5)

In [61]:
#Train the TPOT model on the scaled training split (70% of the data)
tpc.fit(X_train,Y_train)

TPOTClassifier(generations=5, random_state=1)