In [28]:
# Load the libraries
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

In [60]:
# Load the data
import pandas as pd

# The file is semicolon-delimited, so the separator is passed explicitly.
csv_path = "bank-full.csv"
df = pd.read_csv(csv_path, sep=";")
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


### Prepare the data for the classification model

In [61]:
# Load the libraries for integer encoding.
from sklearn.preprocessing import LabelEncoder

The following code blocks apply integer encoding to all the categorical variables.

In [62]:
from numpy import array

# Integer-encode the "job" column; the codes are stored next to it as "job_code".
label_encoder = LabelEncoder()
job_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["job"]))
)
# .values hands assign() the raw code array rather than the wrapper DataFrame.
df = df.assign(job_code=job_integer_encoder.values)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y,job_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no,4
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no,9
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no,2
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no,1
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no,11


In [63]:
import numpy as np

# Print the distinct job labels, then the distinct integer codes assigned to them.
for column in ("job", "job_code"):
    print(np.unique(df[column]))

['admin.' 'blue-collar' 'entrepreneur' 'housemaid' 'management' 'retired'
 'self-employed' 'services' 'student' 'technician' 'unemployed' 'unknown']
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [64]:
# Integer-encode the "marital" column; the codes are stored next to it as "marital_code".
label_encoder = LabelEncoder()
marital_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["marital"]))
)
df = df.assign(marital_code=marital_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y,job_code,marital_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no,4,1
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no,9,2
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no,2,1
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no,1,1
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no,11,2


In [65]:
# Print the distinct marital labels, then the distinct integer codes assigned to them.
for column in ("marital", "marital_code"):
    print(np.unique(df[column]))

['divorced' 'married' 'single']
[0 1 2]


In [66]:
# Integer-encode the "education" column; the codes are stored next to it as "education_code".
label_encoder = LabelEncoder()
education_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["education"]))
)
df = df.assign(education_code=education_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y,job_code,marital_code,education_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no,4,1,2
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no,9,2,1
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no,2,1,1
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no,1,1,3
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no,11,2,3


In [67]:
# Print the distinct education labels, then the distinct integer codes assigned to them.
for column in ("education", "education_code"):
    print(np.unique(df[column]))

['primary' 'secondary' 'tertiary' 'unknown']
[0 1 2 3]


In [68]:
# Integer-encode the "default" column; the codes are stored next to it as "default_code".
label_encoder = LabelEncoder()
default_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["default"]))
)
df = df.assign(default_code=default_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,duration,campaign,pdays,previous,poutcome,y,job_code,marital_code,education_code,default_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,261,1,-1,0,unknown,no,4,1,2,0
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,151,1,-1,0,unknown,no,9,2,1,0
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,76,1,-1,0,unknown,no,2,1,1,0
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,92,1,-1,0,unknown,no,1,1,3,0
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,198,1,-1,0,unknown,no,11,2,3,0


In [69]:
# Print the distinct default labels, then the distinct integer codes assigned to them.
for column in ("default", "default_code"):
    print(np.unique(df[column]))

['no' 'yes']
[0 1]


In [70]:
# Integer-encode the "housing" column; the codes are stored next to it as "housing_code".
label_encoder = LabelEncoder()
housing_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["housing"]))
)
df = df.assign(housing_code=housing_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,campaign,pdays,previous,poutcome,y,job_code,marital_code,education_code,default_code,housing_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,1,-1,0,unknown,no,4,1,2,0,1
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,1,-1,0,unknown,no,9,2,1,0,1
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,1,-1,0,unknown,no,2,1,1,0,1
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,1,-1,0,unknown,no,1,1,3,0,1
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,1,-1,0,unknown,no,11,2,3,0,0


In [71]:
# Print the distinct housing labels, then the distinct integer codes assigned to them.
for column in ("housing", "housing_code"):
    print(np.unique(df[column]))

['no' 'yes']
[0 1]


In [72]:
# Integer-encode the "loan" column; the codes are stored next to it as "loan_code".
label_encoder = LabelEncoder()
loan_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["loan"]))
)
df = df.assign(loan_code=loan_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,pdays,previous,poutcome,y,job_code,marital_code,education_code,default_code,housing_code,loan_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,-1,0,unknown,no,4,1,2,0,1,0
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,-1,0,unknown,no,9,2,1,0,1,0
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,-1,0,unknown,no,2,1,1,0,1,1
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,-1,0,unknown,no,1,1,3,0,1,0
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,-1,0,unknown,no,11,2,3,0,0,0


In [73]:
# Print the distinct loan labels, then the distinct integer codes assigned to them.
for column in ("loan", "loan_code"):
    print(np.unique(df[column]))

['no' 'yes']
[0 1]


In [74]:
# Integer-encode the "contact" column; the codes are stored next to it as "contact_code".
label_encoder = LabelEncoder()
contact_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["contact"]))
)
df = df.assign(contact_code=contact_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,previous,poutcome,y,job_code,marital_code,education_code,default_code,housing_code,loan_code,contact_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,0,unknown,no,4,1,2,0,1,0,2
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,0,unknown,no,9,2,1,0,1,0,2
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,0,unknown,no,2,1,1,0,1,1,2
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,0,unknown,no,1,1,3,0,1,0,2
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,0,unknown,no,11,2,3,0,0,0,2


In [75]:
# Print the distinct contact labels, then the distinct integer codes assigned to them.
for column in ("contact", "contact_code"):
    print(np.unique(df[column]))

['cellular' 'telephone' 'unknown']
[0 1 2]


In [76]:
# Integer-encode the "poutcome" column; the codes are stored next to it as "poutcome_code".
label_encoder = LabelEncoder()
poutcome_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["poutcome"]))
)
df = df.assign(poutcome_code=poutcome_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,poutcome,y,job_code,marital_code,education_code,default_code,housing_code,loan_code,contact_code,poutcome_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,unknown,no,4,1,2,0,1,0,2,3
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,unknown,no,9,2,1,0,1,0,2,3
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,unknown,no,2,1,1,0,1,1,2,3
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,unknown,no,1,1,3,0,1,0,2,3
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,unknown,no,11,2,3,0,0,0,2,3


In [77]:
# Print the distinct poutcome labels, then the distinct integer codes assigned to them.
for column in ("poutcome", "poutcome_code"):
    print(np.unique(df[column]))

['failure' 'other' 'success' 'unknown']
[0 1 2 3]


In [78]:
# Integer-encode the "month" column; the codes are stored next to it as "month_code".
label_encoder = LabelEncoder()
month_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["month"]))
)
df = df.assign(month_code=month_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,y,job_code,marital_code,education_code,default_code,housing_code,loan_code,contact_code,poutcome_code,month_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,no,4,1,2,0,1,0,2,3,8
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,no,9,2,1,0,1,0,2,3,8
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,no,2,1,1,0,1,1,2,3,8
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,no,1,1,3,0,1,0,2,3,8
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,no,11,2,3,0,0,0,2,3,8


In [79]:
# Print the distinct month labels, then the distinct integer codes assigned to them.
for column in ("month", "month_code"):
    print(np.unique(df[column]))

['apr' 'aug' 'dec' 'feb' 'jan' 'jul' 'jun' 'mar' 'may' 'nov' 'oct' 'sep']
[ 0  1  2  3  4  5  6  7  8  9 10 11]


In [80]:
# Integer-encode the target column "y"; the codes are stored as "outcome_code".
label_encoder = LabelEncoder()
y_integer_encoder = pd.DataFrame(
    data=label_encoder.fit_transform(array(df["y"]))
)
df = df.assign(outcome_code=y_integer_encoder)
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,job_code,marital_code,education_code,default_code,housing_code,loan_code,contact_code,poutcome_code,month_code,outcome_code
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,4,1,2,0,1,0,2,3,8,0
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,9,2,1,0,1,0,2,3,8,0
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,2,1,1,0,1,1,2,3,8,0
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,1,1,3,0,1,0,2,3,8,0
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,11,2,3,0,0,0,2,3,8,0


Form a new dataset that keeps only the integer-encoded labels and a subset of the numeric variables (`duration`, `pdays` and the encoded `default_code` are left out).

In [81]:
# Columns carried forward: numeric inputs first, then the integer codes, target last.
# NOTE(review): "duration", "pdays" and "default_code" are not in this list —
# confirm that leaving them out is intended.
kept_columns = [
    "age", "balance", "day", "campaign", "previous",
    "job_code", "marital_code", "education_code", "housing_code",
    "loan_code", "contact_code", "poutcome_code", "month_code",
    "outcome_code",
]
df = df[kept_columns]
df.head()

Unnamed: 0,age,balance,day,campaign,previous,job_code,marital_code,education_code,housing_code,loan_code,contact_code,poutcome_code,month_code,outcome_code
0,58,2143,5,1,0,4,1,2,1,0,2,3,8,0
1,44,29,5,1,0,9,2,1,1,0,2,3,8,0
2,33,2,5,1,0,2,1,1,1,1,2,3,8,0
3,47,1506,5,1,0,1,1,3,1,0,2,3,8,0
4,33,1,5,1,0,11,2,3,0,0,2,3,8,0


In [86]:
# Feature matrix: every retained column except the target "outcome_code".
feature_columns = [
    "age", "balance", "day", "campaign", "previous",
    "job_code", "marital_code", "education_code", "housing_code",
    "loan_code", "contact_code", "poutcome_code", "month_code",
]
X = df[feature_columns]
X

Unnamed: 0,age,balance,day,campaign,previous,job_code,marital_code,education_code,housing_code,loan_code,contact_code,poutcome_code,month_code
0,58,2143,5,1,0,4,1,2,1,0,2,3,8
1,44,29,5,1,0,9,2,1,1,0,2,3,8
2,33,2,5,1,0,2,1,1,1,1,2,3,8
3,47,1506,5,1,0,1,1,3,1,0,2,3,8
4,33,1,5,1,0,11,2,3,0,0,2,3,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,51,825,17,3,0,9,1,2,0,0,0,3,9
45207,71,1729,17,2,0,5,0,0,0,0,0,3,9
45208,72,5715,17,5,3,5,1,1,0,0,0,2,9
45209,57,668,17,4,0,1,1,1,0,0,1,3,9


In [87]:
from sklearn import preprocessing

# Standardise every feature column with StandardScaler (zero mean, unit variance
# under its default settings).
names = X.columns
scaler = preprocessing.StandardScaler()
# Cast the integer columns to float64 up front. StandardScaler converts them
# anyway, and doing it explicitly avoids the dtype-conversion warnings that
# older scikit-learn releases print for integer input.
X = scaler.fit_transform(X.astype("float64"))
# fit_transform returns a bare NumPy array, so the column labels are restored here.
X = pd.DataFrame(X, columns=names)
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so statistics of the future test rows influence the training features.
# Consider fitting on the training split only and reusing it on the test split.
X

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Unnamed: 0,age,balance,day,campaign,previous,job_code,marital_code,education_code,housing_code,loan_code,contact_code,poutcome_code,month_code
0,1.606965,0.256419,-1.298476,-0.569351,-0.251940,-0.103820,-0.275762,1.036362,0.893915,-0.436803,1.514306,0.444898,0.823773
1,0.288529,-0.437895,-1.298476,-0.569351,-0.251940,1.424008,1.368372,-0.300556,0.893915,-0.436803,1.514306,0.444898,0.823773
2,-0.747384,-0.446762,-1.298476,-0.569351,-0.251940,-0.714951,-0.275762,-0.300556,0.893915,2.289359,1.514306,0.444898,0.823773
3,0.571051,0.047205,-1.298476,-0.569351,-0.251940,-1.020516,-0.275762,2.373280,0.893915,-0.436803,1.514306,0.444898,0.823773
4,-0.747384,-0.447091,-1.298476,-0.569351,-0.251940,2.035139,1.368372,2.373280,-1.118674,-0.436803,1.514306,0.444898,0.823773
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,0.947747,-0.176460,0.143418,0.076230,-0.251940,1.424008,-0.275762,1.036362,-1.118674,-0.436803,-0.713012,0.444898,1.156344
45207,2.831227,0.120447,0.143418,-0.246560,-0.251940,0.201746,-1.919895,-1.637474,-1.118674,-0.436803,-0.713012,0.444898,1.156344
45208,2.925401,1.429593,0.143418,0.721811,1.050473,0.201746,-0.275762,-0.300556,-1.118674,-0.436803,-0.713012,-0.566175,1.156344
45209,1.512791,-0.228024,0.143418,0.399020,-0.251940,-1.020516,-0.275762,-0.300556,-1.118674,-0.436803,0.400647,0.444898,1.156344


In [88]:
# Target: kept as a single-column DataFrame (not a Series) holding "outcome_code".
target_columns = ["outcome_code"]
y = df[target_columns]
y

Unnamed: 0,outcome_code
0,0
1,0
2,0
3,0
4,0
...,...
45206,1
45207,1
45208,1
45209,0


Split the data into train and test datasets.

In [113]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; random_state pins the shuffle seed.
split_parts = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [114]:
# Number of feature columns; used later as the network's input size.
_, length = X_test.shape
length

13

In [115]:
from keras.utils import to_categorical

# One-hot encode the integer targets of both splits.
# NOTE(review): this cell overwrites its own inputs, so re-running it without
# re-running the split would encode the already encoded arrays — confirm it is
# only executed once per split.
y_train, y_test = (to_categorical(labels) for labels in (y_train, y_test))
# The width of the one-hot matrix is the number of classes.
num_classes = y_test.shape[1]
print(num_classes)

2


In [116]:
# define classification model
def classification_model(n_features=None, n_classes=None, hidden_units=300):
    """Build and compile a small fully connected softmax classifier.

    Architecture: Dense(n_features, relu) -> Dense(hidden_units, sigmoid)
    -> Dense(n_classes, softmax), compiled with the Adam optimizer,
    categorical cross-entropy loss and the accuracy metric.

    Parameters
    ----------
    n_features : int, optional
        Number of input features; also used as the width of the first layer.
        Defaults to the notebook-level ``length``.
    n_classes : int, optional
        Number of output classes. Defaults to the notebook-level
        ``num_classes``.
    hidden_units : int, optional
        Width of the sigmoid hidden layer (300 by default).

    Returns
    -------
    The compiled, untrained ``Sequential`` model.
    """
    # Fall back to the globals set by earlier cells so that existing
    # zero-argument calls keep working unchanged.
    if n_features is None:
        n_features = length
    if n_classes is None:
        n_classes = num_classes

    # create model
    model = Sequential()
    model.add(Dense(n_features, activation='relu', input_shape=(n_features,)))
    model.add(Dense(hidden_units, activation='sigmoid'))
    model.add(Dense(n_classes, activation='softmax'))

    # compile model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [117]:
# Build a fresh model, train it for 20 epochs, then evaluate it on the test split.
# NOTE(review): the test split doubles as the validation data during training,
# so the final evaluation is not on data the training loop never saw.
model = classification_model()
fit_options = dict(
    validation_data=(X_test, y_test),
    epochs=20,
    verbose=2,
)
model.fit(X_train, y_train, **fit_options)
model.evaluate(X_test, y_test, verbose=0)

Train on 31647 samples, validate on 13564 samples
Epoch 1/20
 - 16s - loss: 0.3370 - acc: 0.8831 - val_loss: 0.3532 - val_acc: 0.8824
Epoch 2/20
 - 14s - loss: 0.3254 - acc: 0.8840 - val_loss: 0.3225 - val_acc: 0.8838
Epoch 3/20
 - 13s - loss: 0.3174 - acc: 0.8878 - val_loss: 0.3273 - val_acc: 0.8846
Epoch 4/20
 - 14s - loss: 0.3151 - acc: 0.8892 - val_loss: 0.3152 - val_acc: 0.8886
Epoch 5/20
 - 14s - loss: 0.3125 - acc: 0.8898 - val_loss: 0.3142 - val_acc: 0.8892
Epoch 6/20
 - 14s - loss: 0.3120 - acc: 0.8904 - val_loss: 0.3158 - val_acc: 0.8895
Epoch 7/20
 - 14s - loss: 0.3113 - acc: 0.8901 - val_loss: 0.3210 - val_acc: 0.8886
Epoch 8/20
 - 14s - loss: 0.3093 - acc: 0.8902 - val_loss: 0.3140 - val_acc: 0.8904
Epoch 9/20
 - 14s - loss: 0.3094 - acc: 0.8910 - val_loss: 0.3125 - val_acc: 0.8894
Epoch 10/20
 - 14s - loss: 0.3080 - acc: 0.8911 - val_loss: 0.3138 - val_acc: 0.8897
Epoch 11/20
 - 16s - loss: 0.3084 - acc: 0.8916 - val_loss: 0.3132 - val_acc: 0.8904
Epoch 12/20
 - 13s - los

[0.3137866260906266, 0.890297847225124]