In [1]:
import pandas as pd
from sklearn.neural_network import MLPClassifier as MLPC
from sklearn.model_selection import train_test_split as TTS
from sklearn import metrics
import numpy as np
from ann_visualizer.visualize import ann_viz

In [2]:
data = pd.read_csv('C:/Users/ankur/Documents/Datasets/bank-additional-full.csv', sep=';')
y = data['y']
data.drop('y', axis='columns', inplace=True)
len(data)

41188

In [3]:
data.head(n=10)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,duration,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,261,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,149,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
2,37,services,married,high.school,no,yes,no,telephone,may,mon,226,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,151,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
4,56,services,married,high.school,no,no,yes,telephone,may,mon,307,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
5,45,services,married,basic.9y,unknown,no,no,telephone,may,mon,198,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
6,59,admin.,married,professional.course,no,no,no,telephone,may,mon,139,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
7,41,blue-collar,married,unknown,unknown,no,no,telephone,may,mon,217,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
8,24,technician,single,professional.course,no,yes,no,telephone,may,mon,380,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0
9,25,services,single,high.school,no,yes,no,telephone,may,mon,50,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0


In [4]:
data['Product_Subscribed'] = y.map({'yes':1, 'no':0})

In [5]:
data.head(n=10)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,Product_Subscribed
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
5,45,services,married,basic.9y,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
6,59,admin.,married,professional.course,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
7,41,blue-collar,married,unknown,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
8,24,technician,single,professional.course,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0
9,25,services,single,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,0


In [6]:
jobs = list(data.job)
for i in range(0, len(jobs)):
    if jobs[i] == 'housemaid' or jobs[i] == 'student' or jobs[i] == 'unknown':       #Assumed unemployed & inexperienced
        jobs[i] = 0
    elif jobs[i] == 'blue-collar' or jobs[i] == 'retired' or jobs[i] == 'unemployed':  #Assumed to be in need of money but experienced
        jobs[i] = 1
    else:                                                                            #People with sufficiently paying jobs
        jobs[i] = 2
data['job'] = pd.Series(jobs)

data.drop('default', axis='columns', inplace=True)    #Gives way too less and irrelevant information

data.drop('day_of_week', axis='columns', inplace=True)    #Irrelevant information

data.drop('month', axis='columns', inplace=True)    #According to Newton, people buy products in May and September only

data['marital'] = data['marital'].map({'single':0, 'unknown':0, 'married': 1, 'divorced': 2})  #Unknown = single? Yep. Whatcha gonna do about it?

education = list(data.education)
for i in range(len(education)):
    if education[i] == 'illiterate' or education[i] == 'high.school' or education[i] == 'unknown':  #Y'all ain't know what money is. Shut up.
        education[i] = 0
    else:
        education[i] = 1
data['education'] = pd.Series(education)

data['housing'] = data['housing'].map({'no':0, 'unknown':0, 'yes': 1})  #If ya unknown, ya hOmElEsS

data['loan'] = data['loan'].map({'no':0, 'unknown':0, 'yes': 1})

data.drop('contact', axis='columns', inplace=True)     #Y'all basically contacted them. Why include seperately -_-

data.drop('poutcome', axis='columns', inplace=True)    #Everyone is non-existent. Bro this dataset dumb

In [7]:
data.head(10) #Preprocessing complete. Remind me to never take Machine Learning as a subject at any point in my life. Not even at gunpoint.

Unnamed: 0,age,job,marital,education,housing,loan,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,Product_Subscribed
0,56,0,1,1,0,0,261,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
1,57,2,1,0,0,0,149,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
2,37,2,1,0,1,0,226,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
3,40,2,1,1,0,0,151,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
4,56,2,1,0,0,1,307,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
5,45,2,1,1,0,0,198,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
6,59,2,1,1,0,0,139,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
7,41,1,1,0,0,0,217,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
8,24,2,0,1,1,0,380,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0
9,25,2,0,0,1,0,50,1,999,0,1.1,93.994,-36.4,4.857,5191.0,0


In [8]:
data[~data.applymap(np.isreal).all(1)]

Unnamed: 0,age,job,marital,education,housing,loan,duration,campaign,pdays,previous,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,Product_Subscribed


In [9]:
X = data.drop('Product_Subscribed', axis='columns', inplace=False)
y = data.Product_Subscribed

In [10]:
X_train, X_test, y_train, y_test = TTS(X, y, test_size=0.25, random_state=69)

In [13]:
clf = MLPC(hidden_layer_sizes=(200, 150, 100), activation='logistic', solver='adam', learning_rate_init=0.0001, max_iter=1000000)
clf.fit(X_train, y_train)

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(200, 150, 100), learning_rate='constant',
       learning_rate_init=0.0001, max_iter=1000000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=None, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=False, warm_start=False)

In [14]:
pred = clf.predict(X_test)
print('Accuracy = ', metrics.accuracy_score(y_test, pred)*100, '%')
print('Recall Score = ',metrics.recall_score(y_test, pred))
print('Precision = ',metrics.precision_score(y_test, pred))
print('Confusion matrix : \n',metrics.confusion_matrix(y_test, pred))

Accuracy =  90.8808390793435 %
Recall Score =  0.4333612740989103
Precision =  0.6628205128205128
Confusion matrix : 
 [[8841  263]
 [ 676  517]]
