In [97]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


In [98]:
# Read the semicolon-separated file "bank-full.csv"; its first row supplies the column names.
client = pd.read_csv("bank-full.csv", delimiter=';', header=0)
# Preview the first rows of the loaded frame.
client.head()


Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,may,261,1,-1,0,unknown,no
1,44,technician,single,secondary,no,29,yes,no,unknown,5,may,151,1,-1,0,unknown,no
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,may,76,1,-1,0,unknown,no
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,may,92,1,-1,0,unknown,no
4,33,unknown,single,unknown,no,1,no,no,unknown,5,may,198,1,-1,0,unknown,no


In [99]:
# Report the (rows, columns) dimensions of the loaded frame.
n_rows, n_cols = client.shape
print((n_rows, n_cols))

(45211, 17)


In [100]:
# Discard every row that contains at least one missing value.
client = client.dropna(axis=0, how='any')

In [101]:
# Drop the columns that are not needed for the analysis.
# They are selected by name rather than by position: positional indices would
# silently remove different columns if the file layout changed or if this cell
# were executed a second time, whereas names fail loudly with a KeyError.
UNUSED_COLUMNS = ['age', 'education', 'balance', 'contact', 'day',
                  'month', 'duration', 'campaign', 'pdays', 'previous']
client = client.drop(columns=UNUSED_COLUMNS)

In [102]:
# Preview the first five rows of the reduced frame.
client.head(n=5)

Unnamed: 0,job,marital,default,housing,loan,poutcome,y
0,management,married,no,yes,no,unknown,no
1,technician,single,no,yes,no,unknown,no
2,entrepreneur,married,no,yes,yes,unknown,no
3,blue-collar,married,no,yes,no,unknown,no
4,unknown,single,no,no,no,unknown,no


In [103]:
# One-hot encode the categorical predictors (one dummy column per category);
# the 'y' column is not listed, so it is passed through unchanged.
categorical_cols = ['job', 'marital', 'default', 'housing', 'loan', 'poutcome']
data = pd.get_dummies(client, columns=categorical_cols)
data.head()

Unnamed: 0,y,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,...,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,no,0,0,0,0,1,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1
1,no,0,0,0,0,0,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1
2,no,0,0,1,0,0,0,0,0,0,...,1,0,0,1,0,1,0,0,0,1
3,no,0,1,0,0,0,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1
4,no,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,0,1


In [104]:
# Display the encoded frame; the notebook rendering elides the middle rows and columns.
data

Unnamed: 0,y,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,...,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success,poutcome_unknown
0,no,0,0,0,0,1,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1
1,no,0,0,0,0,0,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1
2,no,0,0,1,0,0,0,0,0,0,...,1,0,0,1,0,1,0,0,0,1
3,no,0,1,0,0,0,0,0,0,0,...,1,0,0,1,1,0,0,0,0,1
4,no,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45206,yes,0,0,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,0,1
45207,yes,0,0,0,0,0,1,0,0,0,...,1,0,1,0,1,0,0,0,0,1
45208,yes,0,0,0,0,0,1,0,0,0,...,1,0,1,0,1,0,0,0,1,0
45209,no,0,1,0,0,0,0,0,0,0,...,1,0,1,0,1,0,0,0,0,1


In [105]:
# Display the column labels produced by the one-hot encoding.
data.columns

Index(['y', 'job_admin.', 'job_blue-collar', 'job_entrepreneur',
       'job_housemaid', 'job_management', 'job_retired', 'job_self-employed',
       'job_services', 'job_student', 'job_technician', 'job_unemployed',
       'job_unknown', 'marital_divorced', 'marital_married', 'marital_single',
       'default_no', 'default_yes', 'housing_no', 'housing_yes', 'loan_no',
       'loan_yes', 'poutcome_failure', 'poutcome_other', 'poutcome_success',
       'poutcome_unknown'],
      dtype='object')

In [106]:
# Drop the dummy columns that encode the "unknown" category.
# They are selected by name instead of by position so the cell cannot remove
# the wrong columns if the encoding order changes, and so an accidental re-run
# fails with a clear KeyError rather than an out-of-range positional index.
UNKNOWN_DUMMIES = ['job_unknown', 'poutcome_unknown']
data = data.drop(columns=UNKNOWN_DUMMIES)

In [107]:
# Re-inspect the column labels after the drop in the previous cell.
data.columns

Index(['y', 'job_admin.', 'job_blue-collar', 'job_entrepreneur',
       'job_housemaid', 'job_management', 'job_retired', 'job_self-employed',
       'job_services', 'job_student', 'job_technician', 'job_unemployed',
       'marital_divorced', 'marital_married', 'marital_single', 'default_no',
       'default_yes', 'housing_no', 'housing_yes', 'loan_no', 'loan_yes',
       'poutcome_failure', 'poutcome_other', 'poutcome_success'],
      dtype='object')

In [108]:
# Build the feature matrix: every column except the target 'y'.
# The target is excluded by name so this does not depend on 'y' happening to
# be the first column of the encoded frame.
X = data.drop(columns=['y'])
X.head()

Unnamed: 0,job_admin.,job_blue-collar,job_entrepreneur,job_housemaid,job_management,job_retired,job_self-employed,job_services,job_student,job_technician,...,marital_single,default_no,default_yes,housing_no,housing_yes,loan_no,loan_yes,poutcome_failure,poutcome_other,poutcome_success
0,0,0,0,0,1,0,0,0,0,0,...,0,1,0,0,1,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,...,1,1,0,0,1,1,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,1,0,0,1,0,1,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,0,1,0,0,1,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,1,1,0,1,0,1,0,0,0,0


In [109]:
# Build the target vector: the 'y' column, selected by label rather than by
# position so it does not depend on the column order of the encoded frame.
Y = data['y']
Y.head()

0    no
1    no
2    no
3    no
4    no
Name: y, dtype: object

In [110]:
# Partition features and target into train/test subsets; the fixed seed keeps
# the shuffle reproducible between runs.
partitions = train_test_split(X, Y, random_state=0)
X_train, X_test, Y_train, Y_test = partitions


In [111]:
# Build the logistic-regression model and fit it on the training partition.
# fit() returns the estimator itself, so construction and fitting can be chained.
classifier = LogisticRegression(solver='lbfgs', random_state=0).fit(X_train, Y_train)
classifier

LogisticRegression(random_state=0)

In [112]:
# Predict a class label for every row of the test partition and show the result.
predicted_y = classifier.predict(X=X_test)
predicted_y

array(['no', 'no', 'no', ..., 'no', 'no', 'no'], dtype=object)

In [113]:
# Print the positions (within the test partition) of the rows predicted as 'yes'.
# The classifier is trained on the string labels 'yes'/'no', so its predictions
# are strings too; comparing them with the integer 1 never matches and the
# original loop printed nothing.
for x in np.flatnonzero(predicted_y == 'yes'):
    print(x, end="\t")

In [114]:
# Score the fitted model on the held-out test partition and report it to two decimals.
test_accuracy = classifier.score(X_test, Y_test)
print('Accuracy: {:.2f}'.format(test_accuracy))

Accuracy: 0.89
