# Import library

In [213]:
import numpy as np
from pandas import *
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.naive_bayes import GaussianNB

# Read data

In [214]:
data = read_csv("./banking.csv")
data.head()
# convert field of 'month'
dict_month = {'jan' : 1, 'feb' : 2, 'mar' : 3, 'apr' : 4, 'may' : 5, 'jun' : 6,
'jul' : 7, 'aug' : 8, 'sep' : 9, 'oct' : 10, 'nov' : 11, 'dec' : 12}

data['month'] = data['month'].map(dict_month)
# convert field of dayOfweek
dict_day = {'sun' : 1, 'mon' : 2, 'tue' : 3, 'wed' : 4, 'thu' : 5, 'fri' : 6,
'sat' : 7}
data['day_of_week'] = data['day_of_week'].map(dict_day)
# conver binary fields
#default :
data.default.replace({'no' : 0, 'yes' : 1, 'unknown': -1}, inplace = True)
#housing :
data.housing.replace({'no' : 0, 'yes' : 1, 'unknown': -1}, inplace = True)
#loan :
data.loan.replace({'no' : 0, 'yes' : 1, 'unknown': -1}, inplace = True)
# convert categories field by one host coding
marital_dummies = get_dummies(data['marital'], prefix = 'marital')
marital_dummies.drop('marital_divorced', axis=1, inplace=True)
data = concat([data, marital_dummies], axis=1)
job_dummies = get_dummies(data['job'], prefix = 'job')
job_dummies.drop('job_unknown', axis=1, inplace=True)
data= concat([data, job_dummies], axis=1)
education_dummies = get_dummies(data['education'], prefix = 'education')
education_dummies.drop('education_unknown', axis=1, inplace=True)
data = concat([data, education_dummies], axis=1)
contact_dummies = get_dummies(data['contact'], prefix = 'contact')
#contact_dummies.drop('contact_unknown', axis=1, inplace=True)
data = concat([data, contact_dummies], axis=1)
poutcome_dummies = get_dummies(data['poutcome'], prefix = 'poutcome')
#poutcome_dummies.drop('poutcome_unknown', axis=1, inplace=True)
data = concat([data, poutcome_dummies], axis=1)
data['pdays'] = data['pdays'].apply(lambda row: 0 if row == -1 else 1)
data.drop(['job', 'education', 'marital', 'contact', 'poutcome'], axis=1,inplace=True)
data = np.asarray(data.T)

# Split data to train data and test data

In [215]:
pivot = (round(len(data) * 80 / 100))

sp_label = data[-1].T
sp_data = data[:-1].T

train_data = sp_data[:pivot]
train_label = sp_label[:pivot]

test_data = sp_data[pivot:]
test_label = sp_label[pivot:]

# Fitting model

In [216]:
logReg = LogisticRegression(penalty='none')
logReg.fit(train_data, train_label)


#


# Predict test data

In [217]:
y_pre = logReg.predict(test_data)
print("predict validate", y_pre)
print("accuracy score", accuracy_score(test_label, y_pre))
print("recall score", recall_score(test_label, y_pre))
print("precision score", precision_score(test_label, y_pre))

predict validate [0. 1. 0. ... 0. 0. 0.]
accuracy score 0.9230937454439423
recall score 0.5756026296566837
precision score 0.23368920521945433


# Fitting model with naiveBayes

In [218]:
gauModel = GaussianNB()
gauModel.fit(train_data, train_label)

# Predict test data 

In [219]:
y_preGaus = gauModel.predict(test_data)
print("predict with gaussian naive bayes: ", y_preGaus)
print("accuracy score", accuracy_score(test_label, y_preGaus))
print("recall score", recall_score(test_label, y_preGaus))
print("precision score", precision_score(test_label, y_preGaus))
print("f1 score", f1_score(test_label, y_preGaus))

predict with gaussian naive bayes:  [0. 0. 0. ... 0. 0. 0.]
accuracy score 0.9714001069154882
recall score 0.42804967129291455
precision score 0.5979591836734693
f1 score 0.49893571732652187
