## DATA Check

In [None]:
# Switch the working directory to /home (IPython automagic); the CSV check in
# the loading cell uses a relative path, so it is resolved from here.
cd /home

In [None]:
# List the current directory, e.g. to see whether the CSV is already present.
ls

In [None]:
import pandas as pd
import os.path
import io
from google.colab import files

# Read the semicolon-separated CSV into `data`. When the file is not in the
# working directory yet, ask for it through the Colab upload widget first.
csv_path = 'bank-additional-full.csv'
if not os.path.isfile(csv_path):
  print('File not found, please reuplod.')
  uploaded = files.upload()
data = pd.read_csv(csv_path, sep=';')
print('File ready.')


## DATA Description

In [None]:
# Preview the first rows of the freshly loaded table.
data.head()

In [None]:
# Separate categorical columns (object dtype) from the remaining numeric ones
is_object_column = data.dtypes == object
categoyData = data.loc[:, is_object_column]
numericData = data.loc[:, ~is_object_column]

In [None]:
# Summary statistics of the categorical columns, one row per column
categoyData.describe().T

In [None]:
# Summary statistics of the numeric columns, one row per column
numericData.describe().T

In [None]:
# Label encoding: replace every categorical feature by the integer codes that
# pd.Categorical assigns to its distinct values.
CATEGORICAL_FEATURES = [
    'job', 'marital', 'education', 'default', 'housing',
    'loan', 'contact', 'month', 'day_of_week', 'poutcome',
]
for column in CATEGORICAL_FEATURES:
    data[column] = pd.Categorical(data[column]).codes

# One-hot encode the target column. `columns` has to be list-like: a bare
# string such as columns='y' is rejected by get_dummies with a TypeError.
data = pd.get_dummies(data=data, columns=['y'])

In [None]:
# Preview the table again after the encoding cell has rewritten its columns.
data.head()

## Preprocessing

In [None]:
from sklearn.preprocessing import StandardScaler

# Separate the one-hot target columns from the feature columns
target_columns = ['y_no', 'y_yes']
labels = data[target_columns]
features = data.drop(target_columns, axis=1)

# Data standardization: fit the scaler on the features and transform them
# NOTE(review): the scaler is fit on the complete table before any
# cross-validation split, so held-out rows contribute to the scaling
# statistics -- confirm this is acceptable for the reported test scores.
scaler = StandardScaler()
features = scaler.fit(features).transform(features)


## K-Fold (for checking data)

In [None]:
#Stratified K-Fold
# K_FOLD is passed to StratifiedKFold as n_splits and reused by the plotting
# cells as the number of folds; RANDOM_STATE is passed as its random_state.
K_FOLD=10
RANDOM_STATE=111

In [None]:
from sklearn.model_selection import StratifiedKFold

# random_state only has an effect when shuffling is enabled; recent
# scikit-learn releases raise a ValueError when it is set with shuffle=False.
skf = StratifiedKFold(n_splits=K_FOLD, shuffle=True, random_state=RANDOM_STATE)

# DataFrame to array. DataFrame.get_values() was removed in pandas 1.0;
# the .values attribute is available on old and new pandas alike.
labels = labels.values

In [None]:
# Sketch of one split: the training rows and the test rows, each with its
# feature part (X) and its label part (y).
""" 
----------------------------------------------
|                               |          |
|                               |          |
|                               |          |
|         X_train               |  y_train |
|                               |          |
|                               |          |
|                               |          |
|------------------------------------------|
|          X_test               |  y_test  |
---------------------------------------------
""" 

#For testing, slice data by index
# The first label column is used as the stratification target. Every
# iteration overwrites the previous split, so after the loop the four
# variables hold the last fold produced by skf.split.
for train_index, test_index in skf.split(features, labels[0:,0]):
    X_train, X_test = features[train_index], features[test_index]    
    y_train, y_test = labels[train_index], labels[test_index]


## Tensorflow

In [None]:
import tensorflow as tf

## Hyperparameters

In [None]:
# Network input / output widths, taken from the prepared arrays
N_INPUT = features.shape[1]
N_CLASSES = labels.shape[1]

LEARNING_RATE = 0.001    # learning rate given to the Adam optimizer
TRAINING_EPOCHS = 500    # training steps per fold (each step feeds the whole training fold)
DISPLAY_STEP = 10        # the test fold is evaluated and recorded every DISPLAY_STEP epochs
STDDEV = 0.1             # Distribution of initial weight & bias

DROP_OUT = 1.0           # fed as dropout_keep_prob during training

# Activation used by the network; alternatives: tf.nn.sigmoid, tf.nn.relu
ACTVATION = tf.nn.tanh

In [None]:
# Layer widths: these values size the placeholders and the weight/bias
# variables defined in the following cells.
n_input = N_INPUT       # input
n_hidden_1 = 4          # 1st hidden layer
n_hidden_2 = 4          # 2nd hidden layer
n_hidden_3 = 4          # 3rd hidden layer
n_classes = N_CLASSES   # output

## Placeholders

In [None]:
#Data entry 
# Graph inputs; their values are supplied through feed_dict on each sess.run.
# X takes rows of n_input features, y takes rows of n_classes label values,
# and dropout_keep_prob is the scalar handed to tf.nn.dropout inside mlp.
X = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout_keep_prob = tf.placeholder(tf.float32)


## Multilayer Perceptron (MLP)

In [None]:
#MLP definition
def mlp(_X, _weights, _biases, dropout_keep_prob):
    """Build a perceptron with three hidden layers and return its logits.

    Each hidden layer computes ACTVATION(input @ w + b) followed by dropout.

    Args:
        _X: input tensor, multiplied by ``_weights['w1']``.
        _weights: dict holding the matrices 'w1', 'w2', 'w3' and 'wout'.
        _biases: dict holding the vectors 'b1', 'b2', 'b3' and 'bout'.
        dropout_keep_prob: value passed to tf.nn.dropout after every hidden
            layer.

    Returns:
        The output of the final affine layer, without any activation.
    """
    hidden = _X
    for level in ('1', '2', '3'):
        pre_activation = tf.add(tf.matmul(hidden, _weights['w' + level]),
                                _biases['b' + level])
        hidden = tf.nn.dropout(ACTVATION(pre_activation), dropout_keep_prob)

    # No activation on the output layer: the softmax cross-entropy loss applies
    # softmax itself and expects unscaled logits. Squashing them with tanh
    # would bound every logit to [-1, 1] and cap the attainable confidence.
    out = tf.add(tf.matmul(hidden, _weights['wout']), _biases['bout'])
    return out

# Weight matrices, one per layer, drawn from a normal distribution with
# standard deviation STDDEV. Created in the order w1, w2, w3, wout.
weight_shapes = [
    ('w1', [n_input, n_hidden_1]),
    ('w2', [n_hidden_1, n_hidden_2]),
    ('w3', [n_hidden_2, n_hidden_3]),
    ('wout', [n_hidden_3, n_classes]),
]
weights = {
    key: tf.Variable(tf.random_normal(shape, stddev=STDDEV))
    for key, shape in weight_shapes
}

# Bias vectors, one per layer. They are drawn with the same STDDEV as the
# weights: without the argument tf.random_normal falls back to a standard
# deviation of 1.0, ten times the spread configured for the initial values.
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], stddev=STDDEV)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], stddev=STDDEV)),
    'b3': tf.Variable(tf.random_normal([n_hidden_3], stddev=STDDEV)),
    'bout': tf.Variable(tf.random_normal([n_classes], stddev=STDDEV))
}


In [None]:
# Build the network on the placeholders; `pred` is the network output that the
# cost and the confusion matrix cells consume.
pred = mlp(X, weights, biases, dropout_keep_prob)

## Cost function & Gradient Descent

In [None]:
# Cost function & optimizer
# cost: softmax cross-entropy between the network output and the fed labels,
# averaged over the rows of the batch.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
# optimizer: the op that performs one Adam update minimizing `cost`.
optimizer = tf.train.AdamOptimizer(learning_rate = LEARNING_RATE).minimize(cost)

## Confusion Matrix

In [None]:
# Confusion matrix layout (rows = desired class, columns = network output):
#
#                      output
#                  |  0  |  1
#   ---------------------------
#   desire    | 0  | TN  | FP
#             | 1  | FN  | TP
#
#   Precision = TP / (TP + FP)
#   Recall    = TP / (TP + FN)
#
# tf.confusion_matrix takes (labels, predictions) in that order: rows are
# indexed by the true class and columns by the predicted class. Keyword
# arguments are used so the two cannot be swapped, which would transpose the
# matrix and exchange the FP and FN cells.
confmat = tf.confusion_matrix(labels=tf.argmax(y, 1),
                              predictions=tf.argmax(pred, 1),
                              num_classes=2)

## Graph session Start

In [None]:
#Timer
import time
import math

In [None]:
def _f1_from_confusion(confusion):
    """Return the F1 score of class 1 from a 2x2 confusion matrix.

    F1 = 2*TP / (2*TP + FP + FN), reported as 0.0 when the denominator is zero.
    FP and FN only enter through their sum, so the two off-diagonal cells are
    simply added together.
    """
    true_positive = confusion[1, 1]
    false_total = confusion[0, 1] + confusion[1, 0]
    denominator = 2 * true_positive + false_total
    if not denominator:
        # Nothing predicted or labelled as class 1: avoid ZeroDivisionError
        return 0.0
    return 2.0 * true_positive / denominator


# One row per recorded step:
# [fold, epoch, train cost, test cost, train F1, test F1]
history = []
tStart = time.time()

for currentFold, (train_index, test_index) in enumerate(
        skf.split(features, labels[0:,0]), start=1):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    print("CurrentFold: ",currentFold)

    # A fresh session with re-initialized variables for every fold; the
    # context manager closes the session, so no explicit close() is needed.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(TRAINING_EPOCHS):
            # One optimizer step on the whole training fold; cost and
            # confusion matrix are fetched in the same run.
            _, costTrain, confusionTrain = sess.run(
                [optimizer, cost, confmat],
                feed_dict={X: X_train, y: y_train, dropout_keep_prob: DROP_OUT})

            if (epoch+1) % DISPLAY_STEP == 0:
                # Evaluate on the held-out fold with dropout switched off
                costTest, confusionTest = sess.run(
                    [cost, confmat],
                    feed_dict={X: X_test, y: y_test, dropout_keep_prob: 1.0})

                FscoreTrain = _f1_from_confusion(confusionTrain)
                FscoreTest = _f1_from_confusion(confusionTest)

                history.append([currentFold, epoch+1, costTrain, costTest, FscoreTrain, FscoreTest])

        # Elapsed time is measured from before the first fold (cumulative)
        tEnd = time.time()
        print("--------------------------------------------------Timer: %.5f sec----------------------------------------------------------" %(tEnd-tStart))

## Draw plot

In [None]:
#Draw plot
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Plot frame: number of history rows recorded per fold
frame = TRAINING_EPOCHS // DISPLAY_STEP

In [None]:
# Loss curve: one figure per fold with the train and test cost over the epochs
history_columns = np.transpose(history)   # row 1 = epoch, 2 = train cost, 3 = test cost
for fold in range(K_FOLD):
    fold_rows = slice(fold * frame, (fold + 1) * frame)
    epochs = history_columns[1, fold_rows]
    plt.plot(epochs, history_columns[2, fold_rows], linestyle='-')
    plt.plot(epochs, history_columns[3, fold_rows], linestyle='-')
    plt.title('fold-' + str(fold + 1))
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['costTrain', 'costTest'], loc='upper right')
    plt.show()


In [None]:
# F1 score curve: one figure per fold with the train and test F1 over the epochs
history_columns = np.transpose(history)   # row 1 = epoch, 4 = train F1, 5 = test F1
for fold in range(K_FOLD):
    fold_rows = slice(fold * frame, (fold + 1) * frame)
    epochs = history_columns[1, fold_rows]
    plt.plot(epochs, history_columns[4, fold_rows], linestyle='-')
    plt.plot(epochs, history_columns[5, fold_rows], linestyle='-')
    plt.title('fold-' + str(fold + 1))
    plt.ylabel('F1score')
    plt.xlabel('epoch')
    plt.legend(['Train', 'Test'], loc='lower right')
    plt.show()


In [None]:
# Result table: per fold, the best recorded train F1, the epoch at which it
# was recorded, and the test F1 recorded at that same step
history_columns = np.transpose(history)   # row 4 = train F1, 5 = test F1
for fold in range(K_FOLD):
  fold_rows = slice(fold * frame, (fold + 1) * frame)
  train_f1 = history_columns[4, fold_rows]
  test_f1 = history_columns[5, fold_rows]
  best_row = train_f1.argmax()
  best_train = train_f1.max()
  best_epoch = (best_row + 1) * DISPLAY_STEP
  print('fold-%d\tBest Train F1 score: %.5f\tepoch: %d\tTest F1 score: %.5f' % ((fold+1), best_train, best_epoch, test_f1[best_row]))