# Machine Learning Mini Project 

### MSBA Seminar Spring 2021

### By Glen Barlow Via Nickolas Freeman

## Step One: Read and prepare data for ML Algorithms

In [1]:
import pandas as pd
import pathlib

# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option('display.max_columns', None)
# IPython line magic (not plain Python): turn off Jedi in the tab completer.
%config Completer.use_jedi = False

# Load the training data from the project-relative data/ directory.
data_path = pathlib.Path('data') / 'train.csv'
data = pd.read_csv(data_path)

# One-hot encode each categorical column: the indicator columns are appended
# on the right of the frame and the original text column is removed.
cols_to_consider = ['Geography', 'Gender']
for col in cols_to_consider:
    indicator_columns = pd.get_dummies(data[col])
    data = pd.concat([data.drop(columns=[col]), indicator_columns], axis=1)
    
#data.head()

## Step Two: Feature Engineering and Data Transformation for Machine Learning Format

In [2]:
# Label column; every other column of `data` is treated as a model input.
target = 'Exited'
features = data.columns[data.columns != target].tolist()
# Evaluated but not displayed, because it is not the last expression of the cell.
data.loc[0, features].to_dict()

from sklearn import preprocessing

# Standardise every feature column. fit_transform() both fits the scaler and
# returns the transformed values, so a separate fit() call is unnecessary.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test rows influence the scaling statistics - confirm this is intended.
scaler = preprocessing.StandardScaler()
scaled_values = scaler.fit_transform(data[features])

# Rebuild a DataFrame with the original feature names and re-attach the
# (unscaled) target column.
scaled_data = pd.DataFrame(scaled_values, columns = features)
scaled_data[target] = data[target]

# Per-feature scaling parameters, keyed by feature name.
scaler_means = dict(zip(features, scaler.mean_))
scaler_sigmas = dict(zip(features, scaler.scale_))


import json
# Persist the per-feature scaling parameters as JSON.
# Presumably the deployed endpoints use them to scale raw inputs - TODO confirm.
with open('scaler_means.json', 'w') as fout:
    json.dump(scaler_means, fout)

# The dump below was previously commented out, which left this `with` block
# without a body (a SyntaxError) and meant the sigmas file was never written.
with open('scaler_sigmas.json', 'w') as fout:
    json.dump(scaler_sigmas, fout)

## Step Three: Fit and Save the Models

In [3]:
from sklearn.model_selection import train_test_split

# Stratified hold-out split (scikit-learn's default test size) so both
# partitions keep the class balance of `target`; seeded for reproducibility.
train, test = train_test_split(
    scaled_data,
    stratify=scaled_data[target],
    random_state=0,
)
x_train, x_test = train[features], test[features]
y_train, y_test = train[target], test[target]

### Gradient Boosted Decision Tree Model

In [8]:
# `pickle` is imported here because this is the first cell that uses it;
# previously it was only imported in a later cell, so a fresh
# Restart & Run All failed with a NameError at the dump below.
import pickle

from sklearn.ensemble import GradientBoostingClassifier

# Gradient-boosted trees with library defaults, seeded for reproducibility.
clf = GradientBoostingClassifier(random_state = 0)

clf = clf.fit(x_train, y_train)

# Serialise the fitted model to disk.
with open('gradientboost.pkl', 'wb') as f:
    pickle.dump(clf, f)

# To reload the model later:
#with open('gradientboost.pkl', 'rb') as f:
#    clf2 = pickle.load(f)

### AdaBoosted Decision Trees with Hyperparameter Tuning

In [6]:
import pickle
from sklearn.ensemble import AdaBoostClassifier

# Fixed AdaBoost hyperparameters (presumably the outcome of an earlier tuning
# run - the exact float value is kept as-is).
adaboost_params = dict(
    learning_rate=0.09000000000000001,
    n_estimators=149,
)

# fit() returns the estimator itself, so construction and training can be chained.
clf = AdaBoostClassifier(random_state=0, **adaboost_params).fit(x_train, y_train)

# Serialise the fitted model to disk.
with open('adaboost.pkl', 'wb') as f:
    pickle.dump(clf, f)

# To reload the model later:
#with open('adaboost.pkl', 'rb') as f:
#    clf2 = pickle.load(f)

### Deep Neural Network With Dropout

In [7]:
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input

# Seed TensorFlow so weight initialisation and dropout are repeatable.
tf.random.set_seed(0)

# Small feed-forward binary classifier:
# Dense(9) -> Dropout(0.2) -> Dense(6) -> Dropout(0.1) -> sigmoid output.
model = Sequential()
model.add(Dense(9, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(6, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss = 'binary_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])

# Train on the training partition; Keras holds out 20% of it for validation.
X, y = train[features].values, train[target].values
history = model.fit(X, y,
          epochs = 95,
          batch_size = 10,
          verbose = 0,
          validation_split = 0.2)

# Save the neural network itself. The previous code pickled `clf`, which at
# this point is the AdaBoost classifier, so neural.pkl held the wrong model.
# NOTE(review): pickling a Keras model requires a Keras/TensorFlow version that
# supports it; if this raises, switch to model.save(...) and update the loader.
with open('neural.pkl', 'wb') as f:
    pickle.dump(model, f)

# To reload the model later:
#with open('neural.pkl', 'rb') as f:
#    clf2 = pickle.load(f)

### Combine into One Ensemble Model

In [None]:
def ensemble_vote(gradient_response, adaboost_response, neural_response):
    """Combine three binary model predictions by majority vote.

    Replaces the earlier pseudo-code, which referenced undefined names and
    could not be executed.

    Parameters
    ----------
    gradient_response, adaboost_response, neural_response : int
        The 0/1 prediction of each individual model.

    Returns
    -------
    int
        1 when at least two of the three models predict 1, otherwise 0.
    """
    responses = gradient_response + adaboost_response + neural_response
    return 1 if responses >= 2 else 0
    

## Step 4: Deploy HTTP Endpoint

In [None]:
#See Demo

## Step 5: Make Requests to Deployed HTTP Endpoints

In [None]:
import requests

# TODO: replace each placeholder with the URL of the deployed endpoint.
# They are strings so the cell parses; the bare <...> form was a SyntaxError.
neural_url = '<neural endpoint>'
adaboost_url = '<adaboost endpoint>'
gradient_url = '<gradient endpoint>'

In [None]:
# Pick one row of the unscaled `data` frame and turn its feature values into a
# {feature name: value} dict to use as the request payload.
index = 100
data_dict = data.loc[index, features].to_dict()
# Last expression of the cell, so the payload is displayed for inspection.
data_dict

In [None]:
# Send the same feature payload to each deployed model, in turn, and decode
# the JSON body of every reply.
response = requests.post(neural_url, json=data_dict)
nn_prediction = response.json()
response = requests.post(adaboost_url, json=data_dict)
ab_prediction = response.json()
response = requests.post(gradient_url, json=data_dict)
gb_prediction = response.json()

# Majority vote: the ensemble answers 1 when at least two models answer 1.
responces = nn_prediction + ab_prediction + gb_prediction
output = 1 if responces >= 2 else 0

print(f'{index}: NN -> {nn_prediction}, Ada -> {ab_prediction}, Grad -> {gb_prediction}')
print(f'{index}: Ensemble Response -> {output}')

In [None]:
# Query all three endpoints for every row and record each model's prediction,
# the majority-vote ensemble prediction and the true label.
prediction_comparison = {}
for index in data.index:
    # Lightweight progress indicator.
    if (index % 25) == 0:
        print(f'Starting index {index}')
    data_dict = data.loc[index, features].to_dict()

    response = requests.post(neural_url, json = data_dict)
    nn_prediction = response.json()

    response = requests.post(adaboost_url, json = data_dict)
    ab_prediction = response.json()

    response = requests.post(gradient_url, json = data_dict)
    gb_prediction = response.json()

    # Recompute the majority vote for THIS row. Previously `output` was never
    # updated inside the loop, so every row stored the stale value left over
    # from an earlier cell.
    output = 1 if (nn_prediction + ab_prediction + gb_prediction) >= 2 else 0

    prediction_comparison[index] = {
        'NN': nn_prediction,
        'AdaBoost': ab_prediction,
        'GradBoost': gb_prediction,
        'Ensemble': output,
        'Actual': data.loc[index, target]
    }

# from_dict is a classmethod; call it on the class rather than on a throwaway instance.
prediction_comparison = pd.DataFrame.from_dict(prediction_comparison, orient = 'index')

In [None]:
# Fraction of rows where the neural network's prediction matches the true label.
prediction_comparison['NN'].eq(prediction_comparison['Actual']).sum() / len(data)

In [None]:
# Fraction of rows where the AdaBoost prediction matches the true label.
prediction_comparison['AdaBoost'].eq(prediction_comparison['Actual']).sum() / len(data)

In [None]:
# Fraction of rows where the gradient-boosting prediction matches the true label.
prediction_comparison['GradBoost'].eq(prediction_comparison['Actual']).sum() / len(data)

In [None]:
# Fraction of rows where the ensemble prediction matches the true label.
prediction_comparison['Ensemble'].eq(prediction_comparison['Actual']).sum() / len(data)