# Neural Network

## Setup

### Run the Data Preprocessing notebook once to import the get_data method

In [1]:
%run data_preprocessing.ipynb

### Run the Data Evaluation notebook once to import the show_evaluation method

In [2]:
%run data_evaluation.ipynb

### Imports

In [3]:
import numpy as np
import pandas as pd

from keras import regularizers
from keras.models import Sequential
from keras.layers import BatchNormalization, Dense, Dropout    

### Hyperparmeters

In [4]:
# for get_data
enable_feature_engineering = True
enable_outlier_handling = True
normalize = 'minmax'
use_one_hot_encoding = True
split_size = (0.7, 0.0, 0.3)

### Get the data

In [5]:
y_train, x_train, y_val, x_val, y_test, x_test = get_data(enable_feature_engineering, enable_outlier_handling, normalize, use_one_hot_encoding, split_size)

## Model

### Create the model

In [6]:
classifier = Sequential()

classifier.add(Dense(256, activation='relu', input_shape=(x_train.shape[1],)))
classifier.add(Dense(256, activation='relu', ))
classifier.add(Dense(256, activation='relu', ))
classifier.add(Dense(1, activation='sigmoid'))

classifier.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
classifier.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 256)               4352      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 257       
Total params: 136,193
Trainable params: 136,193
Non-trainable params: 0
_________________________________________________________________


### Train the model

In [7]:
def plot_hist(h, xsize=6, ysize=10):
    # Prepare plotting
    fig_size = plt.rcParams["figure.figsize"]
    plt.rcParams["figure.figsize"] = [xsize, ysize]
    fig, axes = plt.subplots(nrows=4, ncols=4, sharex=True)
    
    # summarize history for MAE
    plt.subplot(211)
    plt.plot(h['accuracy'])
    plt.plot(h['val_accuracy'])
    plt.title('Training vs Validation Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    
    # summarize history for loss
    plt.subplot(212)
    plt.plot(h['loss'])
    plt.plot(h['val_loss'])
    plt.title('Training vs Validation Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    
    # Plot it all in IPython (non-interactive)
    plt.draw()
    plt.show()

In [8]:
history = classifier.fit(
    x_train.astype('float32'),
    y_train.astype('float32'),
    batch_size=128,
    epochs=25,
    validation_data=(x_val.astype('float32'), y_val.astype('float32'))
)

plot_hist(history.history)

"""
x_train = x_train
y_train = y_train

from sklearn.model_selection import KFold
kf = KFold(n_splits=5, random_state=42, shuffle = True)
for train_index, test_index in kf.split(x_train):
    x_fold_train, x_fold_val = x_train.iloc[train_index], x_train.iloc[test_index]
    y_fold_train, y_fold_val = y_train.iloc[train_index], y_train.iloc[test_index]
    
    history = classifier.fit(
        x_fold_train.astype('float32'),
        y_fold_train.astype('float32'),
        batch_size=64,
        epochs=10,
        validation_data=(x_fold_val.astype('float32'), y_fold_val.astype('float32'))
    )
    
    plot_hist(history.history)
    
"""

AttributeError: 'NoneType' object has no attribute 'astype'

## Evaluation

### Predict the test set

In [None]:
y_prob = classifier.predict(x_test.astype('float32'))
y_pred = np.round(y_prob)

### Apply the evaluation metrics

In [None]:
show_evaluation(y_test, y_pred, y_prob)