# 16th February
## Tensorflow / Keras
##### Preamble


In [1]:
import tensorflow as tf
import keras
import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt
from tqdm import tqdm
#import ipympl
%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [3]:
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

### Data Ingestion and QC

Firstly, let's check for any corruted rows - let's look for any NaN values, and drop any corrupted row. Then, we will rearrange the positions of the labels to the furthest right column, to make it easier to select and hide later.

In [4]:
noise_dataframe = pd.read_csv("../TensorFlow/Attributes_noise.csv")
event_dataframe = pd.read_csv("../TensorFlow/Attributes_events.csv")

#data cleaning
def nan_rows_sweeper(dataframe, transpose = True):
    if transpose == True:
        temp_dataframe = dataframe.transpose()
    if transpose == False:
        temp_dataframe = dataframe
    i = 0
    nan_rows = []
    while i < len(dataframe):
        if temp_dataframe.iloc[:, i].isnull().any():
            nan_rows.append(i)
        i += 1
    return dataframe.drop(nan_rows)

noise_dataframe = nan_rows_sweeper(noise_dataframe)
event_dataframe = nan_rows_sweeper(event_dataframe)

#drop the autogenerated pandas index
noise_dataframe.drop("Unnamed: 0", axis=1, inplace=True)
event_dataframe.drop("Unnamed: 0", axis=1, inplace=True)

def mean_med_label_move(dataframe):
    mean_series = dataframe.Mean
    median_series = dataframe.Median
    label_series = dataframe.Label
    dataframe.drop(["Mean", "Median", "Label"], axis=1, inplace=True)
    dataframe["Mean"]=mean_series
    dataframe["Median"]=median_series
    dataframe["Label"]=label_series
    return dataframe

#rearrange column names and positions
noise_dataframe = mean_med_label_move(noise_dataframe)
event_dataframe = mean_med_label_move(event_dataframe)

#downsample the noise dataset to match the event dataset
chosen_noise_rows_array = random.sample(list(noise_dataframe.index), len(event_dataframe))

#join the two datase
downsampled_noise_df = pd.DataFrame(index=chosen_noise_rows_array)
downsampled_noise_df = downsampled_noise_df.join(noise_dataframe, how='inner')

attributes_df = downsampled_noise_df.append(event_dataframe)

### Keras

In [13]:
dataset = attributes_df.values

In [14]:
X = dataset[:,0:202].astype(float)
Y = dataset[:,202].astype(float)

In [15]:
total = len(X)
thirty_percent= total//30

In [16]:
X_test= X[:thirty_percent]
X_train = X[thirty_percent:]

Y_test = Y[:thirty_percent]
Y_train = Y[thirty_percent:]

In [62]:
#!mkdir my_log_dirii
#! tensorboard --logdir=my_log_dir/

In [17]:
# baseline model
#def create_baseline():
    # create model
model = Sequential()
model.add(Dense(202, input_dim=(202), activation='relu'))
model.add(Dense(1, activation='sigmoid'))
    
    # Compile model
#model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#return model

In [18]:
# Compile model
#opt = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
opt = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

In [19]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 202)               41006     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 203       
Total params: 41,209
Trainable params: 41,209
Non-trainable params: 0
_________________________________________________________________
None


In [74]:
scores = model.evaluate(X_train, Y_train)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))


acc: 44.45%


------------------

In [21]:
# baseline model
def create_baseline():
    # create model
    model = Sequential()
    model.add(Dense(202, input_dim=202, kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [30]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, epochs=5, batch_size=512, verbose=1)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X_train, Y_train, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Results: 64.82% (23.07%)


In [31]:
# evaluate baseline model with standardized dataset
estimators = []
estimators.append(('standardize', StandardScaler()))
wrapper = KerasClassifier(build_fn=create_baseline, epochs=5, batch_size=512, verbose=1)
estimators.append(('mlp', wrapper))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X_train, Y_train, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Standardized: 100.00% (0.00%)


Evaulate smaller topology

In [18]:
# smaller model
def create_smaller():
	# create model
	model = Sequential()
	model.add(Dense(101, input_dim=202, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_smaller, epochs=5, batch_size=512, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X_train, Y_train, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Smaller: 100.00% (0.00%)


evaluate larger topology

In [21]:
# larger model
def create_larger():
	# create model
	model = Sequential()
	model.add(Dense(202, input_dim=202, kernel_initializer='normal', activation='relu'))
	model.add(Dense(101, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, epochs=5, batch_size=512, verbose=1)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X_train, Y_train, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Larger: 100.00% (0.00%)
