# 1D Convolutional Neural Networks

In [2]:
import time
import gc
import pandas as pd
import numpy as np
import sys
sys.path.append("../src")
from preprocessing import *
from plotting import *

In [3]:
df_db = group_datafiles_byID('../datasets/preprocessed/HT_Sensor_prep_metadata.dat', '../datasets/preprocessed/HT_Sensor_prep_dataset.dat')
df_db = reclassify_series_samples(df_db)

In [4]:
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score, accuracy_score


features_id = ['R1', 'R2', 'R3', 'R4', 'R5', 'R6', 'R7', 'R8', 'Temp.', 'Humidity', 'id']
features = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

class_dict = {
    'background':0,
    'banana':1,
    'wine':2
}

def class_to_int(y):
  return np.array([class_dict[x] for x in y.tolist()])

def get_majority_class(y):
  (values, counts) = np.unique(y,return_counts=True)
  ind = np.argmax(counts)
  return values[ind]

def separate_ids(df_db):
  X = df_db[features_id].values
  y = df_db['class'].values
  y = class_to_int(y)
  ids = np.unique(X[:,-1])
  new_X = []
  new_y = []
  for id in ids:
    idx = np.where(X[:,-1] == id)
    new_X.append(X[idx][:,features])
    new_y.append(y[idx])
  return new_X, new_y

def generate_sequences(X, y, seq_len, step):
  new_X = []
  new_y = []
  for i in range(0, X.shape[0] - seq_len, step):
    new_X.append( X[i: i + seq_len] )
    new_y.append( get_majority_class(y[i: i + seq_len]) )
  num_seqs = len(new_X)
  new_X = np.array(new_X)
  new_y = to_categorical(new_y, num_classes=3)
  return new_X, new_y, num_seqs

def raw_windows(X, y, seq_len, step):
  new_X = []
  new_y = []
  total_seqs = 0
  for id in range(len(X)):
    X_id, y_id, num_seqs = generate_sequences(X[id], y[id], seq_len, step)
    new_X.append(X_id)
    new_y.append(y_id)
    total_seqs += num_seqs
  return new_X, new_y, total_seqs

def get_train_test_split(X, y, test_size=0.33, concatenate_train=True, cnn=False):
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
  if concatenate_train:
    X_train = np.concatenate(X_train)
    y_train = np.concatenate(y_train)
    if cnn:
      X_train = np.reshape(X_train, (-1,seq_len,10,1))
  X_test = np.concatenate(X_test)
  y_test = np.concatenate(y_test)
  if cnn:
    X_test = np.reshape(X_test, (-1,seq_len,10,1))
  return X_train, X_test, y_train, y_test

def get_kfold_split(X, y, k=5, cnn=False):
  kf = KFold(k, shuffle=True)
  X = np.array(X, dtype='O')
  y = np.array(y, dtype='O')
  train_val_sets = []
  for train_idx, val_idx in kf.split(X, y):
    if cnn:
      train_val_sets.append( (np.reshape(np.concatenate(X[train_idx]), (-1,seq_len,10,1)), np.reshape(np.concatenate(X[val_idx]), (-1,seq_len,10,1)), np.concatenate(y[train_idx]), np.concatenate(y[val_idx])) )
    else:
      train_val_sets.append( (np.concatenate(X[train_idx]), np.concatenate(X[val_idx]), np.concatenate(y[train_idx]), np.concatenate(y[val_idx])) )
  return train_val_sets

def model_ensemble_pred(models, X_test):
  preds = np.zeros((len(X_test), 3))
  for model in models:
    preds += model.predict(X_test)
  return preds/len(X_test)

def get_f1score(y_pred, y_true):
  y_true_bool = np.argmax(y_true, axis=1)
  y_pred_bool = np.argmax(y_pred, axis=1)
  return f1_score(y_true_bool, y_pred_bool, average='weighted')

def info_summary(info):
  for key in info.keys():
    print('hyperparameters:\t', key, "\n")
    print("\taverage accuracy:\t", np.mean(info[key]['val_accs']), "\tstd:\t", np.std(info[key]['val_accs']))
    print("\taverage f1score:\t", np.mean(info[key]['val_f1scores']), "\tstd:\t", np.std(info[key]['val_f1scores']), "\n")

def ensemble_test(models, X_test, y_test):
  y_pred = model_ensemble_pred(models, X_test)
  y_test_bool = np.argmax(y_test, axis=1)
  y_pred_bool = np.argmax(y_pred, axis=1)
  print("final:")
  print("\ttest accuracy:\t", accuracy_score(y_test_bool, y_pred_bool))
  print("\ttest f1score:\t", f1_score(y_test_bool, y_pred_bool, average='weighted'))

In [5]:
seq_len = 120
step = 10

X, y = separate_ids(df_db)
X, y, total_seqs = raw_windows(X, y, seq_len, step)

In [6]:
from tensorflow.keras.layers import Dense, GRU, LSTM, Input, Conv1D, MaxPooling1D, Flatten
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.callbacks import EarlyStopping

opti = Adam()
er = EarlyStopping(monitor='val_accuracy',
                   min_delta=0,
                   patience=10,
                   restore_best_weights=True,
                   verbose=0,
                   mode='auto')

def get_cnn_model(kernel_size, pool_size, filters):
  model = Sequential()
  model.add(Conv1D(filters, kernel_size, activation='relu', input_shape=(seq_len, 10)))
  model.add(MaxPooling1D(pool_size))
  model.add(Conv1D(filters, kernel_size, activation='relu'))
  model.add(MaxPooling1D(pool_size))
  model.add(Conv1D(filters, kernel_size, activation='relu'))
  model.add(MaxPooling1D(pool_size))
  model.add(Flatten())
  model.add(Dense(8, activation='relu'))
  model.add(Dense(3, activation='softmax'))
  model.compile(loss='categorical_crossentropy',
              optimizer=opti,
              metrics = ['accuracy'])
  model.summary()
  return model

In [7]:
X_train, X_test, y_train, y_test = get_train_test_split(X, y, concatenate_train=False, test_size=0.2)

In [8]:
epochs=500
batch_size=512
train_val_sets = get_kfold_split(X_train, y_train, k=5)

In [10]:
kernel_sizes = [3,5,7]
pool_sizes = [2]
filters = [2,4,8,16]

info = {}

for x in kernel_sizes:
  for y in pool_sizes:
    for z in filters:
      info[(x,y,z)] = {'models':[], 'val_accs':[], 'val_f1scores':[]}
      for bag in train_val_sets:
        model = get_cnn_model(x,y,z)
        X_train_, X_val, y_train_, y_val = bag
        h = model.fit(X_train_, y_train_, epochs=epochs, batch_size=batch_size, shuffle=True, validation_data=(X_val, y_val), callbacks=[er])
        info[(x,y,z)]['models'].append(model)
        info[(x,y,z)]['val_accs'].append(h.history['val_accuracy'][-11])
        info[(x,y,z)]['val_f1scores'].append( get_f1score( model.predict(X_val), y_val) )

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_3 (Conv1D)            (None, 118, 2)            62        
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 59, 2)             0         
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 57, 2)             14        
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 28, 2)             0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 26, 2)             14        
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 13, 2)             0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 26)               

In [11]:
info_summary(info)

hyperparameters:	 (3, 2, 2) 

	average accuracy:	 0.7806770205497742 	std:	 0.06671767588545371
	average f1score:	 0.692969124645818 	std:	 0.09962185723380607 

hyperparameters:	 (3, 2, 4) 

	average accuracy:	 0.8031344890594483 	std:	 0.05776991399936723
	average f1score:	 0.7360321428538947 	std:	 0.0895970591267578 

hyperparameters:	 (3, 2, 8) 

	average accuracy:	 0.8067289113998413 	std:	 0.054736802382283116
	average f1score:	 0.7580280152670313 	std:	 0.07218934442370248 

hyperparameters:	 (3, 2, 16) 

	average accuracy:	 0.8038125991821289 	std:	 0.07325254729242815
	average f1score:	 0.7616960722416355 	std:	 0.09520779396416186 

hyperparameters:	 (5, 2, 2) 

	average accuracy:	 0.7735772490501404 	std:	 0.06118514634020522
	average f1score:	 0.687879253652621 	std:	 0.09245990047116275 

hyperparameters:	 (5, 2, 4) 

	average accuracy:	 0.8182581305503845 	std:	 0.054143315074945304
	average f1score:	 0.766855500337456 	std:	 0.06733634265480719 

hyperparameters:	 (5, 2

In [19]:
ensemble_test(info[(5, 2, 4)]['models'], X_test, y_test)

final:
	test accuracy:	 0.8398548437993058
	test f1score:	 0.7937572059704397
