In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import preprocessing
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from tensorflow.keras import regularizers

# Data preparation

In [2]:
df = pd.read_pickle("../project_data/every_half_second.pkl")
df.reset_index(inplace = True, drop = True)
df['time'] = df['time'].astype(float)
# we need to order by time to make splitting easier
df.sort_values(by = ['time'], ignore_index = True, inplace = True)
# prepare target
df['TAC_Reading_binary'] = np.array([1 if tac >= 0.08 else 0 for tac in df['TAC_Reading'].values])

For our sequence networks, we'll be using a slightly different version of the data. Instead of every row corresponding to a sample taken every ten seconds, now every row corresponds to a sample taken every half second. Our problem frame is still: given a ten-second window of accelerometer data, can we accurately classify the person as sober or intoxicated. The only difference now is that our networks will take as input a sequence of 20 rows, sampled every half-second in time. And just as before we'll be using 3-fold cross-validation, implemented manually, to evaluate our models. 

In [3]:
# store indices for each split
train_1_indices = np.arange(177755, len(df))
test_1_indices = np.arange(0, 177755)
train_2_indices = np.concatenate((np.arange(0, 177755), np.arange(177755*2, len(df))))
test_2_indices = np.arange(177755, 177755*2)
train_3_indices = np.arange(0, 177755*2)
test_3_indices = np.arange(177755*2, len(df))

# how many training observations in each fold?
print('there are', len(df), 'observations in the training data')
print('train_fold_1 will have', len(train_1_indices), 'observations')
print('train_fold_2 will have', len(train_2_indices), 'observations')
print('train_fold_3 will have', len(train_3_indices), 'observations')
print('test_fold_1 will have', len(test_1_indices), 'observations')
print('test_fold_2 will have', len(test_2_indices), 'observations')
print('test_fold_3 will have', len(test_3_indices), 'observations')

there are 533265 observations in the training data
train_fold_1 will have 355510 observations
train_fold_2 will have 355510 observations
train_fold_3 will have 355510 observations
test_fold_1 will have 177755 observations
test_fold_2 will have 177755 observations
test_fold_3 will have 177755 observations


In [4]:
def standardize_inputs(X_train, X_test):
    '''
    Standardize a train-test split using mean and standard deviation estimates from training data.
    Returns a tuple of standardized training and test matrices.
    '''
    scale = StandardScaler()
    scale.fit(X_train)
    X_train_scaled = scale.transform(X_train)
    X_test_scaled = scale.transform(X_test)
    return(X_train_scaled, X_test_scaled)

def data_for_modeling(train_indices, test_indices, standardize = True, multi_input = False, multi_output = False, data = df, sequence = False):
    '''
    Get the training and test arrays for predictors and target to feed into your model.
    Objects returned in a tuple in the following order: (X_train_objects, ..., X_test_objects, ..., y_train_objects, ..., y_test_objects)
    Standardizes X appropriately, unless standardize = False, in which case X is not standardized.
    If multi_input = True, multiple X objects are returned (to be used in wide and deep network).
    If multi_output = True, multiple y objects are returned (to be used in multi-task network).
    Does not support both multi_input and multi_output.
    If sequence = True then each array is sorted by pid.
    '''
    # prepare target
    if sequence:
        y_train = df.iloc[train_indices].sort_values(by = ['pid'])['TAC_Reading_binary'].values
        y_test = df.iloc[test_indices].sort_values(by = ['pid'])['TAC_Reading_binary'].values
    else:
        y_train = df.iloc[train_indices]['TAC_Reading_binary'].values
        y_test = df.iloc[test_indices]['TAC_Reading_binary'].values
    if multi_output:
        y_train_classification = df.iloc[train_indices]['TAC_Reading_binary'].values
        y_train_regression = df.iloc[train_indices]['TAC_Reading'].values
        y_test_classification = df.iloc[test_indices]['TAC_Reading_binary'].values
        y_test_regression = df.iloc[test_indices]['TAC_Reading'].values
    
    # prepare predictors
    if sequence:
        X_train = df.iloc[train_indices].sort_values(by = ['pid']).drop(['time', 'pid', 'TAC_Reading', 'TAC_Reading_binary'], axis = 1)
        X_test = df.iloc[test_indices].sort_values(by = ['pid']).drop(['time', 'pid', 'TAC_Reading', 'TAC_Reading_binary'], axis = 1)
    else:
        X_train = df.iloc[train_indices].drop(['time', 'pid', 'TAC_Reading', 'TAC_Reading_binary'], axis = 1)
        X_test = df.iloc[test_indices].drop(['time', 'pid', 'TAC_Reading', 'TAC_Reading_binary'], axis = 1)
    if multi_input:
        X_train_x = X_train['x'].values.reshape(-1,1)
        X_train_y = X_train['y'].values.reshape(-1,1)
        X_train_z = X_train['z'].values.reshape(-1,1)
        X_train_p = X_train.drop(['x', 'y', 'z'], axis = 1)
        X_test_x = X_test['x'].values.reshape(-1,1)
        X_test_y = X_test['y'].values.reshape(-1,1)
        X_test_z = X_test['z'].values.reshape(-1,1)
        X_test_p = X_test.drop(['x', 'y', 'z'], axis = 1)
    
    # standardization
    if standardize:
        if multi_input:
            X_train_x, X_test_x = standardize_inputs(X_train_x, X_test_x)
            X_train_y, X_test_y = standardize_inputs(X_train_y, X_test_y)
            X_train_z, X_test_z = standardize_inputs(X_train_z, X_test_z)
            X_train_p, X_test_p = standardize_inputs(X_train_p, X_test_p)
        else:
            X_train_scaled, X_test_scaled = standardize_inputs(X_train, X_test)

    # returns
    if multi_input:
        if standardize:
            return(X_train_x, X_test_x, X_train_y, X_test_y, X_train_z, X_test_z, X_train_p, X_test_p, y_train, y_test)
        else:
            return(X_train_x, X_test_x, X_train_y, X_test_y, X_train_z, X_test_z, X_train_p, X_test_p, y_train, y_test)
    elif multi_output:
        if standardize:
            return(X_train_scaled, X_test_scaled, y_train_classification, y_train_regression, y_test_classification, y_test_regression)
        else:
            return(X_train, X_test, y_train_classification, y_train_regression, y_test_classification, y_test_regression)
    else:
        if standardize:
            return(X_train_scaled, X_test_scaled, y_train, y_test)
        else:
            return(X_train, X_test, y_train, y_test)
        
def format_test(y_test):
    '''
    Returns appropriately formatted y_test that can be used with output of model.predict() and sklearn scoring functions.
    '''
    y_test = y_test[np.arange(0, 177755, 20)]
    y_test = y_test[0:len(y_test) - 1]
    return(y_test)

In [5]:
# verify that data is being sequenced and batched appropriately
X_train, X_test, y_train, y_test = data_for_modeling(train_1_indices, test_1_indices, sequence = True)
data_inst = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20)

for predictors_batch, labels_batch in data_inst:
    print('predictors_batch shape is', predictors_batch.shape)
    print('labels_batch shape is', labels_batch.shape)
    break

predictors_batch shape is (128, 20, 99)
labels_batch shape is (128,)


In [6]:
test_inst = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
for predictors_batch, labels_batch in test_inst:
    print('predictors_batch shape is', predictors_batch.shape)
    print('labels_batch shape is', labels_batch.shape)
    break

predictors_batch shape is (128, 20, 99)
labels_batch shape is (128,)


# Modeling

In [31]:
# containers to store results
model = []
fold = []
accuracy = []
precision_intox = []
precision_sober = []
recall_intox = []
recall_sober = []
support_sober = []
support_intox = []

results = {'model': model, 'fold': fold, 'accuracy' : accuracy, 'precision (intoxicated)': precision_intox,
          'precision (sober)': precision_sober, 'recall (intoxicated)': recall_intox, 'recall (sober)': recall_sober, 
           'support (sober)': support_sober, 'support (intox)': support_intox}

## Model #1 LSTM

In [144]:
def train_lstm(train_data, test_data, name):
    '''
    Creates an LSTM network and fits it to supplied data. 
    Returns fitted model.
    Name specifies filename of model saved at checkpoints.
    '''
    # specify architecture
    model = keras.Sequential([
        keras.layers.LSTM(units = 64, input_shape = [None, 99], return_sequences = True),
        keras.layers.Dropout(rate = 0.1),
        keras.layers.LSTM(units = 64, return_sequences = True),
        keras.layers.Dropout(rate = 0.1),
        keras.layers.Dense(units = 1, activation = 'sigmoid')
    ])
    # compile model
    optimizer = keras.optimizers.SGD(momentum = 0.9, nesterov = True)
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = ['accuracy'])
    # callbacks
    lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience = 5)
    checkpoint_cb = keras.callbacks.ModelCheckpoint('model_checkpoints/' + 'lstm_' + name + '.h5', save_best_only = True, save_weights_only = False)
    early_stopping_cb = keras.callbacks.EarlyStopping(patience = 15, restore_best_weights = True)
    
    # fit model
    model.fit(x = train_data, epochs = 50, validation_data = test_data, callbacks = [lr_scheduler, checkpoint_cb, early_stopping_cb])
    
    return(model)

**Fold 1**

In [139]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_1_indices, test_1_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
lstm_1 = train_lstm(train_data, test_data, 'fold_1')
# predict labels
pred_prob = lstm_1.predict(test_data)
pred_prob = pred_prob[:, -1, :]
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['LSTM']
fold += [1]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50


**Fold 2**

In [143]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_2_indices, test_2_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
lstm_2 = train_lstm(train_data, test_data, 'fold_2')
# predict labels
pred_prob = lstm_2.predict(test_data)
pred_prob = pred_prob[:, -1, :]
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
#record results
model += ['LSTM']
fold += [2]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50


**Fold 3**

In [145]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_3_indices, test_3_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
lstm_3 = train_lstm(train_data, test_data, 'fold_3')
# predict labels
pred_prob = lstm_3.predict(test_data)
pred_prob = pred_prob[:, -1, :]
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
#record results
model += ['LSTM']
fold += [3]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Model #2 Convolution + RNN (with GRU)

In [161]:
def train_conv_rnn(train_data, test_data, name):
    '''
    Creates a network with a convolutional layer followed by 2 recurrent layers.
    Returns fitted model.
    Name specifies filename of model saved at checkpoints.
    '''
    # specify architecture
    model = keras.Sequential([
        keras.layers.Conv1D(filters = 30, kernel_size = 4, strides = 1, padding = 'same', input_shape = [None, 99]),
        keras.layers.Dropout(rate = 0.15),
        keras.layers.GRU(units = 30, return_sequences = True),
        keras.layers.Dropout(rate = 0.15),
        keras.layers.GRU(units = 30),
        keras.layers.Dropout(rate = 0.15),
        keras.layers.Dense(units = 1, activation = 'sigmoid')
    ])
    # compile model
    optimizer = keras.optimizers.SGD(momentum = 0.9, nesterov = True)
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = ['accuracy'])
    
    # callbacks
    lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience = 5)
    checkpoint_cb = keras.callbacks.ModelCheckpoint('model_checkpoints/' + 'conv_rnn_' + name + '.h5', save_best_only = True, save_weights_only = False)
    early_stopping_cb = keras.callbacks.EarlyStopping(patience = 15, restore_best_weights = True)
    
    # fit model
    model.fit(x = train_data, epochs = 50, validation_data = test_data, callbacks = [lr_scheduler, checkpoint_cb, early_stopping_cb])
    
    return(model)

**Fold 1**

In [162]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_1_indices, test_1_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
conv_rnn_1 = train_conv_rnn(train_data, test_data, 'fold_1')
# predict labels
pred_prob = conv_rnn_1.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
#record results
model += ['convolutional + recurrent network']
fold += [1]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50


**Fold 2**

In [164]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_2_indices, test_2_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
conv_rnn_2 = train_conv_rnn(train_data, test_data, 'fold_2')
# predict labels
pred_prob = conv_rnn_2.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
#record results
model += ['convolutional + recurrent network']
fold += [2]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50


**Fold 3**

In [165]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_3_indices, test_3_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
conv_rnn_3 = train_conv_rnn(train_data, test_data, 'fold_3')
# predict labels
pred_prob = conv_rnn_3.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
#record results
model += ['convolutional + recurrent network']
fold += [3]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


## Model #3 Wavenet

In [246]:
def train_wavenet(train_data, test_data, name):
    '''
    Creates wavenet network (stacked 1D convolutional networks).
    Returns fitted model.
    Name specifies filename of model saved at checkpoints.
    '''
    # specify architecture
    model = keras.models.Sequential()
    model.add(keras.layers.InputLayer(input_shape = [None, 99]))
    for rate in (1, 2, 4, 8) * 2:
        model.add(keras.layers.Conv1D(filters = 30, kernel_size = 2, padding = 'causal', activation= 'relu', dilation_rate=rate))
    for rate in (1, 2, 4, 8) * 2:
        model.add(keras.layers.Conv1D(filters = 30, kernel_size = 2, padding = 'causal', activation= 'relu', dilation_rate=rate))
    model.add(keras.layers.Dense(units = 1, activation = 'sigmoid'))
    
    # callbacks
    lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience = 5)
    checkpoint_cb = keras.callbacks.ModelCheckpoint('model_checkpoints/' + 'wavenet_' + name + '.h5', save_best_only = True, save_weights_only = False)
    early_stopping_cb = keras.callbacks.EarlyStopping(patience = 15, restore_best_weights = True)
    
    # compile model
    
    optimizer = keras.optimizers.SGD(momentum = 0.9, nesterov = True)
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = ['accuracy'])
    
    # fit model
    model.fit(x = train_data, epochs = 50, validation_data = test_data, callbacks = [lr_scheduler, checkpoint_cb, early_stopping_cb])
    
    return(model)

**Fold 1**

In [247]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_1_indices, test_1_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
wavenet_1 = train_wavenet(train_data, test_data, 'fold_1')
# predict labels
pred_prob = wavenet_1.predict(test_data)
pred_prob = pred_prob[:, -1, :]
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
record results
model += ['simplified wavenet']
fold += [1]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50


**Fold 2**

In [191]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_2_indices, test_2_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
wavenet_2 = train_wavenet(train_data, test_data, 'fold_2')
# predict labels
pred_prob = wavenet_2.predict(test_data)
pred_prob = pred_prob[:, -1, :]
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
#record results
model += ['simplified wavenet']
fold += [2]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


**Fold 3**

In [194]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_3_indices, test_3_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
wavenet_3 = train_wavenet(train_data, test_data, 'fold_3')
# predict labels
pred_prob = wavenet_3.predict(test_data)
pred_prob = pred_prob[:, -1, :]
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
#record results
model += ['simplified wavenet']
fold += [3]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


IndexError: index 1 is out of bounds for axis 0 with size 1

In [217]:
model.append('simplified wavenet')
fold.append(3)
accuracy.append(1.00)
precision_intox.append(1.00)
precision_sober.append(1.00)
recall_intox.append(1.00)
recall_sober.append(1.00)
support_sober.append(8887)
support_intox.append(0)

## Model #4 Bidirectional LSTM

In [12]:
def train_bi_lstm(train_data, test_data, name):
    '''
    Creates an bidirectional LSTM network and fits it to supplied data. 
    Returns fitted model.
    Name specifies filename of model saved at checkpoints.
    '''
    # specify architecture
    model = keras.Sequential([
        keras.layers.Bidirectional(keras.layers.LSTM(units = 64, return_sequences = True), input_shape = [None, 99]),
        keras.layers.Dropout(rate = 0.1),
        keras.layers.Bidirectional(keras.layers.LSTM(units = 64)),
        keras.layers.Dropout(rate = 0.1),
        keras.layers.Dense(units = 1, activation = 'sigmoid')
    ])
    # compile model
    optimizer = keras.optimizers.SGD(momentum = 0.9, nesterov = True)
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = ['accuracy'])
    # callbacks
    lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience = 5)
    checkpoint_cb = keras.callbacks.ModelCheckpoint('model_checkpoints/' + 'bi_lstm_' + name + '.h5', save_best_only = True, save_weights_only = False)
    early_stopping_cb = keras.callbacks.EarlyStopping(patience = 15, restore_best_weights = True)
    
    # fit model
    model.fit(x = train_data, epochs = 50, validation_data = test_data, callbacks = [lr_scheduler, checkpoint_cb, early_stopping_cb])
    
    return(model)

**Fold 1**

In [15]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_1_indices, test_1_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
bi_lstm_1 = train_bi_lstm(train_data, test_data, 'fold_1')
# predict labels
pred_prob = bi_lstm_1.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['bi-LSTM']
fold += [1]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50


**Fold 2**

In [18]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_2_indices, test_2_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
bi_lstm_2 = train_bi_lstm(train_data, test_data, 'fold_2')
# predict labels
pred_prob = bi_lstm_2.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['bi-LSTM']
fold += [2]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50


In [19]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_3_indices, test_3_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
bi_lstm_3 = train_bi_lstm(train_data, test_data, 'fold_3')
# predict labels
pred_prob = bi_lstm_3.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['bi-LSTM']
fold += [3]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_1_indices, test_1_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
bi_lstm_1 = train_bi_lstm(train_data, test_data, 'fold_1')
# predict labels
pred_prob = bi_lstm_1.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['bi-LSTM']
fold += [1]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

## Model #5 Convolutional + bidirectional recurrent network

In [24]:
def train_conv_bi_rnn(train_data, test_data, name):
    '''
    Creates an convolutional + bidirectional RNN network and fits it to supplied data. 
    Returns fitted model.
    Name specifies filename of model saved at checkpoints.
    '''
    # specify architecture
    model = keras.Sequential([
        keras.layers.Conv1D(filters = 30, kernel_size = 4, strides = 1, padding = 'same', input_shape = [None, 99]),
        keras.layers.Dropout(rate = 0.15),
        keras.layers.Bidirectional(keras.layers.GRU(units = 30, return_sequences = True), input_shape = [None, 99]),
        keras.layers.Dropout(rate = 0.15),
        keras.layers.Bidirectional(keras.layers.GRU(units = 30)),
        keras.layers.Dropout(rate = 0.15),
        keras.layers.Dense(units = 1, activation = 'sigmoid')
    ])
    # compile model
    optimizer = keras.optimizers.SGD(momentum = 0.9, nesterov = True)
    model.compile(loss = 'binary_crossentropy', optimizer = optimizer, metrics = ['accuracy'])
    # callbacks
    lr_scheduler = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience = 5)
    checkpoint_cb = keras.callbacks.ModelCheckpoint('model_checkpoints/' + 'conv_bi_rnn_' + name + '.h5', save_best_only = True, save_weights_only = False)
    early_stopping_cb = keras.callbacks.EarlyStopping(patience = 15, restore_best_weights = True)
    
    # fit model
    model.fit(x = train_data, epochs = 50, validation_data = test_data, callbacks = [lr_scheduler, checkpoint_cb, early_stopping_cb])
    
    return(model)

**Fold 1**

In [25]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_1_indices, test_1_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
conv_bi_rnn_1 = train_conv_bi_rnn(train_data, test_data, 'fold_1')
# predict labels
pred_prob = conv_bi_rnn_1.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['convolutional + bidirectional rnn']
fold += [1]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50


**Fold 2**

In [26]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_2_indices, test_2_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
conv_bi_rnn_2 = train_conv_bi_rnn(train_data, test_data, 'fold_2')
# predict labels
pred_prob = conv_bi_rnn_2.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['convolutional + bidirectional rnn']
fold += [2]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


**Fold 3**

In [27]:
# get data
X_train, X_test, y_train, y_test = data_for_modeling(train_3_indices, test_3_indices, sequence = True)
train_data = timeseries_dataset_from_array(data = X_train, targets = y_train, sequence_length = 20, sequence_stride = 5)
test_data = timeseries_dataset_from_array(data = X_test, targets = y_test, sequence_length = 20, sequence_stride = 20)
# fit model
conv_bi_rnn_3 = train_conv_bi_rnn(train_data, test_data, 'fold_3')
# predict labels
pred_prob = conv_bi_rnn_3.predict(test_data)
pred = np.array([1 if prob >= 0.5 else 0 for prob in pred_prob])
y_test = format_test(y_test)
# record results
model += ['convolutional + bidirectional rnn']
fold += [3]
accuracy += [accuracy_score(y_test, pred)]
precision_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][1]]
precision_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[0][0]]
recall_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][1]]
recall_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[1][0]]
support_sober += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][0]]
support_intox += [precision_recall_fscore_support(y_test, pred, zero_division = 1)[3][1]]

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [40]:
model = ['Bi-LSTM','Bi-LSTM','Bi-LSTM','Conv+Bi-GRU','Conv+Bi-GRU','Conv+Bi-GRU','LSTM','LSTM','LSTM','Conv+GRU','Conv+GRU','Conv+GRU','Wavenet','Wavenet','Wavenet']
fold = [1,2,3,1,2,3,1,2,3,1,2,3,1,2,3]
accuracy = [0.518848,0.775065,0.965568,0.586925,0.779003,0.946214,0.510183,0.756836,0.965230,0.571621,0.740407,0.960729,0.543828,0.720266,1.000000]
precision_intox = [0.483556,0.594563,0,0.545261,0.623229,0,0.477623,0.556719,0,0.525354,0.527146,0,1,1,1]
precision_sober =[0.630323,0.848701,1,0.624785,0.827744,1,0.608794,0.852043,1,0.627739,0.866190,1,0.543828,0.720266,1]
recall_intox = [0.805131,0.615849,1,0.569068,0.530973,1,0.787124,0.641593,1,0.631228,0.699115,1,0,0,1]
recall_sober = [0.278709,0.836900,0.965568,0.601904,0.875332,0.946214,0.277881,0.801594,0.965230,0.521622,0.756444,0.960729,1,1,1]
support_sober = [4833,6401,8887,4833,6401,8887,4833,6401,8887,4833,6401,8887,4833,6401,8887]
support_intox = [4054,2486,0,4054,2486,0,4054,2486,0,4054,2486,0,4054,2486,0]

results = {'model':model,'fold':fold,'accuracy':accuracy,'precision (intoxicated)':precision_intox,'precision (sober)':precision_sober,'recall (intoxicated)':recall_intox,
          'recall (sober)':recall_sober, 'support (sober)':support_sober, 'support (intoxicated)':support_intox}

# Results

In [60]:
pd.DataFrame(results)

Unnamed: 0,model,fold,accuracy,precision (intoxicated),precision (sober),recall (intoxicated),recall (sober),support (sober),support (intoxicated)
0,Bi-LSTM,1,0.518848,0.483556,0.630323,0.805131,0.278709,4833,4054
1,Bi-LSTM,2,0.775065,0.594563,0.848701,0.615849,0.8369,6401,2486
2,Bi-LSTM,3,0.965568,0.0,1.0,1.0,0.965568,8887,0
3,Conv+Bi-GRU,1,0.586925,0.545261,0.624785,0.569068,0.601904,4833,4054
4,Conv+Bi-GRU,2,0.779003,0.623229,0.827744,0.530973,0.875332,6401,2486
5,Conv+Bi-GRU,3,0.946214,0.0,1.0,1.0,0.946214,8887,0
6,LSTM,1,0.510183,0.477623,0.608794,0.787124,0.277881,4833,4054
7,LSTM,2,0.756836,0.556719,0.852043,0.641593,0.801594,6401,2486
8,LSTM,3,0.96523,0.0,1.0,1.0,0.96523,8887,0
9,Conv+GRU,1,0.571621,0.525354,0.627739,0.631228,0.521622,4833,4054


In [61]:
pd.DataFrame(results).groupby('model', as_index = False).agg(np.mean).drop(['fold', 'support (sober)', 'support (intoxicated)'], axis = 1)

Unnamed: 0,model,accuracy,precision (intoxicated),precision (sober),recall (intoxicated),recall (sober)
0,Bi-LSTM,0.75316,0.359373,0.826341,0.806993,0.693726
1,Conv+Bi-GRU,0.770714,0.389497,0.81751,0.700014,0.807817
2,Conv+GRU,0.757586,0.350833,0.83131,0.776781,0.746265
3,LSTM,0.744083,0.344781,0.820279,0.809572,0.681568
4,Wavenet,0.754698,1.0,0.754698,0.333333,1.0
