In [1]:
# This cell is included to show what libraries are imported and used in the project
import matplotlib.pyplot as plt
import numpy as np
import random
import sys
import math
import pickle
import timeit

from glob import glob
from scipy.linalg import expm
import bisect

from sklearn.metrics import accuracy_score

import tensorflow as tf

import keras
from keras.models import Sequential, model_from_json, Model
from keras.layers import Activation, Dense, Dropout, Flatten, Lambda, GlobalAveragePooling1D
from keras.layers.convolutional import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD, Adam

%matplotlib inline

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# This cell is included to define various functions used for computations and analysis
def error_rate(y_true, y_pred):
    """Return the misclassification rate, i.e. one minus the accuracy score.

    y_true -- ground-truth class labels
    y_pred -- predicted class labels
    """
    return 1 - accuracy_score(y_true, y_pred)

## FCN

In [3]:
def FCN(num_features, num_classes):
    """Build the (uncompiled) fully convolutional network classifier.

    The network consists of three Conv1D -> BatchNormalization -> ReLU stages
    (128 filters/kernel 8, 256 filters/kernel 5, 128 filters/kernel 3, all with
    'valid' padding and stride 1), followed by global average pooling and a
    softmax layer.

    num_features -- length of one input series; the input shape is (num_features, 1)
    num_classes  -- number of units in the softmax output layer

    Returns the Keras Sequential model.
    """
    # (filters, kernel_size) of the convolutional stages, in network order
    conv_stages = [(128, 8), (256, 5), (128, 3)]

    layers = []
    for stage_idx, (n_filters, width) in enumerate(conv_stages):
        conv_kwargs = dict(filters=n_filters, kernel_size=width, padding='valid',
                           activation='linear', strides=1)
        if stage_idx == 0:
            # Only the very first layer declares the input shape
            conv_kwargs['input_shape'] = (num_features, 1)
        layers += [Conv1D(**conv_kwargs), BatchNormalization(), Activation('relu')]

    layers += [GlobalAveragePooling1D(), Dense(num_classes), Activation('softmax')]

    model = Sequential()
    for layer in layers:
        model.add(layer)

    return model

## MLP

In [4]:
def mlp_wang(num_features, num_classes):
    """Build the (uncompiled) multilayer perceptron classifier.

    Layout: Dense(num_features) + ReLU, then three Dropout -> Dense(500) -> ReLU
    groups with dropout rates 0.1, 0.2 and 0.3, and finally a softmax layer.

    num_features -- length of one input series; the input shape is (num_features,)
    num_classes  -- number of units in the softmax output layer

    Returns the Keras Sequential model.
    """
    model = Sequential()

    # Input layer keeps the dimensionality of the series
    model.add(Dense(num_features, input_shape=(num_features,)))
    model.add(Activation('relu'))

    # Hidden layers, each preceded by an increasing amount of dropout
    for drop_rate in (0.1, 0.2, 0.3):
        model.add(Dropout(drop_rate))
        model.add(Dense(500))
        model.add(Activation('relu'))

    # Class probabilities
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    return model

## ResNet

In [5]:
def ResNet_wang(num_features, num_classes):
    """Build the (uncompiled) nine-convolution network used as "ResNet".

    Three blocks with 64, 128 and 128 filters; every block stacks three
    Conv1D -> BatchNormalization -> ReLU groups with kernel sizes 8, 5 and 3
    ('valid' padding, stride 1). The blocks are followed by global average
    pooling and a softmax layer.

    Note that, as written, this is a plain Sequential stack: the blocks are not
    joined by shortcut (residual) connections.

    num_features -- length of one input series; the input shape is (num_features, 1)
    num_classes  -- number of units in the softmax output layer

    Returns the Keras Sequential model.
    """
    model = Sequential()

    for block_filters in (64, 128, 128):
        for kernel in (8, 5, 3):
            conv_kwargs = dict(filters=block_filters, kernel_size=kernel,
                               padding='valid', activation='linear', strides=1)
            if kernel == 8:
                # The first convolution of every block is given the input shape.
                # NOTE(review): only the first layer of the model should need it;
                # confirm the argument on blocks 2 and 3 is merely redundant.
                conv_kwargs['input_shape'] = (num_features, 1)
            model.add(Conv1D(**conv_kwargs))
            model.add(BatchNormalization())
            model.add(Activation('relu'))

    # Pooling
    model.add(GlobalAveragePooling1D())

    # Softmax
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    return model

## NeuralNetwork-DatAug (All data sets)

In [30]:
# Collect the folder name of every data set in the UCR archive, sorted by name
PATH = 'UCR_TS_Archive_2015/'

# glob returns paths with a trailing "/", so the folder name is the
# second-to-last component after splitting on "/"
data_sets = np.sort([folder.split("/")[-2] for folder in glob(PATH + '*/')])

In [31]:
# List the data sets for which an augmented training set exists on disk.
# Each name is the file name without its last "_"-separated component
# (the training loop opens the file as <name> + '_augmented').
ds_aug_list = [
    '_'.join(aug_path.split("/")[-1].split("_")[:-1])
    for aug_path in glob('Augmented_data_sets/' + '*')
]

In [32]:
# Data sets to process in the training loop of this section
ds_targ = ["HandOutlines"]

In [33]:
# Performance table of this experiment and resume settings
table_name = 'FCN-DatAug_performance'
continue_run = True   # reuse the table pickled by an earlier run
continue_ds = True    # skip data sets that already have a row in the table

if continue_run:
    # NOTE: unpickling can execute arbitrary code; only load tables written by this notebook
    with open(table_name, 'rb') as f:
        perf_table_net = pickle.load(f)
    # NOTE(review): this resumes two entries past the last saved data set, i.e. one
    # data set is skipped -- confirm this is intended. The value is overwritten
    # below whenever continue_ds is True.
    ds_idx = np.where( data_sets == perf_table_net[-1,0] )[0][0] + 2
else:
    # The header is stored as a 1x7 (2-D) array so that the column indexing
    # below also works on a fresh table
    perf_table_net = np.array([['Data set', '1NN-ED', '1NN-DTW',
                                'MLP', 'FCN', 'FCN-DatAug', 'Run time FCN-DatAug']])
    ds_idx = 0
    
ds_done = []
if continue_ds:
    # First column holds the data set names that are already in the table
    ds_done = perf_table_net[:,0]
    ds_idx = 0
    #ds_done = np.concatenate((ds_done,np.array(['CinC_ECG_torso','HandOutlines']))) # Out Of Memory errors

In [34]:
# Read previous performance results on UCR Archive.
# Every line of UCR_results.txt is "<data set>,<error rate>,<error rate>,...";
# the result columns are, in file order: 1NN-ED, 1NN-DTW, MLP, FCN, ResNet,
# COTE and (when present) shapeDTW.
UCR_results = {}

# The context manager closes the file as soon as it has been read
with open('UCR_results.txt') as f:
    lines = [line.rstrip('\n') for line in f]

for line in lines:
    if not line.strip():
        continue  # tolerate blank lines, e.g. at the end of the file

    fields = line.split(",")
    if len(fields) < 5:
        # The training loops read the first four result columns
        raise ValueError('Malformed line in UCR_results.txt: ' + repr(line))

    # Data set name -> list of error-rate strings (the number of columns is not fixed)
    UCR_results[fields[0]] = fields[1:]

In [35]:
# Train the FCN on the augmented training set of every target data set, evaluate
# it on the original test set and append the result to the performance table.
for ds in ds_targ:

    print('\n################################# ' + ds + ' #################################')

    # Skip data sets that are already in the table or have no augmented training set
    if ds in ds_done or ds not in ds_aug_list:
        continue

    # The row starts with the published 1NN-ED, 1NN-DTW, MLP and FCN error rates
    perf_table_line = np.array([ds] + [UCR_results[ds][k] for k in range(4)])

    # Test set: one comma separated series per line, parsed in a single pass
    with open(PATH + ds + '/' + ds + '_TEST', 'r') as f:
        data_set_test = np.array([row.split(",") for row in f.read().splitlines()])

    # Augmented training set
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project
    with open('Augmented_data_sets/' + ds + '_augmented', 'rb') as f:
        augmented_data_set = pickle.load(f)

    # Remove rows that contain NaNs
    augmented_data_set = augmented_data_set[~np.isnan(augmented_data_set).any(axis=1)]

    print('Length of augmented training set: ' + str(len(augmented_data_set)))
    print(augmented_data_set)

    # ---------------------------------------------------------------------------------
    # Set up training and test set: column 0 is the class label, the remaining
    # columns are the values of the time series
    train_size_aug = len(augmented_data_set)
    test_size = len(data_set_test)
    ts_length = data_set_test.shape[1] - 1

    X_test = data_set_test[:, 1:].astype(float)
    # Going through float also accepts labels written as e.g. "1.0"
    y_test = data_set_test[:, 0].astype(float).astype(int)

    X_train_aug = augmented_data_set[:, 1:ts_length + 1].astype(float)
    y_train_aug = augmented_data_set[:, 0].astype(int)

    # Make sure the test labels are zero indexed. return_inverse maps every label
    # to its rank among the sorted unique labels without rewriting values in place.
    # The augmented training labels are used as stored -- assumes they are already
    # zero indexed; TODO confirm against the augmentation code.
    classes, y_test = np.unique(y_test, return_inverse=True)
    num_classes = len(classes)

    # Convert training labels to one-hot encoding
    y_train_aug_onehot = np.zeros((train_size_aug, num_classes))
    y_train_aug_onehot[np.arange(train_size_aug), y_train_aug] = 1

    # ---------------------------------------------------------------------------------
    # Reset tensorflow graph
    # NOTE(review): the session is never closed, so every iteration leaves one
    # open -- consider closing it once the model is no longer needed.
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    # Setup model
    model = FCN(ts_length, num_classes)

    # Conv1D expects a trailing channel axis
    X_train = X_train_aug[:, :, np.newaxis]
    y_train_onehot = y_train_aug_onehot

    # Optimizer
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.01)

    # Training settings
    batch_size = 500
    epochs = 100
    validation_split = 0.3

    # The measured time covers compilation, training and prediction
    start_time = timeit.default_timer()

    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

    model.fit(X_train, y_train_onehot,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=validation_split,
              shuffle=True,
              verbose=1)

    # Evaluate the model with test data
    X_test = X_test[:, :, np.newaxis]
    class_probs = model.predict(X_test)
    y_pred = np.argmax(class_probs, axis=1)

    acc = accuracy_score(y_test, y_pred)
    err_rate = error_rate(y_test, y_pred)

    elapsed = timeit.default_timer() - start_time

    print('\nTime (with data augmentation): ' + str(elapsed))
    print('Classification accuracy: ' + str(acc))
    print('Error rate: ' + str(err_rate))

    # ---------------------------------------------------------------------------------
    # Append the new row and save the table after every data set, so that an
    # interrupted run loses at most the data set in progress
    perf_table_line = np.concatenate((perf_table_line, np.array([err_rate, elapsed])))
    perf_table_net = np.vstack((perf_table_net, perf_table_line))

    with open(table_name, 'wb') as f:
        pickle.dump(perf_table_net, f)
        


################################# HandOutlines #################################


In [None]:
# Order the rows of the performance table by data set name (first column)
# and write the sorted table back to disk
sorted_idx = perf_table_net[:, 0].argsort()
perf_table_net = np.take(perf_table_net, sorted_idx, axis=0)

with open(table_name, 'wb') as table_file:
    pickle.dump(perf_table_net, table_file)

In [None]:
# Reload the pickled performance table from disk and display it
with open(table_name, 'rb') as table_file:
    perf_table_net = pickle.load(table_file)

print(perf_table_net)

In [19]:
# Print, for every data set in the table, the method with the lowest error rate.
# Columns 1-5 of a row hold the error rates of these methods, in this order:
method_names = ['nn_ed', 'nn_dtw', 'mlp', 'fcn', 'fcn_dataug']

# Iterate over ALL rows: once the table has been sorted the header is no longer
# guaranteed to be the first row, so it is skipped by name instead of by position.
for line in perf_table_net:
    ds = line[0]

    if ds == 'Data set':
        continue

    errors = [float(value) for value in line[1:6]]

    win, win_val = method_names[0], errors[0]
    for name, err in zip(method_names[1:], errors[1:]):
        # "<=" lets the later method win a tie
        if err <= win_val:
            win, win_val = name, err

    print(win)

fcn
fcn
mlp
fcn
fcn
fcn
fcn
mlp
fcn_dataug
fcn
fcn
fcn
fcn
nn_dtw
fcn_dataug
fcn
fcn
mlp
fcn
fcn
fcn
fcn_dataug
fcn
fcn
fcn
fcn
fcn
fcn
fcn_dataug
fcn
fcn
fcn
fcn
mlp
fcn
fcn
nn_dtw
fcn
fcn
fcn
fcn
fcn
fcn
fcn
fcn
fcn
fcn
fcn
nn_ed
mlp
fcn
fcn_dataug
fcn_dataug
fcn_dataug
fcn
fcn
fcn
fcn_dataug
fcn
fcn
fcn
fcn
fcn
fcn
fcn
fcn_dataug
fcn
fcn
fcn
fcn
nn_dtw
mlp
fcn
nn_dtw
fcn
fcn
nn_dtw
mlp
fcn
fcn
fcn
mlp


### MLP DatAug

In [14]:
# Collect the folder name of every data set in the UCR archive, sorted by name
PATH = 'UCR_TS_Archive_2015/'

# glob returns paths with a trailing "/", so the folder name is the
# second-to-last component after splitting on "/"
data_sets = np.sort([folder.split("/")[-2] for folder in glob(PATH + '*/')])

In [15]:
# List the data sets for which an augmented training set exists on disk.
# Each name is the file name without its last "_"-separated component
# (the training loop opens the file as <name> + '_augmented').
ds_aug_list = [
    '_'.join(aug_path.split("/")[-1].split("_")[:-1])
    for aug_path in glob('Augmented_data_sets/' + '*')
]

In [7]:
# Performance table of this experiment and resume settings
table_name = 'MLP-DatAug_performance'
continue_run = True   # reuse the table pickled by an earlier run
continue_ds = True    # skip data sets that already have a row in the table

if continue_run:
    # NOTE: unpickling can execute arbitrary code; only load tables written by this notebook
    with open(table_name, 'rb') as f:
        perf_table_net = pickle.load(f)
    # NOTE(review): this resumes two entries past the last saved data set, i.e. one
    # data set is skipped -- confirm this is intended. The value is overwritten
    # below whenever continue_ds is True.
    ds_idx = np.where( data_sets == perf_table_net[-1,0] )[0][0] + 2
else:
    # The header is stored as a 1x7 (2-D) array so that the column indexing
    # below also works on a fresh table
    perf_table_net = np.array([['Data set', '1NN-ED', '1NN-DTW',
                                'MLP', 'FCN', 'MLP-DatAug', 'Run time MLP-DatAug']])
    ds_idx = 0
    
ds_done = []
if continue_ds:
    # First column holds the data set names that are already in the table
    ds_done = perf_table_net[:,0]
    ds_idx = 0
    #ds_done = np.concatenate((ds_done,np.array(['CinC_ECG_torso','HandOutlines']))) # Out Of Memory errors

In [16]:
# Data sets to process in the training loop of this section
ds_targ = ["HandOutlines"]

In [18]:
# Read previous performance results on UCR Archive.
# Every line of UCR_results.txt is "<data set>,<error rate>,<error rate>,...";
# the result columns are, in file order: 1NN-ED, 1NN-DTW, MLP, FCN, ResNet,
# COTE and (when present) shapeDTW.
UCR_results = {}

# The context manager closes the file as soon as it has been read
with open('UCR_results.txt') as f:
    lines = [line.rstrip('\n') for line in f]

for line in lines:
    if not line.strip():
        continue  # tolerate blank lines, e.g. at the end of the file

    fields = line.split(",")
    if len(fields) < 5:
        # The training loops read the first four result columns
        raise ValueError('Malformed line in UCR_results.txt: ' + repr(line))

    # Data set name -> list of error-rate strings (the number of columns is not fixed)
    UCR_results[fields[0]] = fields[1:]

In [19]:
# Train the MLP on the augmented training set of every target data set, evaluate
# it on the original test set and append the result to the performance table.
for ds in ds_targ:

    print('\n################################# ' + ds + ' #################################')

    # Skip data sets that are already in the table or have no augmented training set
    if ds in ds_done or ds not in ds_aug_list:
        continue

    # The row starts with the published 1NN-ED, 1NN-DTW, MLP and FCN error rates
    perf_table_line = np.array([ds] + [UCR_results[ds][k] for k in range(4)])

    # Test set: one comma separated series per line, parsed in a single pass
    with open(PATH + ds + '/' + ds + '_TEST', 'r') as f:
        data_set_test = np.array([row.split(",") for row in f.read().splitlines()])

    # Augmented training set
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project
    with open('Augmented_data_sets/' + ds + '_augmented', 'rb') as f:
        augmented_data_set = pickle.load(f)

    # Remove rows that contain NaNs
    augmented_data_set = augmented_data_set[~np.isnan(augmented_data_set).any(axis=1)]

    print('Length of augmented training set: ' + str(len(augmented_data_set)))
    print(augmented_data_set)

    # ---------------------------------------------------------------------------------
    # Set up training and test set: column 0 is the class label, the remaining
    # columns are the values of the time series
    train_size_aug = len(augmented_data_set)
    test_size = len(data_set_test)
    ts_length = data_set_test.shape[1] - 1

    X_test = data_set_test[:, 1:].astype(float)
    # Going through float also accepts labels written as e.g. "1.0"
    y_test = data_set_test[:, 0].astype(float).astype(int)

    X_train_aug = augmented_data_set[:, 1:ts_length + 1].astype(float)
    y_train_aug = augmented_data_set[:, 0].astype(int)

    # Make sure the test labels are zero indexed. return_inverse maps every label
    # to its rank among the sorted unique labels without rewriting values in place.
    # The augmented training labels are used as stored -- assumes they are already
    # zero indexed; TODO confirm against the augmentation code.
    classes, y_test = np.unique(y_test, return_inverse=True)
    num_classes = len(classes)

    # Convert training labels to one-hot encoding
    y_train_aug_onehot = np.zeros((train_size_aug, num_classes))
    y_train_aug_onehot[np.arange(train_size_aug), y_train_aug] = 1

    # ---------------------------------------------------------------------------------
    # Reset tensorflow graph
    # NOTE(review): the session is never closed, so every iteration leaves one
    # open -- consider closing it once the model is no longer needed.
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    # Setup model
    model = mlp_wang(ts_length, num_classes)

    # The MLP consumes the flat (samples, ts_length) matrices directly
    X_train = X_train_aug
    y_train_onehot = y_train_aug_onehot

    # Optimizer
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.01)

    # Training settings
    batch_size = 1000
    epochs = 100
    validation_split = 0.3

    # The measured time covers compilation, training and prediction
    start_time = timeit.default_timer()

    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    model.fit(X_train, y_train_onehot,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=validation_split,
              shuffle=True,
              verbose=1)

    # Evaluate the model with test data
    class_probs = model.predict(X_test)
    y_pred = np.argmax(class_probs, axis=1)

    acc = accuracy_score(y_test, y_pred)
    err_rate = error_rate(y_test, y_pred)

    elapsed = timeit.default_timer() - start_time

    print('\nTime (with data augmentation): ' + str(elapsed))
    print('Classification accuracy: ' + str(acc))
    print('Error rate: ' + str(err_rate))

    # ---------------------------------------------------------------------------------
    # Append the new row and save the table after every data set, so that an
    # interrupted run loses at most the data set in progress
    perf_table_line = np.concatenate((perf_table_line, np.array([err_rate, elapsed])))
    perf_table_net = np.vstack((perf_table_net, perf_table_line))

    with open(table_name, 'wb') as f:
        pickle.dump(perf_table_net, f)


################################# HandOutlines #################################
Length of augmented training set: 99000
[[ 1.         -2.3312     -2.3312     ... -2.3247     -2.3291
  -2.3312    ]
 [ 1.         -2.3374     -2.3374     ... -2.3374     -2.3374
  -2.3374    ]
 [ 0.         -2.3414     -2.3436     ... -2.3414     -2.3414
  -2.3414    ]
 ...
 [ 0.         -2.2305     -2.23214577 ... -2.2305     -2.2305
  -2.2305    ]
 [ 1.         -2.2761     -2.2761     ... -2.2761     -2.2761
  -2.2761    ]
 [ 1.         -2.1993     -2.1993     ... -2.19747986 -2.1993
  -2.1993    ]]




Train on 69300 samples, validate on 29700 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/1

In [21]:
# Order the rows of the performance table by data set name (first column)
# and write the sorted table back to disk
sorted_idx = perf_table_net[:, 0].argsort()
perf_table_net = np.take(perf_table_net, sorted_idx, axis=0)

with open(table_name, 'wb') as table_file:
    pickle.dump(perf_table_net, table_file)

In [22]:
# Reload the pickled performance table from disk and display it
with open(table_name, 'rb') as table_file:
    perf_table_net = pickle.load(table_file)

print(perf_table_net)

[['50words' '0.369' '0.310' '0.288' '0.321' '0.9076923076923077'
  '5.596147200005362']
 ['Adiac' '0.389' '0.396' '0.248' '0.143' '0.5038363171355499'
  '694.2159890230032']
 ['ArrowHead' '0.200' '0.297' '0.177' '0.120' '0.1657142857142857'
  '492.504771271997']
 ['Beef' '0.333' '0.367' '0.167' '0.25' '0.30000000000000004'
  '1781.552806110005']
 ['BeetleFly' '0.250' '0.300' '0.150' '0.050' '0.09999999999999998'
  '1949.7589633369935']
 ['BirdChicken' '0.450' '0.250' '0.200' '0.050' '0.09999999999999998'
  '1946.7888806190022']
 ['CBF' '0.148' '0.003' '0.14' '0' '0.01777777777777778'
  '501.71618851700623']
 ['Car' '0.267' '0.267' '0.167' '0.083' '0.19999999999999996'
  '2224.4934930699965']
 ['ChlorineConcentration' '0.35' '0.352' '0.128' '0.157'
  '0.4424479166666667' '626.867258519007']
 ['Coffee' '0.000' '0.000' '0' '0' '0.0' '1077.7104566500057']
 ['Computers' '0.424' '0.300' '0.460' '0.152' '0.26' '2780.880527966001']
 ['Cricket_X' '0.423' '0.246' '0.431' '0.185' '0.3461538461538

## FCN and MLP (No DatAug)

In [5]:
# Collect the folder name of every data set in the UCR archive, sorted by name
PATH = 'UCR_TS_Archive_2015/'

# glob returns paths with a trailing "/", so the folder name is the
# second-to-last component after splitting on "/"
data_sets = np.sort([folder.split("/")[-2] for folder in glob(PATH + '*/')])

In [6]:
# List the data sets for which an augmented training set exists on disk.
# Each name is the file name without its last "_"-separated component.
ds_aug_list = [
    '_'.join(aug_path.split("/")[-1].split("_")[:-1])
    for aug_path in glob('Augmented_data_sets/' + '*')
]

In [7]:
# Performance table of this experiment and resume settings
table_name = 'MLP-FCN_performance'
continue_run = True   # reuse the table pickled by an earlier run
continue_ds = True    # skip data sets that already have a row in the table

if continue_run:
    # NOTE: unpickling can execute arbitrary code; only load tables written by this notebook
    with open(table_name, 'rb') as f:
        perf_table_net = pickle.load(f)
    # NOTE(review): this resumes two entries past the last saved data set, i.e. one
    # data set is skipped -- confirm this is intended. The value is overwritten
    # below whenever continue_ds is True.
    ds_idx = np.where( data_sets == perf_table_net[-1,0] )[0][0] + 2
else:
    # The header is stored as a 1x7 (2-D) array so that the column indexing
    # below also works on a fresh table
    perf_table_net = np.array([['Data set', '1NN-ED', '1NN-DTW',
                                'FCN', 'Run time FCN', 'MLP', 'Run time MLP']])
    ds_idx = 0
    
# Always defined, so the training loop can test membership even when
# continue_ds is switched off
ds_done = []
if continue_ds:
    # First column holds the data set names that are already in the table
    ds_done = perf_table_net[:,0]
    ds_idx = 0

In [8]:
# Read previous performance results on UCR Archive.
# Every line of UCR_results.txt is "<data set>,<error rate>,<error rate>,...";
# the leading result columns are, in file order: 1NN-ED, 1NN-DTW, MLP, FCN,
# ResNet and COTE. Extra trailing columns are kept as well, so this cell reads
# the same file as the other sections of the notebook.
UCR_results = {}

# The context manager closes the file as soon as it has been read
with open('UCR_results.txt') as f:
    lines = [line.rstrip('\n') for line in f]

for line in lines:
    if not line.strip():
        continue  # tolerate blank lines, e.g. at the end of the file

    fields = line.split(",")
    if len(fields) < 3:
        # The training loop of this section reads the first two result columns
        raise ValueError('Malformed line in UCR_results.txt: ' + repr(line))

    # Data set name -> list of error-rate strings (the number of columns is not fixed)
    UCR_results[fields[0]] = fields[1:]

In [9]:
def _fit_and_score(model, X_fit, y_fit_onehot, X_eval, y_eval,
                   batch_size=10, epochs=100, validation_split=0.3):
    """Compile and train `model`, then score it on the evaluation data.

    model         -- uncompiled Keras model
    X_fit         -- training inputs
    y_fit_onehot  -- one-hot encoded training labels
    X_eval        -- evaluation inputs
    y_eval        -- zero-indexed integer evaluation labels

    Returns (accuracy, error rate, elapsed seconds). The elapsed time covers
    compilation, training and prediction.
    """
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.01)

    start_time = timeit.default_timer()

    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    model.fit(X_fit, y_fit_onehot,
              batch_size=batch_size,
              epochs=epochs,
              validation_split=validation_split,
              shuffle=True,
              verbose=0)

    y_pred = np.argmax(model.predict(X_eval), axis=1)
    acc = accuracy_score(y_eval, y_pred)
    err_rate = error_rate(y_eval, y_pred)

    elapsed = timeit.default_timer() - start_time
    return acc, err_rate, elapsed


# Train the FCN and the MLP on the original (non-augmented) training set of every
# data set, evaluate both on the test set and append the results to the table.
for ds in data_sets[ds_idx:]:

    print('\n################################# ' + ds + ' #################################')

    # Skip data sets that are already in the table or have no augmented counterpart
    if ds in ds_done or ds not in ds_aug_list:
        continue

    # The row starts with the published 1NN-ED and 1NN-DTW error rates
    perf_table_line = np.array([ds, UCR_results[ds][0], UCR_results[ds][1]])

    # Training set: one comma separated series per line, parsed in a single pass
    with open(PATH + ds + '/' + ds + '_TRAIN', 'r') as f:
        data_set_train = np.array([row.split(",") for row in f.read().splitlines()])

    # Test set
    with open(PATH + ds + '/' + ds + '_TEST', 'r') as f:
        data_set_test = np.array([row.split(",") for row in f.read().splitlines()])

    # ---------------------------------------------------------------------------------
    # Set up training and test set: column 0 is the class label, the remaining
    # columns are the values of the time series
    train_size = len(data_set_train)
    test_size = len(data_set_test)
    ts_length = data_set_test.shape[1] - 1

    X_train = data_set_train[:, 1:ts_length + 1].astype(float)
    X_test = data_set_test[:, 1:].astype(float)

    # Going through float also accepts labels written as e.g. "1.0"
    y_train = data_set_train[:, 0].astype(float).astype(int)
    y_test = data_set_test[:, 0].astype(float).astype(int)

    # Make sure the labels are zero indexed. One mapping is built from the labels
    # of both splits, so train and test always agree and a class that is missing
    # from one split cannot produce an out-of-range index.
    classes, zero_indexed = np.unique(np.concatenate((y_train, y_test)), return_inverse=True)
    num_classes = len(classes)
    y_train = zero_indexed[:train_size]
    y_test = zero_indexed[train_size:]

    # Convert training labels to one-hot encoding
    y_train_onehot = np.zeros((train_size, num_classes))
    y_train_onehot[np.arange(train_size), y_train] = 1

    # ---------------------------------- FCN ----------------------------------
    # Reset tensorflow graph
    # NOTE(review): the sessions are never closed -- consider closing them once
    # the corresponding model is no longer needed.
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    model = FCN(ts_length, num_classes)

    # Conv1D expects a trailing channel axis
    acc, err_rate, elapsed = _fit_and_score(model,
                                            X_train[:, :, np.newaxis], y_train_onehot,
                                            X_test[:, :, np.newaxis], y_test)

    print('\nTime (FCN): ' + str(elapsed))
    print('Classification accuracy: ' + str(acc))
    print('Error rate: ' + str(err_rate) + '\n')

    perf_table_line = np.concatenate((perf_table_line, np.array([err_rate, elapsed])))

    # ---------------------------------- MLP ----------------------------------
    # Reset tensorflow graph
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    model = mlp_wang(ts_length, num_classes)

    # The MLP consumes the flat (samples, ts_length) matrices directly
    acc, err_rate, elapsed = _fit_and_score(model, X_train, y_train_onehot, X_test, y_test)

    print('\nTime (MLP): ' + str(elapsed))
    print('Classification accuracy: ' + str(acc))
    print('Error rate: ' + str(err_rate))

    # Append the new row and save the table after every data set, so that an
    # interrupted run loses at most the data set in progress
    perf_table_line = np.concatenate((perf_table_line, np.array([err_rate, elapsed])))
    perf_table_net = np.vstack((perf_table_net, perf_table_line))

    with open(table_name, 'wb') as f:
        pickle.dump(perf_table_net, f)


################################# 50words #################################

################################# Adiac #################################

################################# ArrowHead #################################

################################# Beef #################################

################################# BeetleFly #################################

################################# BirdChicken #################################

################################# CBF #################################

################################# Car #################################

################################# ChlorineConcentration #################################

################################# CinC_ECG_torso #################################

################################# Coffee #################################

################################# Computers #################################

################################# Cricket_X ##############




Time (MLP): 104.67743703350425
Classification accuracy: 0.9438860971524288
Error rate: 0.05611390284757123

################################# Wine #################################

################################# WordsSynonyms #################################

################################# Worms #################################

Time (FCN): 68.61305598262697
Classification accuracy: 0.5027624309392266
Error rate: 0.49723756906077343


Time (MLP): 10.164013951085508
Classification accuracy: 0.3701657458563536
Error rate: 0.6298342541436464

################################# WormsTwoClass #################################

Time (FCN): 68.5181339615956
Classification accuracy: 0.5303867403314917
Error rate: 0.4696132596685083


Time (MLP): 10.191812179982662
Classification accuracy: 0.5082872928176796
Error rate: 0.4917127071823204

################################# synthetic_control #################################

################################# uWaveGestureLibrary_X #####

In [10]:
# Order the rows of the performance table by data set name (first column)
# and write the sorted table back to disk
sorted_idx = perf_table_net[:, 0].argsort()
perf_table_net = np.take(perf_table_net, sorted_idx, axis=0)

with open(table_name, 'wb') as table_file:
    pickle.dump(perf_table_net, table_file)

In [11]:
# Reload the pickled performance table from disk and display it
with open(table_name, 'rb') as table_file:
    perf_table_net = pickle.load(table_file)

print(perf_table_net)

[['50words' '0.369' '0.310' '0.621978021978022' '16.156557979993522'
  '0.3582417582417582' '8.349115027987864']
 ['Adiac' '0.389' '0.396' '0.5063938618925832' '80.85867945943028'
  '0.6163682864450128' '20.206664296798408']
 ['ArrowHead' '0.200' '0.297' '0.6971428571428571' '2.0730466549866833'
  '0.23428571428571432' '1.1012845349905547']
 ['Beef' '0.333' '0.367' '0.8' '2.160927275021095' '0.5333333333333333'
  '1.0363243660249282']
 ['BeetleFly' '0.250' '0.300' '0.5' '1.6740828149777371' '0.35'
  '0.823900727002183']
 ['BirdChicken' '0.450' '0.250' '0.5' '1.6738065860117786' '0.4'
  '0.8248766429896932']
 ['CBF' '0.148' '0.003' '0.012222222222222245' '6.720642405562103'
  '0.1333333333333333' '2.505319164134562']
 ['Car' '0.267' '0.267' '0.7666666666666666' '3.5472062949847896'
  '0.16666666666666663' '1.5816666040045675']
 ['ChlorineConcentration' '0.35' '0.352' '0.4854166666666667'
  '15.830903065012535' '0.36197916666666663' '8.672713913023472']
 ['CinC_ECG_torso' '0.103' '0.349'

## Deep Neural Network

In [None]:
# Data sets singled out for this section
ds_targ = ["InsectWingbeatSound", "Phoneme"]

In [22]:
# Collect the folder name of every data set in the UCR archive, sorted by name
PATH = 'UCR_TS_Archive_2015/'

# glob returns paths with a trailing "/", so the folder name is the
# second-to-last component after splitting on "/"
data_sets = np.sort([folder.split("/")[-2] for folder in glob(PATH + '*/')])

In [23]:
# List the data sets for which an augmented training set exists on disk.
# Each name is the file name without its last "_"-separated component
# (the training loop opens the file as <name> + '_augmented').
ds_aug_list = [
    '_'.join(aug_path.split("/")[-1].split("_")[:-1])
    for aug_path in glob('Augmented_data_sets/' + '*')
]

In [24]:
# Configuration of the ResNet + data augmentation run
table_name = 'ResNet-DatAug_performance'  # pickle file that stores the performance table
continue_run = True  # True: load the existing table from `table_name`; False: start a new table
continue_ds = True   # True: iterate over all data sets and skip those already in the table

if continue_run:
    # Resume from the previously saved table
    with open(table_name, 'rb') as f:
        perf_table_net = pickle.load(f)
    # Start position derived from the data set in the last table row.
    # NOTE(review): the offset of 2 skips the data set directly after the last recorded
    # one - confirm this is intended. The value is overwritten below when continue_ds is True.
    ds_idx = np.where( data_sets == perf_table_net[-1,0] )[0][0] + 2
else:
    # New table that only contains the header row. It is created as a 2-D (1 x 7) array so
    # that the column access `perf_table_net[:,0]` below also works on a fresh table; a 1-D
    # header would raise an IndexError there. Stacking further rows gives the same result.
    perf_table_net = np.array([['Data set', '1NN-ED', '1NN-DTW',
                                'MLP', 'FCN', 'ResNet-DatAug', 'Run time ResNet-DatAug']])
    ds_idx = 0
    
# Data sets to skip in the training loop
ds_done = []
if continue_ds:
    # Everything already present in the first table column is considered done
    ds_done = perf_table_net[:,0]
    ds_idx = 0
    ds_done = np.concatenate((ds_done,np.array(['CinC_ECG_torso','HandOutlines']))) # Out Of Memory errors

In [25]:
# Read previous performance results on UCR Archive.
# Every non-empty line of 'UCR_results.txt' must hold eight comma-separated fields:
# the data set name followed by seven results. The results are kept as strings.
UCR_results = {}

# The context manager closes the file handle as soon as the lines have been read
with open('UCR_results.txt') as results_file:
    lines = [line.rstrip('\n') for line in results_file]

for line in lines:
    # Ignore blank lines (e.g. a trailing empty line) instead of failing on the unpacking
    if not line.strip():
        continue
    ds,nn_ed,nn_dtw,mlp,fcn,resnet,cote,shape_dtw = line.split(",")
    UCR_results[ds] = ([nn_ed,nn_dtw,mlp,fcn,resnet,cote,shape_dtw])

In [26]:
# Train and evaluate ResNet on the augmented training set of every remaining data set.
# For each data set the original test split and the pickled augmented training split are
# loaded, the network is fitted and evaluated, and the resulting error rate and run time are
# appended to the performance table. The table is pickled again after every data set.
for ds in data_sets[ds_idx:]:
    
    print('\n################################# ' + ds + ' #################################')
    
    # Skip data sets that are listed as done / excluded
    if ds in ds_done:
        continue
    
    # Skip data sets without an augmented training set
    if ds not in ds_aug_list:
        continue
    
    # Start of the table row: data set name followed by the first four reference
    # results stored in UCR_results (1NN-ED, 1NN-DTW, MLP, FCN)
    perf_table_line = np.array([ds, UCR_results[ds][0], UCR_results[ds][1], UCR_results[ds][2], UCR_results[ds][3]])
    
    
    # Test set
    with open(PATH + ds + str('/') + ds + '_TEST', 'r') as f:
        test = f.read().splitlines()

    # Build the string array in a single call; growing it with np.append would copy the
    # whole array once per line (quadratic in the number of test samples)
    data_set_test = np.array([line.split(",") for line in test])
            
    # Augmented training set
    with open('Augmented_data_sets/' + ds + '_augmented', 'rb') as f:
        augmented_data_set = pickle.load(f)
    
    # Remove NaNs: drop every row that contains at least one NaN
    augmented_data_set = augmented_data_set[~np.isnan(augmented_data_set).any(axis=1)]

    print('Length of augmented training set: ' + str(len(augmented_data_set)))
    print(augmented_data_set)
    

    
    # --------------------------------------------------------------------------------------------



    # Set up training and test set
    train_size_aug = len(augmented_data_set)
    test_size = len(data_set_test)
    ts_length = len(data_set_test[0])-1

    # The training rows must provide the label column plus ts_length values
    if augmented_data_set.shape[1] < ts_length + 1:
        raise ValueError('Augmented training set of ' + ds + ' has fewer than '
                         + str(ts_length + 1) + ' columns')

    # Column 0 holds the class label, the following ts_length columns hold the time series
    # Test
    y_test = data_set_test[:, 0].astype(int)
    X_test = data_set_test[:, 1:].astype(float)
    # Train (astype(int) truncates towards zero, like int())
    y_train_aug = augmented_data_set[:, 0].astype(int)
    X_train_aug = augmented_data_set[:, 1:ts_length+1].astype(float)

    # Make sure the labels are zero indexed.
    # return_inverse maps every label to the position of its value among the sorted unique
    # labels. Relabelling in place, one label at a time, can merge classes when a newly
    # assigned index equals a label that is processed later (e.g. labels -1, 0, 1).
    unique_labels, y_test = np.unique(y_test, return_inverse=True)
    num_classes = len(unique_labels)

    # Convert labels to one-hot encoding
    # NOTE(review): unlike y_test, y_train_aug is not remapped; the indexing below assumes
    # the augmented labels are already integers in [0, num_classes) - TODO confirm
    y_train_aug_onehot = np.zeros((train_size_aug, num_classes))
    y_train_aug_onehot[np.arange(train_size_aug), y_train_aug] = 1



    # --------------------------------------------------------------------------------------------


    
    # Reset tensorflow graph
    # NOTE(review): a new InteractiveSession is created for every data set and never closed
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    # Setup model
    model = ResNet_wang(ts_length, num_classes)
    
    # Set up data for Tensorflow model: add a trailing axis of size 1 to the features
    X_train = np.reshape(X_train_aug,(X_train_aug.shape[0],X_train_aug.shape[1],1))
    y_train_onehot = np.reshape(y_train_aug_onehot,(train_size_aug, num_classes))

    # Optimizers (only `adam` is passed to compile below)
    sgd = SGD(lr=0.001, decay=1e-6)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.01)

    # Train the model
    batch_size = 1000
    epochs = 100
    validation_split = 0.3
    
    
    # The measured time covers compiling, fitting and predicting on the test set
    start_time = timeit.default_timer()

    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    model.fit(X_train, y_train_onehot,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_split=validation_split,
                    shuffle=True,
                    verbose=1)
    

    # Evaluate the model with test data
    X_test = np.reshape(X_test,(X_test.shape[0],X_test.shape[1],1))
    class_probs = model.predict(X_test)
    y_pred = np.argmax(class_probs, axis=1)

    acc = accuracy_score(y_test, y_pred)
    err_rate = error_rate(y_test, y_pred)
    
    elapsed = timeit.default_timer() - start_time

    print('\nTime (with data augmentation): ' + str(elapsed))
    print('Classification accuracy: ' + str(acc))
    print('Error rate: ' + str(err_rate))
    


    # --------------------------------------------------------------------------------------------


    
    # Append and save table
    perf_table_line = np.concatenate((perf_table_line,np.array([err_rate, elapsed])))
    perf_table_net = np.row_stack((perf_table_net,perf_table_line))

    # Saved after every data set, so finished results are on disk if a later one is interrupted
    with open(table_name, 'wb') as f:
        pickle.dump(perf_table_net, f)


################################# 50words #################################

################################# Adiac #################################
Length of augmented training set: 99390
[[21.          1.598       1.5994     ...  1.5642      1.5709
   1.5929    ]
 [27.          1.7011      1.6706     ...  1.5197      1.6025
   1.6702    ]
 [20.          1.7223      1.6953     ...  1.6418      1.695
   1.7085    ]
 ...
 [35.          1.4228      1.3134764  ...  1.68613135  1.62563769
   1.5323    ]
 [13.          1.6397      1.63571585 ...  1.58464498  1.60932413
   1.6209    ]
 [11.          1.6057      1.50163977 ...  1.70765681  1.70366438
   1.6652    ]]




Train on 69573 samples, validate on 29817 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100

KeyboardInterrupt: 

In [None]:
# Reorder the rows of the performance table by data set name (first column)
# and write the reordered table back to the pickle file
sorted_idx = np.argsort(perf_table_net[:, 0])
perf_table_net = perf_table_net[sorted_idx, :]

with open(table_name, 'wb') as table_file:
    pickle.dump(perf_table_net, table_file)

In [None]:
# Reload the pickled performance table from disk and show it
with open(table_name, 'rb') as table_file:
    perf_table_net = pickle.load(table_file)

print(perf_table_net)