In [42]:
import os

import numpy as np
import sklearn

from utils.utils import read_dataset
from utils.utils import create_directory

In [5]:
def fit_classifier():
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]
    
    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))
    #print('Number of Classes: %s' % nb_classes)
    
    #one-hot-encoding
    enc = sklearn.preprocessing.OneHotEncoder(categories='auto')
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()
    
    # save orignal y because later we will use binary
    y_true = np.argmax(y_test, axis=1) #See if this is really needed later
    
    if len(x_train.shape) == 2: #if univariate, check to see if this may make things harder later on
        #adds dimension making it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))
        
    input_shape = x_train.shape
    print(input_shape)
    classifier = create_classifier(classifier_name, input_shape, nb_classes, output_dir, True)
    
    classifier.fit(x_train, y_train, x_test, y_test, y_true)
    

In [6]:
def create_classifier(classifier_name, input_shape, nb_classes, output_dir, verbose=False):
    if classifier_name == 'mlp':
        from models import mlp
        return mlp.Classifier_MLP(output_dir, input_shape, nb_classes, verbose)
    if classifier_name == 'lstmfcn':
        from models import lstmfcn
        return lstmfcn.Classifier_LSTMFCN(output_dir, input_shape, nb_classes,  verbose)
    if classifier_name == 'emn':
        return Classifier_EMN()

In [4]:
root_dir_win = 'C:/Users/worf9_000/Desktop/bs-thesis/experiments'
root_dir_arch = '/home/worf/Work/bs-thesis/experiments'

root_dir = os.getcwd()

dataset_name = 'ShapesAll'
classifier_name = 'emn'

datasets_dict = read_dataset(root_dir, dataset_name)

In [26]:
output_dir = root_dir + '/results/' + classifier_name + '/UCRArchive_2018/' + dataset_name + '/'

test_dir_df_metrics = output_dir + 'df_metrics.csv'

print('Method: ', dataset_name, classifier_name)

if os.path.exists(test_dir_df_metrics):
    print('Already done')
#else:
    
create_directory(output_dir)
datasets_dict = read_dataset(root_dir, dataset_name)
    
fit_classifier()
    
print('DONE')
    
create_directory(output_dir + '/DONE')

Method:  ShapesAll emn
(600, 512, 1)
[0.6, 0.7]
512
[307, 358]
600


ValueError: Input 0 of layer conv2d_5 is incompatible with the layer: : expected min_ndim=4, found ndim=2. Full shape received: [None, 32]

In [45]:
import logging
import tensorflow.keras as keras
import tensorflow as tf
tf.get_logger().setLevel(logging.ERROR)
import numpy as np

from layers.reservoir import Reservoir

from tqdm.keras import TqdmCallback
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_is_fitted

class Classifier_EMN(BaseEstimator, ClassifierMixin):
    def __init__(self, res_units=32, 
                 spectral_radius=0.9, input_scaling=0.1, 
                 connectivity=0.3, leaky=1, n_in=1,
                 epochs=500, batch_size=25, 
                 ratio=[0.1,0.2], num_filter=120,
                 verbose = True):
        self.res_units = res_units
        self.spectral_radius = spectral_radius
        self.input_scaling = input_scaling
        self.connectivity = connectivity
        self.leaky = leaky
        self.n_in = n_in
        self.epochs = epochs
        self.batch_size = batch_size
        self.ratio = ratio
        self.num_filter = num_filter
        
        self.verbose = verbose
        
        
        
    def build_model(self, input_shape, nb_classes, len_series):
        nb_rows = [np.int(self.ratio[0]*len_series),np.int(self.ratio[1]*len_series)]
        nb_cols = input_shape[2]
        
        input_layer = keras.layers.Input(input_shape)
        
        x_layer_1 = keras.layers.Conv2D(self.num_filter, (nb_rows[0], nb_cols), kernel_initializer='lecun_uniform', activation='relu',
                                       padding='valid', strides=(1,1), data_format = 'channels_last')(input_layer)
        x_layer_1 = keras.layers.GlobalMaxPooling2D(data_format = 'channels_first')(x_layer_1)
        
        
        
        y_layer_1 = keras.layers.Conv2D(self.num_filter, (nb_rows[1], nb_cols), kernel_initializer='lecun_uniform', activation='relu',
                                       padding='valid', strides=(1,1), data_format = 'channels_last')(input_layer)
        y_layer_1 = keras.layers.GlobalMaxPooling2D(data_format = 'channels_last')(y_layer_1)
        
        
        
        concat_layer = keras.layers.concatenate([x_layer_1, y_layer_1])
        concat_layer = keras.layers.Dropout(0.25)(concat_layer)
        
        output_layer = keras.layers.Dense(nb_classes, kernel_initializer='lecun_uniform', activation='softmax')(concat_layer)
        
        model = keras.models.Model(input_layer, output_layer)
        
        model.compile(loss='categorical_crossentropy', optimizer = tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])
        
        self.callbacks_ = []
        
        if self.verbose:
            self.callbacks_.append(TqdmCallback(verbose=0))
        
        
        
        return model
        
    def reshape_shuffle(self, x_train, y_train, nb_samples, nb_classes, len_series):

        #Generate template for train data
        train_data = np.zeros((nb_samples, 1, len_series, self.res_units))
        train_labels = np.zeros((nb_samples, nb_classes))

        #Generate Shuffle template
        L_train = [x_train for x_train in range(nb_samples)] #Array with size==samples, every value==index 
        np.random.shuffle(L_train)
        
        #For every series -> shuffle train and labels
        for m in range(nb_samples):
            train_data[m,0,:,:] = x_train[L_train[m],:,:] 
            train_labels[m,:] = y_train[L_train[m],:]
        
        return train_data, train_labels      
        
    def fit(self, x, y):              
        self.escnn_ = Reservoir(self.res_units, self.n_in, 
                                self.input_scaling, self.spectral_radius, 
                                self.connectivity, self.leaky, verbose = self.verbose)
        x = self.escnn_.set_weights(x)

        nb_samples_x = np.shape(x)[0]
        len_series = x.shape[1]
        input_shape = (len_series, self.res_units, 1)
        nb_classes = len(np.unique(np.argmax(y,axis=1)))

        x, y = self.reshape_shuffle(x, y, nb_samples_x, nb_classes, len_series)

        # From NCHW to NHWC
        x = tf.transpose(x, [0, 2, 3, 1])
        
        self.model = self.build_model(input_shape, nb_classes, len_series)
        
        hist = self.model.fit(x, y, batch_size=self.batch_size, epochs=self.epochs,
            verbose=False, callbacks=self.callbacks_)

        keras.backend.clear_session()
        
        return self
    
    def predict(self, x):
        
        check_is_fitted(self)
        
        x = self.escnn_.set_weights(x)
        nb_samples_test = np.shape(x)[0]
        len_series = x.shape[1]
        x = np.reshape(x, (nb_samples_test, len_series, self.res_units, 1))
        
        y_pred = self.model.predict(x)
        y_pred = np.argmax(y_pred, axis=1)
        
        return y_pred
    
    def score(self, x, y):
        x = self.escnn_.set_weights(x)
        nb_samples_x = np.shape(x)[0]
        len_series = x.shape[1]
        x = np.reshape(x, (nb_samples_x, len_series, self.res_units, 1))
        
        outputs = self.model.evaluate(x, y, verbose=False)
        if not isinstance(outputs, list):
            outputs = [outputs]
        for name, output in zip(self.model.metrics_names, outputs):
            if name in ['accuracy', 'acc']:
                return output
        raise ValueError('The model is not configured to compute accuracy. '
                         'You should pass `metrics=["accuracy"]` to '
                         'the `model.compile()` method.')
        
        

In [47]:
root_dir = os.getcwd()
dataset_name = 'Coffee'
datasets_dict = read_dataset(root_dir, dataset_name)
x_train = datasets_dict[dataset_name][0]
y_train = datasets_dict[dataset_name][1]
x_test = datasets_dict[dataset_name][2]
y_test = datasets_dict[dataset_name][3]

#one-hot-encoding
enc = sklearn.preprocessing.OneHotEncoder(categories='auto')
enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

In [52]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import TimeSeriesSplit

In [53]:
#tscv = TimeSeriesSplit(n_splits=3)

In [54]:
input_scaling = [0.1,1]
connectivity = [0.3,0.7]
param_grid_1 = dict(input_scaling = input_scaling, connectivity=connectivity)
emn_stage_1 = Classifier_EMN(verbose=False)
emn_stage_1.get_params()

{'batch_size': 25,
 'connectivity': 0.3,
 'epochs': 500,
 'input_scaling': 0.1,
 'leaky': 1,
 'n_in': 1,
 'num_filter': 120,
 'ratio': [0.1, 0.2],
 'res_units': 32,
 'spectral_radius': 0.9,
 'verbose': False}

In [60]:
grid_1 = GridSearchCV(estimator=emn_stage_1, param_grid=param_grid_1, cv=3, verbose=3)
grid_1_result = grid_1.fit(x_train, y_train)
print("Best: %f using %s" % (grid_1_result.best_score_, grid_1_result.best_params_))

Fitting 3 folds for each of 4 candidates, totalling 12 fits
[CV 1/3] END ............connectivity=0.3, input_scaling=0.1; total time=  11.3s
[CV 2/3] END ............connectivity=0.3, input_scaling=0.1; total time=  11.8s
[CV 3/3] END ............connectivity=0.3, input_scaling=0.1; total time=  12.0s
[CV 1/3] END ..............connectivity=0.3, input_scaling=1; total time=  11.4s
[CV 2/3] END ..............connectivity=0.3, input_scaling=1; total time=  11.9s
[CV 3/3] END ..............connectivity=0.3, input_scaling=1; total time=  12.1s
[CV 1/3] END ............connectivity=0.7, input_scaling=0.1; total time=  11.4s
[CV 2/3] END ............connectivity=0.7, input_scaling=0.1; total time=  12.0s
[CV 3/3] END ............connectivity=0.7, input_scaling=0.1; total time=  11.9s
[CV 1/3] END ..............connectivity=0.7, input_scaling=1; total time=  11.7s
[CV 2/3] END ..............connectivity=0.7, input_scaling=1; total time=  12.0s
[CV 3/3] END ..............connectivity=0.7, inpu

In [62]:
ratio = [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]]
param_grid_2 = dict(ratio = ratio)
emn_stage_2 = grid_1_result.best_estimator_
grid_2 = GridSearchCV(estimator=emn_stage_2, param_grid=param_grid_2, cv=3, verbose=3)
grid_2.get_params()

{'cv': 3,
 'error_score': nan,
 'estimator__batch_size': 25,
 'estimator__connectivity': 0.7,
 'estimator__epochs': 500,
 'estimator__input_scaling': 0.1,
 'estimator__leaky': 1,
 'estimator__n_in': 1,
 'estimator__num_filter': 120,
 'estimator__ratio': [0.1, 0.2],
 'estimator__res_units': 32,
 'estimator__spectral_radius': 0.9,
 'estimator__verbose': False,
 'estimator': Classifier_EMN(connectivity=0.7, verbose=False),
 'n_jobs': None,
 'param_grid': {'ratio': [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6], [0.7, 0.8]]},
 'pre_dispatch': '2*n_jobs',
 'refit': True,
 'return_train_score': False,
 'scoring': None,
 'verbose': 3}

In [63]:
grid_2_result = grid_2.fit(x_train, y_train)
print("Best: %f using %s" % (grid_2_result.best_score_, grid_2_result.best_params_))

Fitting 3 folds for each of 4 candidates, totalling 12 fits
[CV 1/3] END ...............................ratio=[0.1, 0.2]; total time=  11.3s
[CV 2/3] END ...............................ratio=[0.1, 0.2]; total time=  11.6s
[CV 3/3] END ...............................ratio=[0.1, 0.2]; total time=  11.7s
[CV 1/3] END ...............................ratio=[0.3, 0.4]; total time=   9.6s
[CV 2/3] END ...............................ratio=[0.3, 0.4]; total time=  10.3s
[CV 3/3] END ...............................ratio=[0.3, 0.4]; total time=  10.1s
[CV 1/3] END ...............................ratio=[0.5, 0.6]; total time=   8.4s
[CV 2/3] END ...............................ratio=[0.5, 0.6]; total time=   8.6s
[CV 3/3] END ...............................ratio=[0.5, 0.6]; total time=   8.8s
[CV 1/3] END ...............................ratio=[0.7, 0.8]; total time=   5.9s
[CV 2/3] END ...............................ratio=[0.7, 0.8]; total time=   6.2s
[CV 3/3] END ...............................ratio

In [69]:
final = grid_2_result.best_estimator_
final.get_params()
final.fit(x_train, y_train)

Classifier_EMN(connectivity=0.7, ratio=[0.5, 0.6], verbose=False)

In [70]:
final.score(x_test,y_test)

1.0

In [71]:
final.get_params()

{'batch_size': 25,
 'connectivity': 0.7,
 'epochs': 500,
 'input_scaling': 0.1,
 'leaky': 1,
 'n_in': 1,
 'num_filter': 120,
 'ratio': [0.5, 0.6],
 'res_units': 32,
 'spectral_radius': 0.9,
 'verbose': False}