In [1]:
# import the required module
from utils.utils import generate_results_csv
from utils.utils import create_directory
from utils.utils import read_dataset
from utils.utils import get_func_length
from utils.utils import transform_to_same_length
from utils.utils import transform_mts_to_ucr_format

In [2]:
# import the required module
import os
import numpy as np
import pandas as pd
import sys
import sklearn
import utils
import classifiers
import datetime

from utils.constants import CLASSIFIERS
from utils.constants import ARCHIVE_NAMES
from utils.constants import ITERATIONS
from utils.utils import read_all_datasets

In [3]:
# Definition of fit function
def fit_classifier():
    """Train the currently selected classifier on the currently selected dataset.

    The function takes no arguments. It reads the following notebook-level
    globals, which the calling cell must assign beforehand:

    - ``datasets_dict``: mapping whose ``dataset_name`` entry is indexed as
      ``(x_train, y_train, x_test, y_test)``.
    - ``dataset_name``: key of the dataset to train on.
    - ``classifier_name``: forwarded to ``create_classifier``.
    - ``output_directory``: forwarded to ``create_classifier``.

    Returns:
        None. Training is delegated to ``classifier.fit``.

    Raises:
        ValueError: if ``create_classifier`` does not return a classifier
            for ``classifier_name``.
    """
    # Import the submodule explicitly: a bare ``import sklearn`` does not
    # guarantee that ``sklearn.preprocessing`` is reachable as an attribute.
    from sklearn.preprocessing import OneHotEncoder

    dataset = datasets_dict[dataset_name]
    x_train, y_train, x_test, y_test = dataset[0], dataset[1], dataset[2], dataset[3]

    # The label set is taken over train and test together so that both
    # splits share one encoding.
    all_labels = np.concatenate((y_train, y_test), axis=0)
    nb_classes = len(np.unique(all_labels))

    # transform the labels into one hot vectors
    encoder = OneHotEncoder(categories='auto')
    encoder.fit(all_labels.reshape(-1, 1))
    y_train = encoder.transform(y_train.reshape(-1, 1)).toarray()
    y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()

    # keep the test labels as class indices next to the one hot version
    y_true = np.argmax(y_test, axis=1)

    if len(x_train.shape) == 2:  # if univariate
        # add a trailing axis of size 1 so the input is 3-dimensional
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    input_shape = x_train.shape[1:]
    classifier = create_classifier(classifier_name, input_shape, nb_classes, output_directory)
    if classifier is None:
        # Fail here with a clear message instead of an AttributeError on .fit
        raise ValueError('no classifier available for classifier_name=%r' % (classifier_name,))

    classifier.fit(x_train, y_train, x_test, y_test, y_true)

In [4]:
# Classifier factory
# Two classifiers are supported: FCN and ResNet
def create_classifier(classifier_name, input_shape, nb_classes, output_directory, verbose=False):
    """Instantiate the classifier selected by ``classifier_name``.

    Args:
        classifier_name: ``'fcn'`` or ``'resnet'``.
        input_shape: forwarded unchanged to the classifier constructor.
        nb_classes: forwarded unchanged to the classifier constructor.
        output_directory: forwarded unchanged to the classifier constructor.
        verbose: forwarded unchanged to the classifier constructor.

    Returns:
        A ``fcn.Classifier_FCN`` or ``resnet.Classifier_RESNET`` instance.

    Raises:
        ValueError: if ``classifier_name`` is neither ``'fcn'`` nor
            ``'resnet'`` (previously this case silently returned ``None``).
    """
    # The model modules are imported inside the branches, so only the
    # requested one is loaded.
    if classifier_name == 'fcn':
        from classifiers import fcn
        return fcn.Classifier_FCN(output_directory, input_shape, nb_classes, verbose)
    if classifier_name == 'resnet':
        from classifiers import resnet
        return resnet.Classifier_RESNET(output_directory, input_shape, nb_classes, verbose)

    raise ValueError(
        "unknown classifier_name %r; expected 'fcn' or 'resnet'" % (classifier_name,)
    )


In [5]:
# Root directory of the dataset tree: the readers in this notebook look for
# data under root_dir + '/archives/' and results are written under
# root_dir + '/results/'.
# Set the DL4TSC_ROOT_DIR environment variable to run the notebook on another
# machine; without it the original location is used.
root_dir = os.environ.get(
    'DL4TSC_ROOT_DIR',
    'D:/FH_Dortmund/Projekt/Projektarbeit_2/time_series/dl-4-tsc-master/dataset',
)

# 4.1 Fixed length and no missing values (univariate dataset)

In [8]:
# Main program: fixed-length dataset without missing values.
# Configuration: archive, dataset and classifier to train.
archive_name, dataset_name = "UCRArchive_2018", 'ArrowHead'
classifier_name = 'fcn'  # 'resnet' is the other name create_classifier accepts
itr = '0'  # not read anywhere in this cell

# `iter` only sets the "_itr_<n>" suffix of the result directory; the network
# is selected by classifier_name. With iter == 0 nothing is trained.
# NOTE(review): this name shadows the builtin iter() in the notebook namespace.
iter = 1

# read the training data
datasets_dict = read_all_datasets(root_dir, archive_name)

print('\t\titer', iter)

if iter == 0:
    trr = ''
    print('iter = ', iter)
else:
    trr = '_itr_' + str(iter)

    # results go to <root_dir>/results/<classifier>/<archive><suffix>/<dataset>/
    tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + trr + '/'
    print('\t\t\tdataset_name: ', dataset_name)
    output_directory = tmp_output_directory + dataset_name + '/'
    create_directory(output_directory)

    # fit_classifier() takes its inputs from the globals assigned above
    fit_classifier()
    print('\t\t\t\tDONE')

    # A 'DONE' sub-directory is created once fit_classifier() has returned;
    # presumably it serves as a completion marker — TODO confirm.
    create_directory(output_directory + '/DONE')

		iter 1
			dataset_name:  ArrowHead
				DONE


# 4.2 Varying length and missing values

In [6]:
# We define the function read_dataset_original to read the original training data

def read_dataset_original(root_dir, archive_name, dataset_name):
    """Read one dataset without any special handling of NaN entries.

    For ``archive_name == 'UCRArchive_2018'`` the files
    ``<dataset_name>_TRAIN.tsv`` and ``<dataset_name>_TEST.tsv`` are read from
    ``<root_dir>/archives/<archive_name>/<dataset_name>/``. Column 0 holds the
    labels, the remaining columns the series. Every series (row) is
    z-normalised. NaN entries are left in place, so a row containing NaN
    comes out as all-NaN after normalisation. The raw and the normalised
    training arrays are printed.

    For any other archive the data is read with ``readucr`` and is not
    normalised here.

    Args:
        root_dir: dataset root; any ``'-temp'`` substring is removed first.
        archive_name: name of the archive directory.
        dataset_name: name of the dataset directory and file prefix.

    Returns:
        dict mapping ``dataset_name`` to copies of
        ``(x_train, y_train, x_test, y_test)``.
    """
    def _znorm_rows(values):
        # A constant row has std 0; dividing by 1 instead leaves it at 0.
        std_ = values.std(axis=1, keepdims=True)
        std_[std_ == 0] = 1.0
        return (values - values.mean(axis=1, keepdims=True)) / std_

    datasets_dict = {}
    cur_root_dir = root_dir.replace('-temp', '')
    dataset_dir = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/'

    if archive_name == 'UCRArchive_2018':
        df_train = pd.read_csv(dataset_dir + dataset_name + '_TRAIN.tsv', sep='\t', header=None)
        df_test = pd.read_csv(dataset_dir + dataset_name + '_TEST.tsv', sep='\t', header=None)

        # column 0 is the label, everything else is the series
        y_train = df_train.values[:, 0]
        y_test = df_test.values[:, 0]
        x_train = df_train.drop(columns=[0]).values
        x_test = df_test.drop(columns=[0]).values

        print("original x_train:", x_train, '\n')

        # znorm, train and test independently, one series at a time
        x_train = _znorm_rows(x_train)
        x_test = _znorm_rows(x_test)

        print("z_norm x_train:", x_train, '\n')
    else:
        # readucr is not imported at the top of the notebook, so it is
        # imported here to avoid a NameError on this branch.
        # NOTE(review): assumes utils.utils exposes readucr — confirm.
        from utils.utils import readucr

        file_name = dataset_dir + dataset_name
        x_train, y_train = readucr(file_name + '_TRAIN')
        x_test, y_test = readucr(file_name + '_TEST')

    datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(),
                                   y_test.copy())
    return datasets_dict

In [7]:
# We define the function read_dataset_nan, which replaces the NaN values with 0

def read_dataset_nan(root_dir, archive_name, dataset_name):
    """Read one UCR dataset, replace NaN entries with 0 and z-normalise it.

    The files ``<dataset_name>_TRAIN.tsv`` and ``<dataset_name>_TEST.tsv`` are
    read from ``<root_dir>/archives/<archive_name>/<dataset_name>/``. Column 0
    holds the labels, the remaining columns the series. NaN entries are set
    to 0 first; afterwards every series (row) is z-normalised, so the filled
    positions take part in the row mean and standard deviation. The raw, the
    filled and the normalised training arrays are printed.

    Args:
        root_dir: dataset root; any ``'-temp'`` substring is removed first.
        archive_name: only ``'UCRArchive_2018'`` is handled.
        dataset_name: name of the dataset directory and file prefix.

    Returns:
        dict mapping ``dataset_name`` to copies of
        ``(x_train, y_train, x_test, y_test)``; an empty dict when
        ``archive_name`` is not ``'UCRArchive_2018'``.
    """
    def _znorm_rows(values):
        # A constant row has std 0; dividing by 1 instead leaves it at 0.
        std_ = values.std(axis=1, keepdims=True)
        std_[std_ == 0] = 1.0
        return (values - values.mean(axis=1, keepdims=True)) / std_

    datasets_dict = {}
    cur_root_dir = root_dir.replace('-temp', '')

    if archive_name == 'UCRArchive_2018':
        dataset_dir = cur_root_dir + '/archives/' + archive_name + '/' + dataset_name + '/'
        df_train = pd.read_csv(dataset_dir + dataset_name + '_TRAIN.tsv', sep='\t', header=None)
        df_test = pd.read_csv(dataset_dir + dataset_name + '_TEST.tsv', sep='\t', header=None)

        # column 0 is the label, everything else is the series
        y_train = df_train.values[:, 0]
        y_test = df_test.values[:, 0]
        x_train = df_train.drop(columns=[0]).values
        x_test = df_test.drop(columns=[0]).values

        print("original x_train:", x_train, '\n')

        # nan -> 0
        # Build new arrays instead of assigning in place: the array returned
        # by DataFrame.values may be a read-only view (pandas Copy-on-Write),
        # in which case an in-place write raises.
        x_train = np.where(np.isnan(x_train), 0, x_train)
        x_test = np.where(np.isnan(x_test), 0, x_test)

        print("nan to 0:", x_train, '\n')

        # znorm, train and test independently, one series at a time
        x_train = _znorm_rows(x_train)
        x_test = _znorm_rows(x_test)

        print("z_norm x_train:", x_train, '\n')

        datasets_dict[dataset_name] = (x_train.copy(), y_train.copy(), x_test.copy(),
                                       y_test.copy())

    return datasets_dict

## Varying length

In [8]:
# Run both readers on the dataset of the varying-length section:
# read_dataset_original keeps the NaN entries, read_dataset_nan sets them to 0
# before normalising.
archive_name, dataset_name = "UCRArchive_2018", 'AllGestureWiimoteX'

read_dataset_original(root_dir, archive_name, dataset_name)

# last expression of the cell, so its return value is displayed
read_dataset_nan(root_dir, archive_name, dataset_name)

original x_train: [[ 0.     0.038  0.038 ...    nan    nan    nan]
 [-0.962 -0.962 -0.962 ...    nan    nan    nan]
 [ 0.423  0.538  0.5   ...    nan    nan    nan]
 ...
 [ 0.038  0.     0.038 ...    nan    nan    nan]
 [-0.115 -0.231 -0.115 ...    nan    nan    nan]
 [ 0.154  0.115  0.115 ...    nan    nan    nan]] 

z_norm x_train: [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]] 

original x_train: [[ 0.     0.038  0.038 ...    nan    nan    nan]
 [-0.962 -0.962 -0.962 ...    nan    nan    nan]
 [ 0.423  0.538  0.5   ...    nan    nan    nan]
 ...
 [ 0.038  0.     0.038 ...    nan    nan    nan]
 [-0.115 -0.231 -0.115 ...    nan    nan    nan]
 [ 0.154  0.115  0.115 ...    nan    nan    nan]] 

nan to 0: [[ 0.     0.038  0.038 ...  0.     0.     0.   ]
 [-0.962 -0.962 -0.962 ...  0.     0.     0.   ]
 [ 0.423  0.538  0.5   ...  0.     0.     

{'AllGestureWiimoteX': (array([[-0.44613952, -0.05304682, -0.05304682, ..., -0.44613952,
          -0.44613952, -0.44613952],
         [-1.76852885, -1.76852885, -1.76852885, ...,  0.63900164,
           0.63900164,  0.63900164],
         [ 4.73360396,  6.0952572 ,  5.64531961, ..., -0.27491187,
          -0.27491187, -0.27491187],
         ...,
         [ 0.45535108, -0.1643517 ,  0.45535108, ..., -0.1643517 ,
          -0.1643517 , -0.1643517 ],
         [-1.04867495, -2.62661162, -1.04867495, ...,  0.51565882,
           0.51565882,  0.51565882],
         [ 1.42675073,  0.98651402,  0.98651402, ..., -0.31161986,
          -0.31161986, -0.31161986]]),
  array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
          1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,
          1.,  1.,  1.,  1.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,
          2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,  2.,
          2.,  2.,  2.,  2.,  2.,  2.,  2.

In [9]:
# Main program: dataset of the varying-length section.
# Configuration: archive, dataset and classifier to train.
archive_name, dataset_name = "UCRArchive_2018", 'AllGestureWiimoteX'
classifier_name = 'fcn'  # 'resnet' is the other name create_classifier accepts
itr = '0'  # not read anywhere in this cell

# `iter` only sets the "_itr_<n>" suffix of the result directory; the network
# is selected by classifier_name. With iter == 0 nothing is trained.
# NOTE(review): this name shadows the builtin iter() in the notebook namespace.
iter = 1

# The data is loaded with read_dataset_nan, i.e. NaN entries are set to 0
# before the series are z-normalised.
datasets_dict = read_dataset_nan(root_dir, archive_name, dataset_name)

print('\t\titer', iter)

if iter == 0:
    trr = ''
    print('iter = ', iter)
else:
    trr = '_itr_' + str(iter)

    # results go to <root_dir>/results/<classifier>/<archive><suffix>/<dataset>/
    tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + trr + '/'
    print('\t\t\tdataset_name: ', dataset_name)
    output_directory = tmp_output_directory + dataset_name + '/'
    create_directory(output_directory)

    # fit_classifier() takes its inputs from the globals assigned above
    fit_classifier()
    print('\t\t\t\tDONE')

    # A 'DONE' sub-directory is created once fit_classifier() has returned;
    # presumably it serves as a completion marker — TODO confirm.
    create_directory(output_directory + '/DONE')

original x_train: [[ 0.     0.038  0.038 ...    nan    nan    nan]
 [-0.962 -0.962 -0.962 ...    nan    nan    nan]
 [ 0.423  0.538  0.5   ...    nan    nan    nan]
 ...
 [ 0.038  0.     0.038 ...    nan    nan    nan]
 [-0.115 -0.231 -0.115 ...    nan    nan    nan]
 [ 0.154  0.115  0.115 ...    nan    nan    nan]] 

nan to 0: [[ 0.     0.038  0.038 ...  0.     0.     0.   ]
 [-0.962 -0.962 -0.962 ...  0.     0.     0.   ]
 [ 0.423  0.538  0.5   ...  0.     0.     0.   ]
 ...
 [ 0.038  0.     0.038 ...  0.     0.     0.   ]
 [-0.115 -0.231 -0.115 ...  0.     0.     0.   ]
 [ 0.154  0.115  0.115 ...  0.     0.     0.   ]] 

z_norm x_train: [[-0.44613952 -0.05304682 -0.05304682 ... -0.44613952 -0.44613952
  -0.44613952]
 [-1.76852885 -1.76852885 -1.76852885 ...  0.63900164  0.63900164
   0.63900164]
 [ 4.73360396  6.0952572   5.64531961 ... -0.27491187 -0.27491187
  -0.27491187]
 ...
 [ 0.45535108 -0.1643517   0.45535108 ... -0.1643517  -0.1643517
  -0.1643517 ]
 [-1.04867495 -2.6266116

## Missing values

In [11]:
# Run both readers on the dataset of the missing-value section:
# read_dataset_original keeps the NaN entries, read_dataset_nan sets them to 0
# before normalising.
archive_name, dataset_name = "UCRArchive_2018", 'DodgerLoopDay'

read_dataset_original(root_dir, archive_name, dataset_name)

# last expression of the cell, so its return value is displayed
read_dataset_nan(root_dir, archive_name, dataset_name)

original x_train: [[12. 18. 11. ...  6.  3.  6.]
 [12.  9. 11. ... 10.  4.  9.]
 [ 8.  5. 10. ...  7.  8.  6.]
 ...
 [11. 10.  8. ... 14.  9. 13.]
 [ 9.  4.  3. ... 14.  9. 11.]
 [16. 11.  5. ... 16. 21.  6.]] 

z_norm x_train: [[-0.41782779  0.21606722 -0.52347695 ... -1.05172279 -1.3686703
  -1.05172279]
 [-0.39967471 -0.76278612 -0.52071185 ... -0.64174898 -1.3679718
  -0.76278612]
 [-0.80742402 -1.11326108 -0.60353264 ... -0.90936971 -0.80742402
  -1.0113154 ]
 ...
 [        nan         nan         nan ...         nan         nan
          nan]
 [-0.80444756 -1.28115722 -1.37649916 ... -0.32773789 -0.80444756
  -0.61376369]
 [-0.23282975 -0.65938808 -1.17125806 ... -0.23282975  0.19372857
  -1.0859464 ]] 

original x_train: [[12. 18. 11. ...  6.  3.  6.]
 [12.  9. 11. ... 10.  4.  9.]
 [ 8.  5. 10. ...  7.  8.  6.]
 ...
 [11. 10.  8. ... 14.  9. 13.]
 [ 9.  4.  3. ... 14.  9. 11.]
 [16. 11.  5. ... 16. 21.  6.]] 

nan to 0: [[12. 18. 11. ...  6.  3.  6.]
 [12.  9. 11. ... 10.  4.  

{'DodgerLoopDay': (array([[-0.41782779,  0.21606722, -0.52347695, ..., -1.05172279,
          -1.3686703 , -1.05172279],
         [-0.39967471, -0.76278612, -0.52071185, ..., -0.64174898,
          -1.3679718 , -0.76278612],
         [-0.80742402, -1.11326108, -0.60353264, ..., -0.90936971,
          -0.80742402, -1.0113154 ],
         ...,
         [-0.74013875, -0.81530094, -0.96562531, ..., -0.51465219,
          -0.89046312, -0.58981438],
         [-0.80444756, -1.28115722, -1.37649916, ..., -0.32773789,
          -0.80444756, -0.61376369],
         [-0.23282975, -0.65938808, -1.17125806, ..., -0.23282975,
           0.19372857, -1.0859464 ]]),
  array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2.,
         2., 2., 2., 2., 2., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 4., 4.,
         4., 4., 4., 4., 4., 4., 4., 4., 4., 5., 5., 5., 5., 5., 5., 5., 5.,
         5., 5., 5., 5., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 7.,
         7., 7., 7., 7., 7., 7., 7.

In [10]:
# Main program: dataset of the missing-value section.
# Configuration: archive, dataset and classifier to train.
archive_name, dataset_name = "UCRArchive_2018", 'DodgerLoopDay'
classifier_name = 'fcn'  # 'resnet' is the other name create_classifier accepts
itr = '0'  # not read anywhere in this cell

# `iter` only sets the "_itr_<n>" suffix of the result directory; the network
# is selected by classifier_name. With iter == 0 nothing is trained.
# NOTE(review): this name shadows the builtin iter() in the notebook namespace.
iter = 1

# The data is loaded with read_dataset_nan, i.e. NaN entries are set to 0
# before the series are z-normalised.
datasets_dict = read_dataset_nan(root_dir, archive_name, dataset_name)

print('\t\titer', iter)

if iter == 0:
    trr = ''
    print('iter = ', iter)
else:
    trr = '_itr_' + str(iter)

    # results go to <root_dir>/results/<classifier>/<archive><suffix>/<dataset>/
    tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + trr + '/'
    print('\t\t\tdataset_name: ', dataset_name)
    output_directory = tmp_output_directory + dataset_name + '/'
    create_directory(output_directory)

    # fit_classifier() takes its inputs from the globals assigned above
    fit_classifier()
    print('\t\t\t\tDONE')

    # A 'DONE' sub-directory is created once fit_classifier() has returned;
    # presumably it serves as a completion marker — TODO confirm.
    create_directory(output_directory + '/DONE')

original x_train: [[12. 18. 11. ...  6.  3.  6.]
 [12.  9. 11. ... 10.  4.  9.]
 [ 8.  5. 10. ...  7.  8.  6.]
 ...
 [11. 10.  8. ... 14.  9. 13.]
 [ 9.  4.  3. ... 14.  9. 11.]
 [16. 11.  5. ... 16. 21.  6.]] 

nan to 0: [[12. 18. 11. ...  6.  3.  6.]
 [12.  9. 11. ... 10.  4.  9.]
 [ 8.  5. 10. ...  7.  8.  6.]
 ...
 [11. 10.  8. ... 14.  9. 13.]
 [ 9.  4.  3. ... 14.  9. 11.]
 [16. 11.  5. ... 16. 21.  6.]] 

z_norm x_train: [[-0.41782779  0.21606722 -0.52347695 ... -1.05172279 -1.3686703
  -1.05172279]
 [-0.39967471 -0.76278612 -0.52071185 ... -0.64174898 -1.3679718
  -0.76278612]
 [-0.80742402 -1.11326108 -0.60353264 ... -0.90936971 -0.80742402
  -1.0113154 ]
 ...
 [-0.74013875 -0.81530094 -0.96562531 ... -0.51465219 -0.89046312
  -0.58981438]
 [-0.80444756 -1.28115722 -1.37649916 ... -0.32773789 -0.80444756
  -0.61376369]
 [-0.23282975 -0.65938808 -1.17125806 ... -0.23282975  0.19372857
  -1.0859464 ]] 

		iter 1
			dataset_name:  DodgerLoopDay


  'precision', 'predicted', average, warn_for)


				DONE


# 4.3 Multivariate dataset

In [None]:
# The multivariate datasets cannot be used directly; we first define a function that converts the data format.
# After this function has been run, the main program can be used to get the result.

def transform_mts_to_ucr_format(dataset_names=None, mts_root_dir=None, mts_out_dir=None):
    """Convert multivariate ``.mat`` datasets into ``.npy`` train/test arrays.

    For every dataset name, ``<mts_root_dir><name>/<name>.mat`` is loaded and
    its ``'mts'`` struct is read. The fields ``train``, ``test``,
    ``trainlabels`` and ``testlabels`` are flattened to one dimension, the
    series of both splits are brought to the maximum length found via
    ``transform_to_same_length``, and the four arrays are saved as
    ``x_train.npy``, ``y_train.npy``, ``x_test.npy`` and ``y_test.npy`` in
    ``<mts_out_dir><name>/``.

    A dataset is skipped (``'Already_done'`` is printed) when
    ``create_directory`` returns ``None`` for its output directory.

    This notebook-level definition replaces the function of the same name
    imported from ``utils.utils`` at the top of the notebook.

    Args:
        dataset_names: iterable of dataset names. Defaults to
            ``utils.constants.MTS_DATASET_NAMES``.
        mts_root_dir: directory containing the ``.mat`` inputs, with a
            trailing ``'/'``. Defaults to the original hard-coded location.
        mts_out_dir: directory receiving the ``.npy`` outputs, with a
            trailing ``'/'``. Defaults to the original hard-coded location.

    Returns:
        None.

    Raises:
        KeyError: if a ``.mat`` file lacks one of the four required fields.
    """
    # Neither name is imported at the top of the notebook, so both are
    # brought into scope here to avoid a NameError.
    from scipy.io import loadmat

    if dataset_names is None:
        # NOTE(review): assumes utils.constants defines MTS_DATASET_NAMES — confirm.
        from utils.constants import MTS_DATASET_NAMES
        dataset_names = MTS_DATASET_NAMES
    if mts_root_dir is None:
        mts_root_dir = 'D:/FH_Dortmund/Projekt/Projektarbeit_2/time_series/dl-4-tsc-master/dataset/archives/mts_data/'
    if mts_out_dir is None:
        mts_out_dir = 'D:/FH_Dortmund/Projekt/Projektarbeit_2/time_series/dl-4-tsc-master/dataset/archives/mts_archive/'

    required_fields = ('train', 'test', 'trainlabels', 'testlabels')

    for dataset_name in dataset_names:
        print('dataset_name',dataset_name)

        out_dir = mts_out_dir + dataset_name + '/'

        if create_directory(out_dir) is None:
            print('Already_done')
            continue

        mts = loadmat(mts_root_dir + dataset_name + '/' + dataset_name + '.mat')
        mts = mts['mts']
        mts = mts[0, 0]

        available = list(mts.dtype.names)
        missing = [field for field in required_fields if field not in available]
        if missing:
            # Without this check a missing field would only surface later as
            # an unbound local variable.
            raise KeyError('%s.mat lacks the field(s): %s' % (dataset_name, ', '.join(missing)))

        # flatten each field to one dimension along its longest axis
        flat = {}
        for field in required_fields:
            values = mts[field]
            flat[field] = values.reshape(max(values.shape))

        x_train, x_test = flat['train'], flat['test']
        y_train, y_test = flat['trainlabels'], flat['testlabels']

        # size of the first axis of the first training series
        n_var = x_train[0].shape[0]

        max_length = get_func_length(x_train, x_test, func=max)
        min_length = get_func_length(x_train, x_test, func=min)

        print(dataset_name, 'max', max_length, 'min', min_length)
        print()

        x_train = transform_to_same_length(x_train, n_var, max_length)
        x_test = transform_to_same_length(x_test, n_var, max_length)

        # save them
        np.save(out_dir + 'x_train.npy', x_train)
        np.save(out_dir + 'y_train.npy', y_train)
        np.save(out_dir + 'x_test.npy', x_test)
        np.save(out_dir + 'y_test.npy', y_test)

        print('Done')

In [None]:
# main Program for MTS
# set the archive_name and import the training dataset
# decide the classifier -- fcn or resnet
#
# 'ArabicDigits' is a multivariate dataset. transform_mts_to_ucr_format()
# writes its converted arrays below '<root_dir>/archives/mts_archive/', so the
# archive must be 'mts_archive' rather than 'UCRArchive_2018'.
# NOTE(review): assumes read_all_datasets handles 'mts_archive' by loading the
# saved .npy files, and that transform_mts_to_ucr_format() has been run
# beforehand — confirm.
archive_name = 'mts_archive'
dataset_name = 'ArabicDigits'
classifier_name = 'fcn'
# classifier_name = 'resnet'
itr = '0'

# `iter` only sets the "_itr_<n>" suffix of the result directory; the network
# is selected by classifier_name. With iter == 0 nothing is trained.
# NOTE(review): this name shadows the builtin iter() in the notebook namespace.
iter = 1

# read the training data
datasets_dict = read_all_datasets(root_dir, archive_name)


print('\t\titer', iter)
trr = ''

# create the result directory and train
if iter != 0:
    trr = '_itr_' + str(iter)

    tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + trr + '/'

    print('\t\t\tdataset_name: ', dataset_name)

    output_directory = tmp_output_directory + dataset_name + '/'

    create_directory(output_directory)

    # fit the data; fit_classifier() takes its inputs from the globals above
    fit_classifier()

    print('\t\t\t\tDONE')

    # A 'DONE' sub-directory is created once fit_classifier() has returned;
    # presumably it serves as a completion marker — TODO confirm.
    create_directory(output_directory + '/DONE')

else:
    print('iter = ', iter)