In [7]:
from utils.constants import UNIVARIATE_ARCHIVE_NAMES as ARCHIVE_NAMES

from utils.utils import read_all_datasets
from utils.utils import transform_labels
from utils.utils import create_directory
from utils.utils import run_length_xps
from utils.utils import generate_results_csv
from data_formatting import split_sequence_overlap, split_sequence_nooverlap, split_sequence, split_train_test, normalize_data, set_targets
import parameters
parameters.initialize_parameters()


import utils
import numpy as np
import pandas as pd
import sys
import sklearn
import torch
from torch.utils.data import TensorDataset, DataLoader

In [23]:
def split_x_y(data_loader):
  x = []
  y = []
  for i, (x_batch, y_batch) in enumerate(data_loader):
    x.append(x_batch)
    y.append(y_batch)
  x = torch.cat(x, dim=0)
  y = torch.cat(y, dim=0)
  return x, y

# function for testing the incepiton time series classifier
# Training on one specific subject
def test(list_targets, list_labels, subj, train_df):
    # Select only the classes we want to predict
    train_df, nclasses, targets_numpy = set_targets(train_df, list_targets, list_labels)

    # Convert the subject names (strings) into numbers
    subjects = pd.factorize(train_df['Subject'])[0]

    # Normalise the features
    features_numpy = normalize_data(train_df, False) #parameters.normalise_individual_subjects
    input_dim = features_numpy.shape[1]
    print(f"Number of features: {input_dim}")

    del train_df

    subj = np.unique(subjects)
    print(f"Sujets: {subj}")

    test_idx = np.array([subj[0]])
    print(f"test: {test_idx}")

    train_idx = np.setxor1d(subj, test_idx)
    print(f"Train subjects: {train_idx}")

    print("Generating train/val/test split...")
    features_train, targets_train, features_test, targets_test = split_train_test(targets_numpy, features_numpy, subjects, train_idx, test_idx)

    print("Generating sequences...")
    features_train, targets_train = split_sequence_overlap(features_train, targets_train, parameters.seq_dim, parameters.overlap_size)
    
    # Overlap or no
    if parameters.test_with_subsequences:
      features_test, targets_test = split_sequence_overlap(features_test, targets_test, parameters.test_seq_dim, parameters.test_overlap_size)
    else:
      features_test, targets_test = split_sequence_nooverlap(features_test, targets_test, parameters.test_seq_dim, parameters.test_overlap_size)

    #print(f"Number of training examples: {len(targets_train)}")
    #print(f"Number of test examples: {len(targets_test)}")

    # Create feature and targets tensor for train set. We need variable to accumulate gradients. Therefore first we create tensor, then we will create variable
    featuresTrain = torch.from_numpy(features_train)
    targetsTrain = torch.from_numpy(targets_train).type(torch.LongTensor)  # data type is long


    # Pytorch train and validation sets
    train = TensorDataset(featuresTrain, targetsTrain)
    
    # Data loader
    train_loader = DataLoader(train, batch_size=parameters.batch_size, shuffle=True)

    # Create feature and targets tensor for test set
    if parameters.test_with_subsequences:
      featuresTest = torch.from_numpy(features_test)
      targetsTest = torch.from_numpy(targets_test).type(torch.LongTensor)  # data type is long
      test = TensorDataset(featuresTest, targetsTest)
      test_loader = DataLoader(test, batch_size=parameters.batch_size, shuffle=False)

    # Split test and train sets
    x_test, y_test = split_x_y(test_loader)
    x_train, y_train = split_x_y(train_loader)


    # save orignal y because later we will use binary
    y_true = y_test.to(torch.int64)
    y_true_train = y_train.to(torch.int64)
    # transform the labels from integers to one hot vectors
    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

    if len(x_train.shape) == 2:  # if univariate
        print("Univariate time series classification\n")
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))
    print(f"Number of classes: {nb_classes}")

    return x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc

In [3]:
def prepare_data(list_targets):

    """
    x_train = datasets_dict[dataset_name][0]
    y_train = datasets_dict[dataset_name][1]
    x_test = datasets_dict[dataset_name][2]
    y_test = datasets_dict[dataset_name][3]

    nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    # make the min to zero of labels
    y_train, y_test = transform_labels(y_train, y_test)

    # save orignal y because later we will use binary
    y_true = y_test.astype(np.int64)
    y_true_train = y_train.astype(np.int64)
    # transform the labels from integers to one hot vectors
    enc = sklearn.preprocessing.OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_test), axis=0).reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))"""

    return x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc


def fit_classifier():
    input_shape = x_train.shape[1:]

    classifier = create_classifier(classifier_name, input_shape, nb_classes,
                                   output_directory)

    classifier.fit(x_train, y_train, x_test, y_test, y_true)


def create_classifier(classifier_name, input_shape, nb_classes, output_directory,
                      verbose=False, build=True):
    if classifier_name == 'nne':
        from classifiers import nne
        return nne.Classifier_NNE(output_directory, input_shape,
                                  nb_classes, verbose)
    if classifier_name == 'inception':
        from classifiers import inception
        return inception.Classifier_INCEPTION(output_directory, input_shape, nb_classes, verbose,
                                              build=build)


def get_xp_val(xp):
    if xp == 'batch_size':
        xp_arr = [16, 32, 128]
    elif xp == 'use_bottleneck':
        xp_arr = [False]
    elif xp == 'use_residual':
        xp_arr = [False]
    elif xp == 'nb_filters':
        xp_arr = [16, 64]
    elif xp == 'depth':
        xp_arr = [3, 9]
    elif xp == 'kernel_size':
        xp_arr = [8, 64]
    else:
        raise Exception('wrong argument')
    return xp_arr




In [5]:
# to remove
csv_file = "../inception_time_root_dir/archives/All_Subs_Diff_Modules_nofilter_withoutAUc.csv"
train_df = pd.read_csv(csv_file,  delimiter=",")  # 101 features (only AU_r)


In [26]:
############################################### main
root_dir = '../inception_time_root_dir/'
xps = ['use_bottleneck', 'use_residual', 'nb_filters', 'depth',
       'kernel_size', 'batch_size']

sys.argv = ['main.py', 'InceptionTime']

list_targets = [0, 3]
list_labels = [0, 1]

if sys.argv[1] == 'InceptionTime':
    # run nb_iter_ iterations of Inception on the whole TSC archive
    classifier_name = 'inception'
    archive_name = ARCHIVE_NAMES[0]
    nb_iter_ = 5

    #datasets_dict = read_all_datasets(root_dir, archive_name)
    #csv_file = "../inception_time_root_dir/archives/All_Subs_Diff_Modules_nofilter_withoutAUc.csv"
    #train_df = pd.read_csv(csv_file,  delimiter=",")  # 101 features (only AU_r)

    for iter in range(nb_iter_):
        print('\t\titer', iter)

        trr = ''
        if iter != 0:
            trr = '_itr_' + str(iter)

        tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + trr + '/'

        for dataset_name in utils.constants.dataset_names_for_archive[archive_name]:
            print('\t\t\tdataset_name: ', dataset_name)

            #x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data(list_targets)
            x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = test(list_targets, list_labels, 1, train_df)

            output_directory = tmp_output_directory + dataset_name + '/'

            temp_output_directory = create_directory(output_directory)

            if temp_output_directory is None:
                print('Already_done', tmp_output_directory, dataset_name)
                continue

            fit_classifier()

            print('\t\t\t\tDONE')

            # the creation of this directory means
            create_directory(output_directory + '/DONE')

    # run the ensembling of these iterations of Inception
    classifier_name = 'nne'

    datasets_dict = read_all_datasets(root_dir, archive_name)

    tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + '/'

    for dataset_name in utils.constants.dataset_names_for_archive[archive_name]:
        print('\t\t\tdataset_name: ', dataset_name)

        #x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data()
        x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = test(list_targets, list_labels, 1, train_df)

        output_directory = tmp_output_directory + dataset_name + '/'

        fit_classifier()

        print('\t\t\t\tDONE')

elif sys.argv[1] == 'InceptionTime_xp':
    # this part is for running inception with the different hyperparameters
    # listed in the paper, on the whole TSC archive
    archive_name = 'TSC'
    classifier_name = 'inception'
    max_iterations = 5

    datasets_dict = read_all_datasets(root_dir, archive_name)

    for xp in xps:

        xp_arr = get_xp_val(xp)

        print('xp', xp)

        for xp_val in xp_arr:
            print('\txp_val', xp_val)

            kwargs = {xp: xp_val}

            for iter in range(max_iterations):

                trr = ''
                if iter != 0:
                    trr = '_itr_' + str(iter)
                print('\t\titer', iter)

                for dataset_name in utils.constants.dataset_names_for_archive[archive_name]:

                    output_directory = root_dir + '/results/' + classifier_name + '/' + '/' + xp + '/' + '/' + str(
                        xp_val) + '/' + archive_name + trr + '/' + dataset_name + '/'

                    print('\t\t\tdataset_name', dataset_name)
                    #x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data()
                    x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = test(list_targets, list_labels, 1, train_df)

                    # check if data is too big for this gpu
                    size_data = x_train.shape[0] * x_train.shape[1]

                    temp_output_directory = create_directory(output_directory)

                    if temp_output_directory is None:
                        print('\t\t\t\t', 'Already_done')
                        continue

                    input_shape = x_train.shape[1:]

                    from classifiers import inception

                    classifier = inception.Classifier_INCEPTION(output_directory, input_shape, nb_classes,
                                                                verbose=False, build=True, **kwargs)

                    classifier.fit(x_train, y_train, x_test, y_test, y_true)

                    # the creation of this directory means
                    create_directory(output_directory + '/DONE')

                    print('\t\t\t\t', 'DONE')

    # we now need to ensemble each iteration of inception (aka InceptionTime)
    archive_name = ARCHIVE_NAMES[0]
    classifier_name = 'nne'

    datasets_dict = read_all_datasets(root_dir, archive_name)

    tmp_output_directory = root_dir + '/results/' + classifier_name + '/' + archive_name + '/'

    for xp in xps:
        xp_arr = get_xp_val(xp)
        for xp_val in xp_arr:

            clf_name = 'inception/' + xp + '/' + str(xp_val)

            for dataset_name in utils.constants.dataset_names_for_archive[archive_name]:
                #x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = prepare_data()
                x_train, y_train, x_test, y_test, y_true, nb_classes, y_true_train, enc = test(list_targets, list_labels, 1, train_df)

                output_directory = tmp_output_directory + dataset_name + '/'

                from classifiers import nne

                classifier = nne.Classifier_NNE(output_directory, x_train.shape[1:],
                                                nb_classes, clf_name=clf_name)

                classifier.fit(x_train, y_train, x_test, y_test, y_true)

elif sys.argv[1] == 'run_length_xps':
    # this is to generate the archive for the length experiments
    run_length_xps(root_dir)

elif sys.argv[1] == 'generate_results_csv':
    clfs = []
    itr = '-0-1-2-3-4-'
    inceptionTime = 'nne/inception'
    # add InceptionTime: an ensemble of 5 Inception networks
    clfs.append(inceptionTime + itr)
    # add InceptionTime for each hyperparameter study
    for xp in xps:
        xp_arr = get_xp_val(xp)
        for xp_val in xp_arr:
            clfs.append(inceptionTime + '/' + xp + '/' + str(xp_val) + itr)
    df = generate_results_csv('results.csv', root_dir, clfs)
    print(df)


		iter 0
			dataset_name:  All_Subs_Diff_Modules_nofilter_withoutAUc
Number of features: 100
Sujets: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
test: [0]
Train subjects: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
Generating train/val/test split...
Generating sequences...
Number of classes: 2
Already_done ../inception_time_root_dir//results/inception/TSC/ All_Subs_Diff_Modules_nofilter_withoutAUc
		iter 1
			dataset_name:  All_Subs_Diff_Modules_nofilter_withoutAUc
Number of features: 100
Sujets: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
test: [0]
Train subjects: [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
Generating train/val/test split...
Generating sequences...
Number of classes: 2
Already_done ../inception_time_root_dir//results/inception/TSC_itr_1/ All_Subs_Diff_Modules_nofilter_withoutAUc
		iter 2
			dataset_name:  All_Subs_Diff_Modules_nofilter_withoutAUc
Number of features: 100
Sujets: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15]
test: [0]
Train subject

FileNotFoundError: ../inception_time_root_dir//archives/TSC/All_Subs_Diff_Modules_nofilter_withoutAUc/All_Subs_Diff_Modules_nofilter_withoutAUc_TRAIN not found.