In [None]:
import numpy as np
import os
import tensorflow as tf

from joblib import load

# Create a TensorFlow session and register it as the Keras backend session.
# NOTE(review): tf.ConfigProto / tf.Session are the TF 1.x entry points; under
# TF 2.x they live in tf.compat.v1 -- confirm the installed version.
# NOTE: the name `config` is rebound later in this notebook to the patch
# [stride, length] configuration, so it does not keep referring to this object.
config = tf.ConfigProto()
# Per the TensorFlow GPUOptions docs, allow_growth lets the process grow its
# GPU memory allocation as needed instead of reserving it all up front.
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
tf.keras.backend.set_session(sess)

import sys
sys.path.append("../")
%load_ext autoreload
%autoreload 2

In [None]:
########################################
########### Prepare Data ###############
########################################
from modules.data_processor import generate_data

# Dataset pickles, relative to `root_path`; the index used below picks the active one.
root_path = '../../data/'
datasets= ['character_trajectories/dataset_steps-20_timesteps-206.pickle', 'anomaly_new/anomaly_dataset.pickle', 'FordA/dataset_classes-2_timesteps-500.pickle', 'ElectricDevices/dataset_classes-7_timesteps-96.pickle', 'daily_and_sport_activites/dataset_classes-19_timesteps-60.pickle']
path = os.path.join(root_path, datasets[1])
trainX, trainY, valX, valY, testX, testY, classes, seqlen, channel = generate_data(path, create_val=True, verbose=1)

# Number of samples per split (first axis of each data array).
trainLen, valLen, testLen = trainX.shape[0], valX.shape[0], testX.shape[0]

# The dataset name is the directory that contains the pickle. The path above is
# built from '/'-separated literals, so splitting on os.sep fails wherever
# os.sep != '/'; os.path.dirname/basename handle the separator portably.
set_name = os.path.basename(os.path.dirname(path))
model_path = os.path.join('../../models', set_name)
img_path = os.path.join('../../images', set_name)
stats_path = os.path.join('../../statistics', set_name)
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
for out_dir in (model_path, img_path, stats_path):
    os.makedirs(out_dir, exist_ok=True)


In [None]:
# not modularized

def validate_and_adjust_settings(zero, attach, notemp):
    """Return the (zero, attach, notemp) triple with invalid combinations repaired.

    Validity table for (zero, attach, notemp):

        0 0 0 invalid      0 0 1 invalid
        1 0 0 valid        1 0 1 valid
        0 1 0 valid        0 1 1 invalid
        1 1 0 valid        1 1 1 valid

    Every invalid combination is repaired by forcing ``zero`` to 1;
    ``attach`` and ``notemp`` are always passed through unchanged.
    """
    # zero is forced to 1 when notemp is set, or when attach is off and
    # either zero or notemp is off as well.
    attach_off = attach == 0
    force_zero = (attach_off and zero == 0) or (attach_off and notemp == 0) or notemp == 1
    return (1 if force_zero else zero), attach, notemp

def define_setup(config, zero, attach, notemp):
    """Build the setup identifier string used to name output directories.

    Args:
        config: iterable of entries whose item 0 is a stride and item 1 a length.
        zero, attach, notemp: flags, each rendered as '1' when truthy, else '0'.

    Returns:
        A string such as ``'strides_5_length_10_zero-1_attach-1_notemp-0'``;
        multiple config entries are joined with '-'.
    """
    pairs = [(str(entry[0]), str(entry[1])) for entry in config]
    # Each value is followed by '-', then the final character is dropped.
    stride_part = ('strides_' + ''.join(stride + '-' for stride, _ in pairs))[:-1]
    length_part = ('length_' + ''.join(length + '-' for _, length in pairs))[:-1]
    flag_part = ''.join(
        '_' + label + '-' + ('1' if value else '0')
        for label, value in (('zero', zero), ('attach', attach), ('notemp', notemp))
    )
    return stride_part + '_' + length_part + flag_part

In [None]:
########################################
############# Patch Data ###############
########################################
from modules.data_generator import DataGenerator
from modules.patch_generator import get_generator_id_list

# [Stride, Length]
# NOTE: this rebinds `config`, which the first cell used for the TensorFlow
# session configuration; from here on `config` is the patch configuration.
config = [[5,10]]
zero, attach, notemp = True, True, False
# Repair invalid flag combinations before they are used anywhere.
zero, attach, notemp = validate_and_adjust_settings(zero, attach, notemp)

# Keyword arguments shared by all DataGenerator instances below.
params = {'dim': [seqlen, channel], 'batch_size': 1024, 'config': config,
          'zero': zero, 'attach': attach, 'notemp': notemp, 'shuffle': False}

clf_type = 'svm'

# Output directories are keyed by the setup string (and classifier type for images).
setup = define_setup(config, zero, attach, notemp)
setup_path = os.path.join(model_path, setup)
image_path = os.path.join(img_path, setup, clf_type)
stat_path = os.path.join(stats_path, setup)
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
for out_dir in (setup_path, image_path, stat_path):
    os.makedirs(out_dir, exist_ok=True)

# Generators
trainIds = get_generator_id_list(trainLen, seqlen, config)
train_generator = DataGenerator(trainIds, trainX, trainY, **params)
valIds = get_generator_id_list(valLen, seqlen, config)
val_generator = DataGenerator(valIds, valX, valY, **params)
testIds = get_generator_id_list(testLen, seqlen, config)
test_generator = DataGenerator(testIds, testX, testY, **params)

In [None]:
########################################
############ Train Level 1 #############
########################################
from modules.model import create_model
from modules.model_trainer import train_descriptive

# Per-sample input shape; the last dimension grows by one when `attach` is set.
input_shape = trainX.shape[1:]
if attach:
    input_shape = tuple(input_shape[:-1]) + (input_shape[-1] + 1,)
patch_model_path = os.path.join(setup_path, 'patch_classifier.h5')

# Reuse the model stored at patch_model_path when present, otherwise build and train one.
if not os.path.exists(patch_model_path):
    patch_model = train_descriptive(
        patch_model_path, create_model(input_shape, classes),
        trainIds, valIds, trainX, trainY, valX, valY, params,
        thresh=0.0, verbose=1, workers=1,
    )
else:
    patch_model = tf.keras.models.load_model(patch_model_path)

# Patch-level predictions for each split, truncated to the number of patch ids.
softmax_trainXp, softmax_valXp, softmax_testXp = (
    patch_model.predict(generator)[:len(patch_ids)]
    for generator, patch_ids in (
        (train_generator, trainIds),
        (val_generator, valIds),
        (test_generator, testIds),
    )
)


In [None]:
########################################
############ Train Level 2 #############
########################################
from modules.patch_generator import get_sample_id_list, create_histo_dataset, get_data_patch_stats
from modules.model import create_clf
from modules.model_trainer import train_clf


def _patch_sample_ids(num_samples):
    """Return (entry 1 of the patch stats, sample-id list) for a split of `num_samples`."""
    pps = get_data_patch_stats(num_samples, seqlen, config)[1]
    return pps, get_sample_id_list(num_samples, pps)


train_pps, train_sidx = _patch_sample_ids(trainLen)
val_pps, val_sidx = _patch_sample_ids(valLen)
test_pps, test_sidx = _patch_sample_ids(testLen)

# Combine the patch-level predictions of each split using its sample-id list.
histo_trainX, histo_valX, histo_testX = (
    create_histo_dataset(patch_scores, sample_ids)
    for patch_scores, sample_ids in (
        (softmax_trainXp, train_sidx),
        (softmax_valXp, val_sidx),
        (softmax_testXp, test_sidx),
    )
)

# Load the second-level classifier from clf_model_path when present, else create and train it.
clf_model_path = os.path.join(setup_path, clf_type + '_classifier.pickle')
if not os.path.exists(clf_model_path):
    clf = train_clf(clf_model_path, create_clf(clf_type), histo_trainX, trainY, histo_valX, valY)
else:
    # joblib.load unpickles the file: only load artifacts you produced yourself.
    clf = load(clf_model_path)

clf_train_pred, clf_val_pred, clf_test_pred = (
    clf.predict(features) for features in (histo_trainX, histo_valX, histo_testX)
)

In [None]:
from modules.patch_generator import get_patch_params

def compute_prediction_relevant(clf_preds, softmax_dataXp, sidx, dataLen, sampleLen, config):
    """Collect, per sample, the positions covered by patches that agree with the prediction.

    For sample ``i`` the patches with ``sidx == i`` are kept only when the
    arg-max of their entry in ``softmax_dataXp`` equals ``clf_preds[i]``. The
    ``[start, end)`` ranges that ``get_patch_params`` reports for the kept
    patches are merged into one sorted array of unique positions.

    Args:
        clf_preds: 1-D sequence with one predicted label per sample.
        softmax_dataXp: per-patch scores, indexable by patch id.
        sidx: sample id of every patch (list or array), aligned with
            ``softmax_dataXp``.
        dataLen, sampleLen, config: passed through to ``get_patch_params``.

    Returns:
        A 1-D ``dtype=object`` ndarray with one entry per sample; entry ``i``
        is the sorted array of positions for sample ``i``. The per-sample
        arrays generally differ in length, so the container is always an
        object array (``np.array`` on a ragged list raises on NumPy >= 1.24
        and silently yields a 2-D array when the lengths happen to match).
    """
    clf_preds = np.asarray(clf_preds)
    # A plain list would make `sidx == sample` a scalar False and match nothing.
    sidx = np.asarray(sidx)
    relevant = np.empty(clf_preds.shape[0], dtype=object)
    for sample in range(clf_preds.shape[0]):
        predicted = clf_preds[sample]
        covered = set()
        for patch in np.where(sidx == sample)[0]:
            # Skip patches whose own top class differs from the final prediction.
            if np.argmax(softmax_dataXp[patch]) != predicted:
                continue
            _, start, end = get_patch_params(patch, dataLen, sampleLen, config)
            covered.update(np.arange(start, end))
        relevant[sample] = np.sort(list(covered))
    return relevant

def compute_relevant_percentage(relevant, seqlen, verbose=1):
    """Summarise the covered fraction of each sequence.

    Args:
        relevant: iterable of sized collections, one per sample.
        seqlen: divisor applied to every collection length.
        verbose: when truthy, print the four summary values.

    Returns:
        ``(fractions, summary)`` where ``fractions`` holds ``len(r) / seqlen``
        for every entry and ``summary`` is ``[min, max, average, std]``.
    """
    fractions = np.array([len(positions) / seqlen for positions in relevant])
    mean_value = np.average(fractions)
    lowest = np.min(fractions)
    highest = np.max(fractions)
    spread = np.std(fractions)
    if verbose:
        report = (('Min:', lowest), ('Max:', highest), ('Avg:', mean_value), ('Std:', spread))
        for label, value in report:
            print(label, value)
    return fractions, np.array([lowest, highest, mean_value, spread])
        
def group_cwise(labels, num_classes):
    """Group sample positions by their label.

    Args:
        labels: indexable sequence of integer labels in ``range(num_classes)``.
        num_classes: number of groups to create.

    Returns:
        A list of ``num_classes`` arrays; array ``k`` holds the positions
        whose label is ``k`` (empty when no sample has that label).
    """
    members = []
    for _ in range(num_classes):
        members.append([])
    for position in range(len(labels)):
        members[labels[position]].append(position)
    return list(map(np.array, members))

# Per-split inputs; every list is index-aligned with `sets`.
clf_preds = [clf_train_pred, clf_val_pred, clf_test_pred]
softs = [softmax_trainXp, softmax_valXp, softmax_testXp]
sids = [train_sidx, val_sidx, test_sidx]
lens = [trainLen, valLen, testLen]
dataY = [trainY, valY, testY]
sets = ('Train', 'Val', 'Test')

report_lines = []
for i in range(len(sets)):
    relevant = compute_prediction_relevant(clf_preds[i], softs[i], sids[i], lens[i], seqlen, config)
    # stats is [min, max, avg, std]; the original read an undefined `stats_train`.
    percentage, stats = compute_relevant_percentage(relevant, seqlen, verbose=0)
    report_lines.append(sets[i] + ' avg:' + str(stats[2]))

    cwise = group_cwise(dataY[i], classes)
    for j, c in enumerate(cwise):
        if len(c) == 0:
            # No sample of this class in the split: nothing to average.
            a = float('nan')
        else:
            # Restrict to the samples of class j; previously the full split was
            # re-summarised for every class, so all classes showed the same value.
            class_relevant = [relevant[k] for k in c]
            _, _, a, _ = compute_relevant_percentage(class_relevant, seqlen, verbose=0)[1]
        report_lines.append('Class ' + str(j) + ' | Avg: ' + str(a))
save_str = '\n'.join(report_lines)
print(save_str)
with open(os.path.join(stat_path, 'Relevant.txt'), "w") as f:
    f.write(save_str)