In [None]:
import numpy as np
import os
import tensorflow as tf

from joblib import load

# TF1-style session setup. With allow_growth enabled the GPU allocator starts
# small and grows on demand instead of reserving the whole device up front.
# NOTE: the name `config` is rebound further down in this notebook to the
# patch [stride, length] list, so it only refers to the ConfigProto here.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Register the session so tf.keras uses it for subsequent model operations.
tf.keras.backend.set_session(sess)

# Put the parent directory on the import path; the later `modules.*` imports
# presumably resolve from there -- confirm against the repository layout.
import sys
sys.path.append("../")
# autoreload mode 2: re-import changed modules before executing each cell.
%load_ext autoreload
%autoreload 2

In [None]:
########################################
########### Prepare Data ###############
########################################
from modules.data_processor import generate_data

# Location of the pickled datasets, relative to this notebook.
root_path = '../../data/'
datasets = [
    'character_trajectories/dataset_steps-20_timesteps-206.pickle',
    'anomaly_new/anomaly_dataset.pickle',
    'FordA/dataset_classes-2_timesteps-500.pickle',
    'ElectricDevices/dataset_classes-7_timesteps-96.pickle',
    'daily_and_sport_activites/dataset_classes-19_timesteps-60.pickle',
]
# Change the index to switch to another dataset.
path = os.path.join(root_path, datasets[0])
trainX, trainY, valX, valY, testX, testY, classes, seqlen, channel = generate_data(path, create_val=True, verbose=1)

# Size of the first axis of each split (presumably the sample count).
trainLen, valLen, testLen = trainX.shape[0], valX.shape[0], testX.shape[0]

# The dataset's folder name keys the output directories. It is derived with
# os.path helpers rather than path.split(os.sep): the entries above hard-code
# '/' separators, so splitting on os.sep fails wherever os.sep is not '/'.
set_name = os.path.basename(os.path.dirname(path))
model_path = os.path.join('../../models', set_name)
img_path = os.path.join('../../images', set_name)
# exist_ok makes re-running the cell a no-op and avoids a check-then-create race.
os.makedirs(model_path, exist_ok=True)
os.makedirs(img_path, exist_ok=True)

In [None]:
# not modularized

def validate_and_adjust_settings(zero, attach, notemp):
    """Coerce an invalid (zero, attach, notemp) combination into a valid one.

    Validity table (zero, attach, notemp):
        0 0 0  invalid      0 0 1  invalid
        1 0 0  valid        1 0 1  valid
        0 1 0  valid        0 1 1  invalid
        1 1 0  valid        1 1 1  valid

    Every invalid row is repaired by switching ``zero`` on; ``attach`` and
    ``notemp`` are always passed through unchanged.

    Returns:
        tuple: ``(zero, attach, notemp)``. ``zero`` is the integer ``1`` when
        one of the forcing conditions holds, otherwise the value passed in.
    """
    must_enable_zero = (
        (zero == 0 and attach == 0)
        or (attach == 0 and notemp == 0)
        or notemp == 1
    )
    return (1 if must_enable_zero else zero), attach, notemp

def define_setup(config, zero, attach, notemp):
    """Build the identifier string for a patch configuration.

    Format:
        ``strides_<s1>-<s2>..._length_<l1>-<l2>..._zero-<0|1>_attach-<0|1>_notemp-<0|1>``

    Args:
        config: iterable of entries; element 0 of each entry goes into the
            strides part and element 1 into the length part.
        zero, attach, notemp: flags rendered as '1' when truthy, else '0'.

    Returns:
        str: the assembled identifier.
    """
    strides, lengths = [], []
    for entry in config:
        strides.append(str(entry[0]) + '-')
        lengths.append(str(entry[1]) + '-')
    # Dropping the last character removes the trailing '-' (or, for an empty
    # config, the '_' that ends the prefix).
    stride_part = ('strides_' + ''.join(strides))[:-1]
    length_part = ('length_' + ''.join(lengths))[:-1]
    flag_part = ''.join(
        '_{}-{}'.format(name, '1' if enabled else '0')
        for name, enabled in (('zero', zero), ('attach', attach), ('notemp', notemp))
    )
    return stride_part + '_' + length_part + flag_part

In [None]:
########################################
############# Patch Data ###############
########################################
from modules.data_generator import DataGenerator
from modules.patch_generator import get_generator_id_list

# Each config entry is [Stride, Length].
config = [[5, 10]]
zero, attach, notemp = True, True, False
# Coerce the flag combination into a valid one before it is used anywhere.
zero, attach, notemp = validate_and_adjust_settings(zero, attach, notemp)

# Keyword arguments shared by every DataGenerator created below.
params = {'dim': [seqlen, channel], 'batch_size': 1024, 'config': config,
          'zero': zero, 'attach': attach, 'notemp': notemp, 'shuffle': False}

# Identifier of the second-level classifier; it is part of the output paths.
clf_type = 'svm'

setup = define_setup(config, zero, attach, notemp)
setup_path = os.path.join(model_path, setup)
image_path = os.path.join(img_path, setup, clf_type)
# exist_ok makes re-running the cell a no-op and avoids a check-then-create race.
os.makedirs(setup_path, exist_ok=True)
os.makedirs(image_path, exist_ok=True)

# Generators
trainIds = get_generator_id_list(trainLen, seqlen, config)
train_generator = DataGenerator(trainIds, trainX, trainY, **params)
valIds = get_generator_id_list(valLen, seqlen, config)
val_generator = DataGenerator(valIds, valX, valY, **params)
testIds = get_generator_id_list(testLen, seqlen, config)
test_generator = DataGenerator(testIds, testX, testY, **params)

In [None]:
########################################
############ Train Level 1 #############
########################################
from modules.model import create_model
from modules.model_trainer import train_descriptive

# Per-sample input shape of the patch model. With `attach` enabled the last
# axis gets one extra entry (presumably for a channel added by the data
# generator -- confirm in modules.data_generator).
input_shape = trainX.shape[1:]
if attach:
    input_shape = tuple(input_shape[:-1]) + (input_shape[-1] + 1,)
patch_model_path = os.path.join(setup_path, 'patch_classifier.h5')

# Train only when no saved model exists for this setup; otherwise reuse it.
if not os.path.exists(patch_model_path):
    patch_model = create_model(input_shape, classes)
    patch_model = train_descriptive(
        patch_model_path, patch_model, trainIds, valIds,
        trainX, trainY, valX, valY, params,
        thresh=0.0, verbose=1, workers=1,
    )
else:
    patch_model = tf.keras.models.load_model(patch_model_path)

# Patch-level predictions per split, truncated to one row per generator id.
softmax_trainXp = patch_model.predict(train_generator)[:len(trainIds)]
softmax_valXp = patch_model.predict(val_generator)[:len(valIds)]
softmax_testXp = patch_model.predict(test_generator)[:len(testIds)]


In [None]:
########################################
############ Train Level 2 #############
########################################
from modules.patch_generator import get_sample_id_list, create_histo_dataset, get_data_patch_stats
from modules.model import create_clf
from modules.model_trainer import train_clf

# For each split: take element [1] of get_data_patch_stats (the value other
# cells of this notebook unpack as `pps`) and derive the sample id list from it.
train_pps = get_data_patch_stats(trainLen, seqlen, config)[1]
train_sidx = get_sample_id_list(trainLen, train_pps)
val_pps = get_data_patch_stats(valLen, seqlen, config)[1]
val_sidx = get_sample_id_list(valLen, val_pps)
test_pps = get_data_patch_stats(testLen, seqlen, config)[1]
test_sidx = get_sample_id_list(testLen, test_pps)

# Combine the patch-level predictions with the sample ids into the inputs of
# the second-level classifier (presumably one histogram per sample, judging
# by the helper's name -- confirm in modules.patch_generator).
histo_trainX = create_histo_dataset(softmax_trainXp, train_sidx)
histo_valX = create_histo_dataset(softmax_valXp, val_sidx)
histo_testX = create_histo_dataset(softmax_testXp, test_sidx)

# Reuse a saved classifier for this setup if one exists, otherwise create and
# train it. NOTE: joblib's load unpickles the file, so only load classifier
# files that were produced locally by this pipeline.
clf_model_path = os.path.join(setup_path, clf_type + '_classifier.pickle')
if os.path.exists(clf_model_path):
    clf = load(clf_model_path)
else:
    clf = create_clf(clf_type)
    clf = train_clf(clf_model_path, clf, histo_trainX, trainY, histo_valX, valY)

# Second-level predictions for every split.
clf_train_pred = clf.predict(histo_trainX)
clf_val_pred = clf.predict(histo_valX)
clf_test_pred = clf.predict(histo_testX)

In [None]:
########################################
########### Plot Statistics ############
########################################
from modules.patch_generator import get_all_patch_params, get_data_patch_stats, get_all_patch, get_patch_params_list
from modules.plot_processor import plot_heatmap, plot_class_means, plot_series_and_dist, plot_patch_and_dist, plot_class_overlay

# NOTE(review): the three sections below repeat the same
# get_all_patch_params / get_all_patch / predict steps for idx = 0; a small
# helper taking `idx` would remove the copy-paste.

# patch + dist
# Plot selected patches of one training sample together with the patch
# model's output for each. `idx` picks the training sample, `show_patches`
# lists the positions (within that sample's patches) to plot.
idx = 0
show_patches = [0]

npc, pps = get_data_patch_stats(trainLen, seqlen, config)
ids = get_all_patch_params(idx, npc, pps)
samples = get_all_patch(ids, trainX, trainLen, seqlen, config, zero, attach, notemp)

patch_dists = patch_model.predict(samples)
param_list = get_patch_params_list(ids, trainLen, seqlen, config)

for i in show_patches:
    plot_patch_and_dist(idx, trainX[idx], trainY[idx], np.argmax(patch_dists[i]), patch_dists[i], param_list[i], patch=i)#, save=image_path)

# complete sample
# Plot the whole series of sample `idx` with the patch predictions and the
# second-level classifier's prediction for that sample.
idx = 0

npc, pps = get_data_patch_stats(trainLen, seqlen, config)
ids = get_all_patch_params(idx, npc, pps)
samples = get_all_patch(ids, trainX, trainLen, seqlen, config, zero, attach, notemp)

patch_preds = patch_model.predict(samples)
plot_series_and_dist(idx, trainX[idx], trainY[idx], clf_train_pred[idx], patch_preds)#, save=image_path)

# class overlay
# Reuses `npc` and `pps` computed in the section above. `only_classes = None`
# is passed straight through to plot_class_overlay.
idx = 0
only_classes = None
ids = get_all_patch_params(idx, npc, pps)
samples = get_all_patch(ids, trainX, trainLen, seqlen, config, zero, attach, notemp)

patch_dists = patch_model.predict(samples)
param_list = get_patch_params_list(ids, trainLen, seqlen, config)
plot_class_overlay(idx, trainX[idx], trainY[idx], clf_train_pred[idx], patch_dists, param_list, only_classes=only_classes)#, save=image_path)

# class means
# NOTE(review): `train_class_means` is not defined anywhere in the visible
# notebook, so the two calls below cannot simply be uncommented.
#plot_heatmap(train_class_means)
#plot_class_means(np.expand_dims(train_class_means[0], axis=0))

In [None]:
from modules.patch_generator import get_patch_params
from scipy.spatial.distance import cosine

def data_class_acc(data, softmax, datalen, samplelen, classes, config, notemp, thresh=0, subset=0, topX=1):
    """Similarity-based accuracy of the patch predictions.

    For every selected patch, all other selected patches that share its key
    (see ``compute_slice``) are ranked by the similarity of their raw data
    slices (``get_sim``). A patch counts as correct when its own predicted
    label occurs among the labels of its ``topX`` most similar patches.

    Args:
        data: array indexed as ``data[sample, start:end]``.
        softmax: per-patch prediction scores, one row per patch.
        datalen, samplelen, config: forwarded unchanged to ``get_patch_params``.
        classes: number of classes; sizes the per-class buckets used by ``subset``.
        notemp: forwarded to ``compute_slice``. When truthy, patches are
            comparable if they have the same length; otherwise only if they
            have the same start and end.
        thresh: only patches whose maximum score exceeds this are evaluated.
        subset: if > 0, evaluate at most this many randomly drawn patches per
            predicted class.
        topX: number of most similar patches inspected per patch.

    Returns:
        float: fraction of evaluated patches counted as correct, or ``0.0``
        when no patch passes the selection.
    """
    # Patch indices whose top score clears the confidence threshold.
    datalen_sub = np.array(
        [i for i, scores in enumerate(softmax) if np.max(scores) > thresh], dtype=int)

    if subset > 0 and len(datalen_sub) > 0:
        labels = np.argmax(softmax, axis=1)
        class_wise = [[] for _ in range(classes)]
        for i in datalen_sub:
            class_wise[labels[i]].append(i)
        # Empty classes are skipped so the result stays an integer index array
        # (permuting an empty list yields a float array).
        picks = [np.random.permutation(members)[:subset] for members in class_wise if members]
        datalen_sub = np.concatenate(picks).astype(int) if picks else np.array([], dtype=int)

    if len(datalen_sub) == 0:
        # Nothing to evaluate; avoid dividing by zero below.
        return 0.0

    # Slice every selected patch once instead of once per compared pair.
    slices = [compute_slice(idx, data, softmax, datalen, samplelen, config, notemp)
              for idx in datalen_sub]

    correct_sim = 0
    for pos, (patch, label, key) in enumerate(slices):
        sims = []
        sim_labels = []
        for other_pos, (other_patch, other_label, other_key) in enumerate(slices):
            # Compare only against *other* patches from the same key group.
            if other_pos == pos or other_key != key:
                continue
            sims.append(get_sim(patch, other_patch))
            sim_labels.append(other_label)

        sims, sim_labels = np.array(sims), np.array(sim_labels)
        # Labels ordered from most to least similar.
        sorted_sim_labels = sim_labels[np.argsort(sims)[::-1]]
        if label in sorted_sim_labels[:topX]:
            correct_sim += 1

    return correct_sim / len(datalen_sub)

def compute_slice(idx, data, softmax, datalen, samplelen, config, notemp=None):
    """Return the raw data slice, predicted label and grouping key of a patch.

    Args:
        idx: patch index into ``softmax``; also passed to ``get_patch_params``.
        data: array indexed as ``data[sample, start:end]``.
        softmax: per-patch prediction scores.
        datalen, samplelen, config: forwarded unchanged to ``get_patch_params``.
        notemp: when truthy the key is the patch length (``end - start``);
            otherwise it is ``"<start>-<end>"``. ``None`` falls back to the
            notebook-level ``notemp`` flag (``False`` if it is not defined),
            which is what earlier versions of this function always used.

    Returns:
        tuple: ``(data_slice, label, key)``.
    """
    if notemp is None:
        # Backward compatibility for callers that do not pass the flag.
        notemp = globals().get('notemp', False)
    label = np.argmax(softmax[idx])
    sidx, start, end = get_patch_params(idx, datalen, samplelen, config)
    key = str(end - start) if notemp else str(start) + '-' + str(end)
    data_slice = data[sidx, start:end]
    return data_slice, label, key

def get_sim(a, b):
    """Return the cosine similarity of two arrays after flattening each to 1-D."""
    flat_a = a.flatten()
    flat_b = b.flatten()
    # scipy's `cosine` is a distance; similarity is its complement.
    cosine_distance = cosine(flat_a, flat_b)
    return 1 - cosine_distance
    

In [None]:
# Run data_class_acc on the training-set patch predictions and report it.
sim_acc = data_class_acc(
    trainX, softmax_trainXp, trainLen, seqlen, classes, config,
    notemp=True, thresh=0.5, subset=0, topX=70,
)
print('Accuracy:', sim_acc)