In [None]:
import numpy as np
import os
import pickle
import sys
import tensorflow as tf

from joblib import load

######### Tensorflow settings ##########
# Build a session config with GPU memory growth enabled, create a session from
# it and register that session with the Keras backend.
# NOTE(review): the name `config` is reassigned later in the notebook to the
# patch [stride, length] list; that is harmless only because `sess` has already
# been created from this object by then.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
tf.keras.backend.set_session(sess)

############ Append sys path ###########
# Add the parent directory to the import search path -- presumably this is
# where the `modules` package imported by the following cells lives (TODO confirm).
sys.path.append('../')

# IPython autoreload extension, mode 2: re-import modules before running code,
# so edits to the project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
########################################
########### Prepare Data ###############
########################################
from modules.data_processor import generate_data

root_path = '../../data/'
# Dataset pickles, relative to root_path. The index used below picks one.
datasets = [
    'character_trajectories/dataset_steps-20_timesteps-206.pickle',
    'anomaly_new/anomaly_dataset.pickle',
    'FordA/dataset_classes-2_timesteps-500.pickle',
    'ElectricDevices/dataset_classes-7_timesteps-96.pickle',
    'daily_and_sport_activites/dataset_classes-19_timesteps-60.pickle',
]
path = os.path.join(root_path, datasets[3])
trainX, trainY, valX, valY, testX, testY, classes, seqlen, channel = generate_data(path, create_val=True, verbose=1)

# Number of samples per split (first axis of each data array).
trainLen, valLen, testLen = trainX.shape[0], valX.shape[0], testX.shape[0]

# Name of the folder that contains the dataset pickle. Derived with os.path
# rather than path.split(os.sep): the path is assembled from '/'-separated
# literals, so splitting on os.sep raises IndexError wherever os.sep != '/'.
set_name = os.path.basename(os.path.dirname(path))
model_path = os.path.join('../../models', set_name)
img_path = os.path.join('../../images', set_name)
# Create the per-dataset output directories on first use.
if not os.path.exists(model_path):
    os.makedirs(model_path)
if not os.path.exists(img_path):
    os.makedirs(img_path)

In [None]:
# not modularized

def validate_and_adjust_settings(zero, attach, notemp):
    """Repair invalid (zero, attach, notemp) flag combinations by forcing ``zero`` on.

    Validity table (zero, attach, notemp)::

        0 0 0 invalid      1 0 0 valid
        0 1 0 valid        1 1 0 valid
        0 0 1 invalid      1 0 1 valid
        0 1 1 invalid      1 1 1 valid

    Returns:
        A ``(zero, attach, notemp)`` tuple. ``attach`` and ``notemp`` are passed
        through untouched. ``zero`` is replaced by the integer ``1`` when
        ``attach`` is off and either ``zero`` or ``notemp`` is off, or when
        ``notemp`` is on; otherwise the given ``zero`` is returned unchanged.
    """
    attach_off = attach == 0
    # Single predicate covering all three cases in which `zero` is forced to 1.
    force_zero = (attach_off and (zero == 0 or notemp == 0)) or notemp == 1
    adjusted_zero = 1 if force_zero else zero
    return adjusted_zero, attach, notemp

def define_setup(config, zero, attach, notemp):
    """Build the setup identifier string for a patch configuration.

    Args:
        config: sequence of entries; element 0 of each entry goes into the
            ``strides_`` part and element 1 into the ``length_`` part, joined
            with ``-``.
        zero, attach, notemp: flags, rendered as ``1`` when truthy, else ``0``.

    Returns:
        A string of the form
        ``strides_<s0-s1-...>_length_<l0-l1-...>_zero-<0|1>_attach-<0|1>_notemp-<0|1>``.
    """
    pairs = [(str(entry[0]), str(entry[1])) for entry in config]
    stride_part = 'strides_' + ''.join(stride + '-' for stride, _ in pairs)
    length_part = 'length_' + ''.join(length + '-' for _, length in pairs)
    flag_part = '_zero-{}_attach-{}_notemp-{}'.format(
        '1' if zero else '0',
        '1' if attach else '0',
        '1' if notemp else '0',
    )
    # [:-1] removes the trailing '-' (or the trailing '_' when config is empty).
    return stride_part[:-1] + '_' + length_part[:-1] + flag_part

In [None]:
########################################
############# Patch Data ###############
########################################
from modules.data_generator import DataGenerator
from modules.patch_generator import get_generator_id_list

# Patch configuration: one [stride, length] pair per entry.
config = [[5,10]]
# Requested flags (zero, attach, notemp); the validator may force `zero` on.
zero, attach, notemp = validate_and_adjust_settings(True, True, False)

# Keyword arguments shared by the three patch generators below.
params = dict(
    dim=[seqlen, channel],
    batch_size=1024,
    config=config,
    zero=zero,
    attach=attach,
    notemp=notemp,
    shuffle=False,
)

# Feature settings: mode and subset size.
feature_mode, feature_subset = 'subset', 500

# Trivial-classifier mode: 'occurance' when the dataset path contains
# 'anomaly', 'majority' otherwise.
triviral_mode = 'occurance' if 'anomaly' in path else 'majority'

# Classifier settings: type of the second-level classifier and whether the
# feature blackbox uses the dense model.
clf_type, use_dense = 'svm', False

# Per-setup output directories (models and images), created on demand.
setup = define_setup(config, zero, attach, notemp)
setup_path = os.path.join(model_path, setup)
image_path = os.path.join(img_path, setup, clf_type)
if not os.path.exists(setup_path):
    os.makedirs(setup_path)
if not os.path.exists(image_path):
    os.makedirs(image_path)

# One id list and one patch generator per split.
trainIds = get_generator_id_list(trainLen, seqlen, config)
train_generator = DataGenerator(trainIds, trainX, trainY, **params)

valIds = get_generator_id_list(valLen, seqlen, config)
val_generator = DataGenerator(valIds, valX, valY, **params)

testIds = get_generator_id_list(testLen, seqlen, config)
test_generator = DataGenerator(testIds, testX, testY, **params)

In [None]:
########################################
############ Train Level 1 #############
########################################
from modules.model import create_model
from modules.model_trainer import train_descriptive

# Model input shape is the per-sample shape of trainX; when `attach` is set
# the last dimension is enlarged by one.
input_shape = trainX.shape[1:]
if attach:
    input_shape = tuple(input_shape[:-1]) + (input_shape[-1] + 1,)
patch_model_path = os.path.join(setup_path, 'patch_classifier.h5')

# Build and train the patch classifier unless a stored model already exists.
if not os.path.exists(patch_model_path):
    patch_model = create_model(input_shape, classes)
    patch_model = train_descriptive(
        patch_model_path, patch_model, trainIds, valIds,
        trainX, trainY, valX, valY, params,
        thresh=0.0, verbose=1, workers=1,
    )
else:
    patch_model = tf.keras.models.load_model(patch_model_path)

# Patch-level model outputs per split, cut to the number of patch ids.
softmax_trainXp, softmax_valXp, softmax_testXp = (
    patch_model.predict(generator)[:len(patch_ids)]
    for generator, patch_ids in (
        (train_generator, trainIds),
        (val_generator, valIds),
        (test_generator, testIds),
    )
)

In [None]:
########################################
########## Prepare Level 2 #############
########################################
from modules.patch_generator import get_data_patch_stats, get_sample_id_list


def _patch_stats_and_sample_ids(num_samples):
    """Return (second patch statistic, sample id list) for a split of `num_samples`."""
    pps = get_data_patch_stats(num_samples, seqlen, config)[1]
    return pps, get_sample_id_list(num_samples, pps)


# Same computation for every split, in train / val / test order.
train_pps, train_sidx = _patch_stats_and_sample_ids(trainLen)
val_pps, val_sidx = _patch_stats_and_sample_ids(valLen)
test_pps, test_sidx = _patch_stats_and_sample_ids(testLen)


In [None]:
########################################
########## Trivial Classifier ##########
########################################
from modules.patch_generator import compute_trivial_preds

# Trivial per-sample predictions derived from the patch-level outputs,
# evaluated in train / val / test order.
trivial_train_preds, trivial_val_preds, trivial_test_preds = (
    compute_trivial_preds(patch_softmax, sample_ids, mode=triviral_mode)
    for patch_softmax, sample_ids in (
        (softmax_trainXp, train_sidx),
        (softmax_valXp, val_sidx),
        (softmax_testXp, test_sidx),
    )
)

In [None]:
########################################
############ Train Level 2 #############
########################################
from modules.patch_generator import create_histo_dataset
from modules.model import create_clf
from modules.model_trainer import train_clf

# Second-level inputs built from the patch-level outputs of each split.
histo_trainX, histo_valX, histo_testX = (
    create_histo_dataset(patch_softmax, sample_ids, full=True)
    for patch_softmax, sample_ids in (
        (softmax_trainXp, train_sidx),
        (softmax_valXp, val_sidx),
        (softmax_testXp, test_sidx),
    )
)

# Train the second-level classifier unless a stored one exists
# (`load` is joblib.load, imported at the top of the notebook).
clf_model_path = os.path.join(setup_path, '{}_classifier.pickle'.format(clf_type))
if not os.path.exists(clf_model_path):
    clf = train_clf(clf_model_path, create_clf(clf_type), histo_trainX, trainY, histo_valX, valY)
else:
    clf = load(clf_model_path)

# Predictions of the second-level classifier per split.
clf_train_pred, clf_val_pred, clf_test_pred = map(
    clf.predict, (histo_trainX, histo_valX, histo_testX)
)

In [None]:
########################################
############ Train Blackbox ############
########################################
from modules.model import create_model
from modules.model_trainer import train_blackbox
from modules.data_generator import DataGenerator_sample

# Generators over whole samples (one id per sample) for each split.
params_simple = {'dim': [seqlen, channel], 'batch_size': 32, 'shuffle': False}
train_generator_simple = DataGenerator_sample(np.arange(trainLen), trainX, trainY, **params_simple)
val_generator_simple = DataGenerator_sample(np.arange(valLen), valX, valY, **params_simple)
test_generator_simple = DataGenerator_sample(np.arange(testLen), testX, testY, **params_simple)

# Reuse a stored blackbox model when available, otherwise build and train one.
blackbox_model_path = os.path.join(model_path, 'blackbox_classifier.h5')
if os.path.exists(blackbox_model_path):
    # Use the fully-qualified Keras loader: no bare `load_model` name is
    # imported in this notebook, so the previous call raised NameError
    # whenever the cached model file existed.
    blackbox_model = tf.keras.models.load_model(blackbox_model_path)
else:
    blackbox_model = create_model(trainX.shape[1:], classes)
    blackbox_model = train_blackbox(blackbox_model_path, blackbox_model, train_generator_simple, val_generator_simple, epochs=50, verbose=1, workers=1)

# Class index per sample (argmax over the last axis), cut to the split size.
# NOTE(review): the slicing suggests the generator may yield more rows than
# samples (e.g. a padded final batch) -- confirm against DataGenerator_sample.
bm_train_pred = np.argmax(blackbox_model.predict(train_generator_simple), axis=-1)[:trainLen]
bm_val_pred = np.argmax(blackbox_model.predict(val_generator_simple), axis=-1)[:valLen]
bm_test_pred = np.argmax(blackbox_model.predict(test_generator_simple), axis=-1)[:testLen]

In [None]:
########################################
############# Train SimpleClf ##########
########################################
from modules.model import create_clf
from modules.model_trainer import train_clf, predict_clf

# Simple classifier fitted directly on the raw split data; a stored model is
# reused when present (`load` is joblib.load from the top-level imports).
simple_clf_model_path = os.path.join(
    model_path, 'simpleClf_{}_classifier.pickle'.format(clf_type)
)
if not os.path.exists(simple_clf_model_path):
    simple_clf_model = train_clf(
        simple_clf_model_path, create_clf(clf_type), trainX, trainY, valX, valY
    )
else:
    simple_clf_model = load(simple_clf_model_path)

# Predictions per split, in train / val / test order.
simple_clf_train_pred, simple_clf_val_pred, simple_clf_test_pred = (
    predict_clf(simple_clf_model, split_data) for split_data in (trainX, valX, testX)
)

In [None]:
########################################
############ Feature Data ##############
########################################
from modules.feature_extractor import compute_relevant_subset_features, create_clf_features

# Cache file name encodes the feature mode and, in 'subset' mode, the subset size.
if feature_mode != 'subset':
    feature_path = os.path.join(model_path, 'features_{}.pickle'.format(feature_mode))
else:
    feature_path = os.path.join(
        model_path, 'features_{}_{}.pickle'.format(feature_mode, feature_subset)
    )

if not os.path.exists(feature_path):
    # 'subset' mode pre-computes the feature subset and extracts exactly those
    # ('given'); any other mode calls create_clf_features in 'relevant' mode
    # with no pre-selection.
    if feature_mode != 'subset':
        f_mode, relevant_features = 'relevant', None
    else:
        f_mode = 'given'
        relevant_features = compute_relevant_subset_features(
            trainX, trainY, classes, num=feature_subset
        )

    # The names returned for the training split are reused as the fixed
    # selection for the validation and test splits.
    train_features, relevant_features = create_clf_features(
        trainX, trainY, pre_selected=relevant_features, mode=f_mode, return_names=True
    )
    val_features = create_clf_features(valX, pre_selected=relevant_features, mode='given')
    test_features = create_clf_features(testX, pre_selected=relevant_features, mode='given')

    with open(feature_path, 'wb') as f:
        pickle.dump([train_features, val_features, test_features], f)
else:
    # Cached features from an earlier run. pickle.load executes arbitrary code
    # from the file, so only load caches this notebook produced itself.
    with open(feature_path, 'rb') as f:
        train_features, val_features, test_features = pickle.load(f)

In [None]:
########################################
####### Train Feature SimpleClf ########
########################################
from modules.model import create_clf
from modules.model_trainer import train_clf, predict_clf

# Simple classifier fitted on the extracted features; a stored model is reused
# when present (`load` is joblib.load from the top-level imports).
simple_clf_model_feature_path = os.path.join(model_path, 'simpleClf_' + clf_type + '_classifier_feature.pickle')
if os.path.exists(simple_clf_model_feature_path):
    simple_clf_feature_model = load(simple_clf_model_feature_path)
else:
    simple_clf_feature_model = create_clf(clf_type)
    # Train the classifier created just above. The previous code passed
    # `simple_clf_model` (the raw-series classifier from the SimpleClf cell),
    # which discarded the new estimator and handed the already-trained one to
    # train_clf together with the feature data.
    simple_clf_feature_model = train_clf(simple_clf_model_feature_path, simple_clf_feature_model, train_features, trainY, val_features, valY)

# Predictions per split on the feature representation.
simple_clf_feature_train_pred = predict_clf(simple_clf_feature_model, train_features)
simple_clf_feature_val_pred = predict_clf(simple_clf_feature_model, val_features)
simple_clf_feature_test_pred = predict_clf(simple_clf_feature_model, test_features)

In [None]:
########################################
######## Train Feature Blackbox ########
########################################
from modules.model import create_model, create_dense_model
from modules.model_trainer import train_blackbox
from modules.data_generator import DataGenerator_sample

# Add a trailing axis of size 1 to the feature matrices so they match the
# [n_features, 1] `dim` passed to the generators below.
trainXf = np.expand_dims(train_features, axis=-1)
valXf = np.expand_dims(val_features, axis=-1)
testXf = np.expand_dims(test_features, axis=-1)

# Generators over whole samples (one id per sample) for each split.
params_feature_simple = {'dim': [train_features.shape[1], 1], 'batch_size': 32, 'shuffle': False}
train_feature_generator_simple = DataGenerator_sample(np.arange(trainLen), trainXf, trainY, **params_feature_simple)
val_feature_generator_simple = DataGenerator_sample(np.arange(valLen), valXf, valY, **params_feature_simple)
test_feature_generator_simple = DataGenerator_sample(np.arange(testLen), testXf, testY, **params_feature_simple)

# The model file name depends on `use_dense`, so both variants can be cached.
blackbox_feature_model_path = os.path.join(model_path, 'blackbox_classifier_feature.h5' if not use_dense else 'blackbox_classifier_feature_dense.h5')
if os.path.exists(blackbox_feature_model_path):
    # Use the fully-qualified Keras loader: no bare `load_model` name is
    # imported in this notebook, so the previous call raised NameError
    # whenever the cached model file existed.
    blackbox_feature_model = tf.keras.models.load_model(blackbox_feature_model_path)
else:
    if not use_dense:
        blackbox_feature_model = create_model(trainXf.shape[1:], classes)
    else:
        blackbox_feature_model = create_dense_model(trainXf.shape[1:], classes)
    blackbox_feature_model = train_blackbox(blackbox_feature_model_path, blackbox_feature_model, train_feature_generator_simple, val_feature_generator_simple, epochs=50, verbose=1, workers=1)

# Class index per sample (argmax over the last axis), cut to the split size.
bfm_train_pred = np.argmax(blackbox_feature_model.predict(train_feature_generator_simple), axis=-1)[:trainLen]
bfm_val_pred = np.argmax(blackbox_feature_model.predict(val_feature_generator_simple), axis=-1)[:valLen]
bfm_test_pred = np.argmax(blackbox_feature_model.predict(test_feature_generator_simple), axis=-1)[:testLen]

In [None]:
########################################
######## Accuracy Statistics ###########
########################################
from modules.statistic_processor import get_classification_report, get_complete_evaluation, get_misclassifications, compute_class_mean
from modules.file_writer import write_to_file


def _write_report_once(report_path, report):
    """Write `report` to `report_path` unless a file already exists there."""
    if not os.path.exists(report_path):
        write_to_file(report_path, report)


# Evaluate every classifier on the train / val / test splits.
get_classification_report(testY, clf_test_pred, verbose=True)
print('Interpretable')
int_rep = get_complete_evaluation(trainY, valY, testY, clf_train_pred, clf_val_pred, clf_test_pred)

print('Trivial')
triv_rep = get_complete_evaluation(trainY, valY, testY, trivial_train_preds, trivial_val_preds, trivial_test_preds)

print('Blackbox')
black_rep = get_complete_evaluation(trainY, valY, testY, bm_train_pred, bm_val_pred, bm_test_pred)
print('SimpleClf')
simple_rep = get_complete_evaluation(trainY, valY, testY, simple_clf_train_pred, simple_clf_val_pred, simple_clf_test_pred)

print('Feature Blackbox')
black_feature_rep = get_complete_evaluation(trainY, valY, testY, bfm_train_pred, bfm_val_pred, bfm_test_pred)
print('Feature SimpleClf')
simple_feature_rep = get_complete_evaluation(trainY, valY, testY, simple_clf_feature_train_pred, simple_clf_feature_val_pred, simple_clf_feature_test_pred)

# Setup-specific reports go to setup_path; existing report files are kept.
_write_report_once(os.path.join(setup_path, 'accuracy_report_' + clf_type + '.txt'), int_rep)
_write_report_once(os.path.join(setup_path, 'accuracy_report_trivial.txt'), triv_rep)

# Reports that do not depend on the patch setup go to model_path.
_write_report_once(os.path.join(model_path, 'accuracy_report.txt'), black_rep)
_write_report_once(os.path.join(model_path, 'accuracy_report_simpleClf_' + clf_type + '.txt'), simple_rep)
_write_report_once(
    os.path.join(model_path, 'accuracy_report_feature_dense.txt' if use_dense else 'accuracy_report_feature.txt'),
    black_feature_rep,
)
_write_report_once(os.path.join(model_path, 'accuracy_report_simpleClf_' + clf_type + '_feature.txt'), simple_feature_rep)

# currently not in use
train_mis, val_mis, test_mis = (
    get_misclassifications(labels, predictions)
    for labels, predictions in (
        (trainY, clf_train_pred),
        (valY, clf_val_pred),
        (testY, clf_test_pred),
    )
)

train_class_means, val_class_means, test_class_means = (
    compute_class_mean(histo_data, labels)
    for histo_data, labels in (
        (histo_trainX, trainY),
        (histo_valX, valY),
        (histo_testX, testY),
    )
)

In [None]:
########################################
########### Plot Statistics ############
########################################
from modules.patch_generator import get_all_patch_params, get_data_patch_stats, get_all_patch, get_patch_params_list
from modules.plot_processor import plot_heatmap, plot_class_means, plot_series_and_dist, plot_patch_and_dist, plot_class_overlay

# The plots below are called with save=None. The real image directory is kept
# in image_path_bak so it can be restored by hand. The backup is only taken
# while image_path still holds a path: taking it unconditionally overwrote the
# backup with None as soon as this cell was run a second time.
if image_path is not None:
    image_path_bak = image_path
image_path = None

# patch + dist
# Sample index to inspect and the patches of that sample to plot.
idx = 0
show_patches = [0]

npc, pps = get_data_patch_stats(trainLen, seqlen, config)
ids = get_all_patch_params(idx, npc, pps)
samples = get_all_patch(ids, trainX, trainLen, seqlen, config, zero, attach, notemp)

# Patch classifier output for every patch of the sample, plus the matching
# patch parameters.
patch_dists = patch_model.predict(samples)
param_list = get_patch_params_list(ids, trainLen, seqlen, config)

for i in show_patches:
    plot_patch_and_dist(idx, trainX[idx], trainY[idx], np.argmax(patch_dists[i]), patch_dists[i], param_list[i], patch=i, save=image_path)

# complete sample
idx = 0

npc, pps = get_data_patch_stats(trainLen, seqlen, config)
ids = get_all_patch_params(idx, npc, pps)
samples = get_all_patch(ids, trainX, trainLen, seqlen, config, zero, attach, notemp)

patch_preds = patch_model.predict(samples)
plot_series_and_dist(idx, trainX[idx], trainY[idx], clf_train_pred[idx], patch_preds, save=image_path)

# class overlay
# `only_classes` is forwarded to plot_class_overlay; None is the default used here.
idx = 0
only_classes = None#[0,1]
ids = get_all_patch_params(idx, npc, pps)
samples = get_all_patch(ids, trainX, trainLen, seqlen, config, zero, attach, notemp)

patch_dists = patch_model.predict(samples)
param_list = get_patch_params_list(ids, trainLen, seqlen, config)
plot_class_overlay(idx, trainX[idx], trainY[idx], clf_train_pred[idx], patch_dists, param_list, only_classes=only_classes, save=image_path)

# class means
#plot_heatmap(train_class_means)
#plot_class_means(np.expand_dims(train_class_means[0], axis=0))