In [None]:
import numpy as np
import os
import tensorflow as tf

from joblib import load

# TF1-style session setup: build a session config, create the session and
# register it as the session used by tf.keras.
# NOTE: the name `config` is re-bound to the [stride, length] patch list in the
# "Patch Data" cell further down; after that cell it no longer refers to this
# ConfigProto.
config = tf.ConfigProto()
# Let the GPU allocation grow as needed (see the TF1 GPUOptions documentation).
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
tf.keras.backend.set_session(sess)

import sys
sys.path.append("../")
%load_ext autoreload
%autoreload 2

In [None]:
########################################
########### Prepare Data ###############
########################################
from data_processor import generate_data

path = '../../data//character_trajectories/dataset_steps-20_timesteps-206.pickle'
trainX, trainY, valX, valY, testX, testY, classes, seqlen, channel = generate_data(path, create_val=True, verbose=1)

# Number of samples per split (first axis of each data array).
trainLen, valLen, testLen = trainX.shape[0], valX.shape[0], testX.shape[0]

# The dataset name is the directory that contains the pickle file. Using
# os.path instead of splitting on os.sep keeps this working when the path is
# written with '/' on a platform whose os.sep is different.
set_name = os.path.basename(os.path.dirname(path))
model_path = os.path.join('../../models', set_name)
img_path = os.path.join('../../images', set_name)
# exist_ok=True makes the cell idempotent without a separate existence check.
os.makedirs(model_path, exist_ok=True)
os.makedirs(img_path, exist_ok=True)

In [None]:
# not modularized

def validate_and_adjust_settings(zero, attach, notemp):
    """Return a valid (zero, attach, notemp) triple.

    Only the ``zero`` flag is ever changed: it is forced to ``1`` for the
    combinations handled below, ``attach`` and ``notemp`` are passed through.

    Truth table (zero attach notemp):
        0 0 0 invalid
        1 0 0 valid
        0 1 0 valid
        1 1 0 valid
        0 0 1 invalid
        1 0 1 valid
        0 1 1 invalid
        1 1 1 valid
    """
    force_zero = (
        (zero == 0 and attach == 0)
        or (attach == 0 and notemp == 0)
        or notemp == 1
    )
    adjusted_zero = 1 if force_zero else zero
    return adjusted_zero, attach, notemp

def define_setup(config, zero, attach, notemp):
    """Build the setup name used for model and image sub-directories.

    ``config`` is iterated once; element 0 of every entry goes into the
    ``strides_`` part and element 1 into the ``length_`` part, joined by '-'.
    The three flags are appended as '1' (truthy) or '0' (falsy).
    """
    def as_flag(value):
        return '1' if value else '0'

    entries = [(str(entry[0]), str(entry[1])) for entry in config]
    stride_part = 'strides_' + ''.join(stride + '-' for stride, _ in entries)
    length_part = 'length_' + ''.join(length + '-' for _, length in entries)

    # [:-1] drops the trailing '-' left behind by the last entry.
    pieces = [
        stride_part[:-1], '_', length_part[:-1],
        '_zero-', as_flag(zero),
        '_attach-', as_flag(attach),
        '_notemp-', as_flag(notemp),
    ]
    return ''.join(pieces)

In [None]:
########################################
############# Patch Data ###############
########################################
from modules.data_generator import DataGenerator
from modules.patch_generator_batch import get_generator_id_list

# [Stride, Length]
# NOTE: this re-binds `config`, which held the tf.ConfigProto in the first
# cell. The session has already been created from it, so only the name is lost.
config = [[5,10]]
zero, attach, notemp = True, True, False
zero, attach, notemp = validate_and_adjust_settings(zero, attach, notemp)

# Keyword arguments shared by all three DataGenerator instances below.
params = {'dim': [seqlen, channel], 'batch_size': 1024, 'config': config,
          'zero': zero, 'attach': attach, 'notemp': notemp, 'shuffle': False}

clf_type = 'svm'

# One model directory per setup, one image directory per setup and classifier.
setup = define_setup(config, zero, attach, notemp)
setup_path = os.path.join(model_path, setup)
# exist_ok=True makes the cell idempotent without a separate existence check.
os.makedirs(setup_path, exist_ok=True)
image_path = os.path.join(img_path, setup, clf_type)
os.makedirs(image_path, exist_ok=True)

# Generators
trainIds = get_generator_id_list(trainLen, seqlen, config)
train_generator = DataGenerator(trainIds, trainX, trainY, **params)
valIds = get_generator_id_list(valLen, seqlen, config)
val_generator = DataGenerator(valIds, valX, valY, **params)
testIds = get_generator_id_list(testLen, seqlen, config)
test_generator = DataGenerator(testIds, testX, testY, **params)

In [None]:
########################################
############ Train Level 1 #############
########################################
from modules.model import create_model
from modules.model_trainer import train_descriptive

# Per-sample input shape; with `attach` enabled the last axis is one wider.
input_shape = tuple(trainX.shape[1:])
if attach:
    input_shape = input_shape[:-1] + (input_shape[-1] + 1,)
patch_model_path = os.path.join(setup_path, 'patch_classifier.h5')

# Train the patch classifier only when no saved model exists for this setup.
if not os.path.exists(patch_model_path):
    patch_model = create_model(input_shape, classes)
    patch_model = train_descriptive(patch_model_path, patch_model, trainIds, valIds, trainX, trainY, valX, valY, params, thresh=0.0, verbose=1, workers=1)
else:
    patch_model = tf.keras.models.load_model(patch_model_path)

# Patch-level predictions, cut to the number of patch ids of each split.
n_train_ids, n_val_ids, n_test_ids = len(trainIds), len(valIds), len(testIds)
softmax_trainXp = patch_model.predict(train_generator)[:n_train_ids]
softmax_valXp = patch_model.predict(val_generator)[:n_val_ids]
softmax_testXp = patch_model.predict(test_generator)[:n_test_ids]

In [None]:
########################################
############ Train Level 2 #############
########################################
from modules.patch_generator import get_sample_id_list, create_histo_dataset, get_data_patch_stats
from modules.model import create_clf
from modules.model_trainer import train_clf

# Second entry of the patch stats and the derived sample-id list, per split.
train_pps = get_data_patch_stats(trainLen, seqlen, config)[1]
train_sidx = get_sample_id_list(trainLen, train_pps)
val_pps = get_data_patch_stats(valLen, seqlen, config)[1]
val_sidx = get_sample_id_list(valLen, val_pps)
test_pps = get_data_patch_stats(testLen, seqlen, config)[1]
test_sidx = get_sample_id_list(testLen, test_pps)

# Histogram datasets built from the level-1 patch predictions (train, val, test).
histo_trainX, histo_valX, histo_testX = [
    create_histo_dataset(patch_softmax, sample_ids)
    for patch_softmax, sample_ids in (
        (softmax_trainXp, train_sidx),
        (softmax_valXp, val_sidx),
        (softmax_testXp, test_sidx),
    )
]

clf_model_path = os.path.join(setup_path, clf_type + '_classifier.pickle')
# Fit the level-2 classifier only when no saved one exists for this setup.
if not os.path.exists(clf_model_path):
    clf = create_clf(clf_type)
    clf = train_clf(clf_model_path, clf, histo_trainX, trainY, histo_valX, valY)
else:
    # joblib.load is pickle-based: only load files you created yourself.
    clf = load(clf_model_path)

clf_train_pred, clf_val_pred, clf_test_pred = [
    clf.predict(histo) for histo in (histo_trainX, histo_valX, histo_testX)
]

In [None]:
########################################
######## Accuracy Statistics ###########
########################################
from modules.statistic_processor import get_classification_report, get_complete_evaluation, get_misclassifications, compute_class_mean
from modules.file_writer import write_to_file

# Report on the test split first, then the evaluation over all three splits.
get_classification_report(testY, clf_test_pred, complete=True)
print('Interpretable')
int_rep = get_complete_evaluation(trainY, valY, testY, clf_train_pred, clf_val_pred, clf_test_pred)

# Misclassifications per split (train, val, test).
train_mis, val_mis, test_mis = [
    get_misclassifications(truth, predicted)
    for truth, predicted in (
        (trainY, clf_train_pred),
        (valY, clf_val_pred),
        (testY, clf_test_pred),
    )
]

In [None]:
########################################
########### Plot Statistics ############
########################################
from modules.patch_generator import get_all_patch_params, get_data_patch_stats, get_all_patch, get_patch_params_list
from modules.plot_processor import plot_heatmap, plot_class_means, plot_series_and_dist, plot_patch_and_dist, plot_class_overlay

# Training sample shown by all three plots below.
idx = 789
# Patch indices shown by the "patch + dist" plot.
show_patches = [0]
# Classes passed to the "class overlay" plot.
only_classes = [4,10]

# The three plots use the same sample, so its patches and their predictions
# are computed once here instead of once per plot.
npc, pps = get_data_patch_stats(trainLen, seqlen, config)
ids = get_all_patch_params(idx, npc, pps)
samples = get_all_patch(ids, trainX, trainLen, seqlen, config, zero, attach, notemp)
patch_dists = patch_model.predict(samples)
# Independent copy so the per-plot inputs stay separate, as they were when each
# plot ran its own predict call.
patch_preds = np.copy(patch_dists)
param_list = get_patch_params_list(ids, trainLen, seqlen, config)

# patch + dist
for i in show_patches:
    plot_patch_and_dist(idx, trainX[idx], trainY[idx], np.argmax(patch_dists[i]), patch_dists[i], param_list[i], patch=i)#, save=image_path)

# complete sample
plot_series_and_dist(idx, trainX[idx], trainY[idx], clf_train_pred[idx], patch_preds)#, save=image_path)

# class overlay
plot_class_overlay(idx, trainX[idx], trainY[idx], clf_train_pred[idx], patch_dists, param_list, only_classes=only_classes)#, save=image_path)

# class means
#plot_heatmap(train_class_means)
#plot_class_means(np.expand_dims(train_class_means[0], axis=0))

In [None]:
import matplotlib.pyplot as plt

def sort_by_class(dataY):
    """Group sample indices by label.

    Returns a list with one index array per unique value of ``dataY`` (in the
    sorted order produced by ``np.unique``); each array holds the first-axis
    positions where ``dataY`` equals that value.
    """
    indices_per_class = []
    for label in np.unique(dataY):
        indices_per_class.append(np.where(dataY == label)[0])
    return indices_per_class

def plot_class_sample(data, dataSort, c, s):
    """Plot the ``s``-th sample of class ``c`` and show the figure.

    ``dataSort`` is indexed as ``dataSort[c][s]`` to obtain the position of the
    sample inside ``data``.
    """
    sample_position = dataSort[c][s]
    plt.plot(data[sample_position])
    plt.show()
    
def plot_class_slice(data, dataSort):
    """Plot the first sample of every class on a grid of subplots.

    ``dataSort[i][0]`` is used as the position of the sample drawn for class
    ``i``. The grid has ``int(sqrt(n))`` rows and enough columns to hold all
    ``n`` classes.
    """
    n_classes = len(dataSort)
    if n_classes == 0:
        # Nothing to draw; also avoids a division by zero when sizing the grid.
        return

    n_rows = int(np.sqrt(n_classes))
    n_cols = int(np.ceil(n_classes / n_rows))
    # squeeze=False always returns a 2-D axes array, so the [row][col] indexing
    # below also works when the grid has a single row or a single cell.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 4*n_rows), squeeze=False)
    for i in range(n_classes):
        row, col = divmod(i, n_cols)
        axes[row][col].plot(data[dataSort[i][0]])
        axes[row][col].set_title('Class: %s' % (i))

    # Hide grid cells that have no class assigned to them.
    for unused_ax in axes.flat[n_classes:]:
        unused_ax.set_visible(False)

    plt.subplots_adjust(wspace=0.4, hspace=0.5)
    plt.show()

In [None]:
# Training-sample indices grouped by label: trainSort[k] holds the indices of
# the k-th unique label in trainY.
trainSort = sort_by_class(trainY)

In [None]:
#plot_class_sample(trainX, trainSort, 0, 0)

In [None]:
#plot_class_slice(trainX, trainSort)

In [None]:
#ONLY FOR CHARACTER
import scipy.io 

# NOTE(review): absolute, machine-specific path; this cell only runs where the
# original .mat file is available at this location.
mat = scipy.io.loadmat('/home/Data/Timeseries/Classification/character_trajectories/mixoutALL_shifted.mat')
# One name per class, taken from the 'consts' entry of the .mat file.
chars = np.array([d[0] for d in np.squeeze(mat['consts'][0][0][3])])

# "index: name" pairs on one line. join() puts the separator only between
# entries, so no trailing separator has to be sliced off afterwards.
s = ' | '.join(str(i) + ': ' + c for i, c in enumerate(chars))
print(s)

In [None]:
def convert_to_plot(xy_data):
    """Turn a sequence of per-step (x, y, t) increments into running totals.

    The running totals start at ``(0, 0, 1)`` and every row of ``xy_data`` is
    added to them in turn, so the result has one more point than ``xy_data``
    has rows.

    Parameters
    ----------
    xy_data : iterable of 3-element rows
        Per-step increments; an ndarray or a plain nested list both work.

    Returns
    -------
    numpy.ndarray of shape (3, len(xy_data) + 1)
        Row 0 holds the accumulated first column, row 1 the second and row 2
        the third (named ``tip`` here).
    """
    x_pos, y_pos, tip = 0, 0, 1
    dots = [[x_pos, y_pos, tip]]
    for x, y, t in xy_data:
        x_pos += x
        y_pos += y
        tip += t
        dots.append([x_pos, y_pos, tip])

    # (points, 3) -> (3, points) so callers can index dots[0], dots[1], dots[2].
    return np.transpose(np.array(dots), [1, 0])

def plot_dots(dots):
    """Scatter ``dots[0]`` against ``dots[1]``, shaded by ``dots[2]``, and show it."""
    xs, ys, shade = dots[0], dots[1], dots[2]
    plt.scatter(xs, ys, c=shade, cmap='Greys')
    plt.show()

In [None]:
# Select one training sample: entry `s` of group `c` in trainSort.
c = 16
s = 0

xy_data = trainX[trainSort[c][s]]

# Accumulated points of the selected sample (see convert_to_plot).
dots = convert_to_plot(xy_data)
#plot_dots(dots)

In [None]:
def get_window(org, syn):
    """Find the index ranges along the first axis where ``org`` and ``syn`` differ.

    Parameters
    ----------
    org, syn : numpy.ndarray
        Compared position by position for ``org.shape[0]`` positions.

    Returns
    -------
    numpy.ndarray
        One ``[start, end]`` row per run of consecutive differing positions,
        with ``end`` inclusive. Empty when the inputs are equal everywhere.
    """
    n_steps = org.shape[0]
    windows = []
    start = None  # first index of the run currently being tracked, if any
    for i in range(n_steps):
        if np.any(org[i] != syn[i]):
            if start is None:
                start = i
        elif start is not None:
            windows.append([start, i - 1])
            start = None

    # A run that is still open when the loop ends reaches the last position;
    # close it here so it is not silently dropped.
    if start is not None:
        windows.append([start, n_steps - 1])
    return np.array(windows)

def compare_idx(data, labels, idx, idx2, chars):
    """Collect everything needed to compare two samples of ``data``.

    Returns ``(org, syn, org_plot, syn_plot, window, labels)`` where ``org`` and
    ``syn`` are ``data[idx]`` and ``data[idx2]``, the ``*_plot`` entries are
    their ``convert_to_plot`` results, ``window`` is ``get_window(org, syn)``
    and ``labels`` holds the two entries of ``chars`` selected by the samples'
    labels.
    """
    org, syn = data[idx], data[idx2]

    window = get_window(org, syn)

    org_plot = convert_to_plot(org)
    syn_plot = convert_to_plot(syn)

    label_names = [chars[labels[idx]], chars[labels[idx2]]]
    return org, syn, org_plot, syn_plot, window, label_names


def plot_compare(org, syn, org_plot, syn_plot, window, labels, norm =False):
    """Show two samples side by side with the given windows highlighted.

    Top row: the raw series ``org`` (left) and ``syn`` (right). Bottom row: the
    corresponding ``*_plot`` points as scatter plots. Every ``[start, end]``
    entry of ``window`` is shaded in the top row and re-drawn in yellow in the
    bottom row using the slice ``start:end``. With ``norm`` the bottom row
    shares one y-range taken from both point sets.
    """
    fig, ax = plt.subplots(2,2, figsize=(10,8))
    fig.suptitle('GT 1: %s | GT 2: %s' % (labels[0], labels[1]))

    # (raw series, accumulated points) for the left and right column.
    columns = ((org, org_plot), (syn, syn_plot))

    for col, (series, _) in enumerate(columns):
        ax[0,col].plot(series)

    if norm:
        both_y = [org_plot[1], syn_plot[1]]
        mi, ma = np.min(both_y), np.max(both_y)
        for col in range(len(columns)):
            ax[1,col].set_ylim((mi,ma))

    for col, (_, points) in enumerate(columns):
        ax[1,col].scatter(points[0], points[1], c=points[2], cmap='Greys')

    for w in window:
        begin, end = w[0], w[1]
        for col, (_, points) in enumerate(columns):
            ax[0,col].axvspan(begin, end, alpha=0.5, color='yellow')
            ax[1,col].scatter(points[0][begin:end], points[1][begin:end], c='yellow', marker='.')

    plt.show()

def plot_highlight(sid, org, org_plot, window, label, label_names=None, norm =False, save=None):
    """Scatter one sample's points and highlight the given windows in yellow.

    Parameters
    ----------
    sid : sample id, used in the title and in the saved file name.
    org : unused; kept so existing calls keep working.
    org_plot : indexable with rows 0, 1, 2 (x, y, shade), e.g. the result of
        ``convert_to_plot``.
    window : iterable of ``[start, end]`` pairs; points ``start:end`` are
        re-drawn in yellow.
    label : class id of the sample.
    label_names : optional lookup from class id to display name. When omitted
        the raw ``label`` is shown.
    norm : when true, the y-range is set to the min/max of ``org_plot[1]``.
    save : optional directory; when given the figure is written there as
        ``Class_overlay_char_<sid>_c-<label>``.
    """
    fig, ax = plt.subplots(figsize=(5,4))
    fig.suptitle('Class distribution')

    # sample
    # Fall back to the raw id so the documented default label_names=None works.
    label_text = label if label_names is None else label_names[label]
    ax.set_title('Sample ID: ' + str(sid) + ' | Label: ' +
                 str(label_text))
    ax.set_xlabel('Timesteps')
    ax.set_ylabel('Value')

    if norm:
        mi, ma = np.min(org_plot[1]), np.max(org_plot[1])
        ax.set_ylim((mi,ma))

    ax.scatter(org_plot[0], org_plot[1], c=org_plot[2], cmap='Greys')

    for w in window:
        ax.scatter(org_plot[0][w[0]:w[1]], org_plot[1][w[0]:w[1]], c='yellow', marker='.')

    if save is not None:
        # Saving depends only on `save`; it no longer reads the notebook-level
        # `only_classes` variable, which is not an argument of this function.
        fname = 'Class_overlay_char_' + str(sid) + '_c-' + str(label)
        plt.savefig(os.path.join(save, fname), dpi=300)

    plt.show()

In [None]:
# Two training samples to compare: a fixed index and the first sample of
# group 15 in trainSort.
sample_1 = 1195#trainSort[0][0]
sample_2 = trainSort[15][0]

org, syn, org_plot, syn_plot, window, labels = compare_idx(trainX, trainY, sample_1, sample_2, chars)

# The window returned by compare_idx is replaced by a hand-picked range below.
# NOTE(review): 5 and 10 look like the stride and length from `config`
# ([[5,10]]) with p1/p2 as patch indices - confirm, and derive them from
# `config` if so.
p1 = 5
p2 = 19
window = [[p1*5, p2*5+10]]

#plot_compare(org, syn, org_plot, syn_plot, window, labels)
# Highlights the range on the second sample; 15 is passed as its label id.
plot_highlight(sample_2, syn, syn_plot, window, 15, chars, save=image_path)

In [None]:
import matplotlib

def plot_description(sid, data, label, pred, patch_dists, param_list, only_classes=None, label_names=None, save=None):
    """Scatter one sample's points and colour them by the winning patch class.

    Parameters
    ----------
    sid : sample id, used in the title and in the saved file name.
    data : per-step increments of the sample, passed to ``convert_to_plot``.
    label, pred : true and predicted class id of the sample.
    patch_dists : 2-D array, one row of class scores per patch.
    param_list : 2-D array, one row per patch; columns 1 and 2 are used as the
        start and (exclusive) stop of the point range covered by the patch.
    only_classes : optional collection of class ids; other classes are skipped.
    label_names : optional lookup from class id to display name. When omitted
        the raw class ids are shown.
    save : optional directory; when given the figure is written there.
    """
    def _name(class_id):
        # Fall back to the raw id so the documented default label_names=None works.
        return class_id if label_names is None else label_names[class_id]

    patch_dists = np.asarray(patch_dists)
    param_list = np.asarray(param_list)
    n_classes = patch_dists.shape[1]

    fig, ax = plt.subplots(figsize=(5, 4))
    fig.suptitle('Class distribution')

    # sample
    ax.set_title('Sample ID: ' + str(sid) + ' | Label: ' +
                 str(_name(label)) + ' | Pred: ' + str(_name(pred)))
    ax.set_xlabel('Timesteps')
    ax.set_ylabel('Value')
    dots = convert_to_plot(data)
    ax.scatter(dots[0], dots[1], c=dots[2], cmap='Greys')

    # dist: class with the highest score for every patch
    color = np.argmax(patch_dists, axis=1)

    #color range
    if n_classes > 10:
        # More classes than the default colour cycle: spread them over 'tab20'.
        cm = plt.get_cmap('tab20')
        cNorm = matplotlib.colors.Normalize(
            vmin=0, vmax=n_classes-1)
        scalarMap = matplotlib.cm.ScalarMappable(
            norm=cNorm, cmap=cm)
        color_val = [scalarMap.to_rgba(c) for c in np.arange(n_classes)]
    else: 
        color_val = np.array(['C' + str(c) for c in np.arange(n_classes)])

    drew_overlay = False
    for c in np.unique(color):
        if only_classes is not None and c not in only_classes:
            continue
        # Union of the point ranges of all patches won by class c.
        dot_part = set()
        for i in np.where(color==c)[0]:
            dot_part.update(np.arange(param_list[i, 1], param_list[i, 2]))
        dot_part = sorted(dot_part)
        ax.scatter(dots[0][dot_part], dots[1][dot_part], color=color_val[c], label='Class: ' + str(_name(c)))
        drew_overlay = True

    # Build the legend once, and only when at least one class was drawn.
    if drew_overlay:
        ax.legend()

    if save is not None:
        fname = 'Class_overlay_char_' + str(sid)
        if only_classes is not None:
            fname += '_c' 
            for c in only_classes:
                fname += '-' + str(c)
        plt.savefig(os.path.join(save, fname), dpi=300)

    plt.show()
    
idx = 1195
only_classes = [15]

# Compute the patch predictions for *this* sample. The notebook-level
# `patch_dists` / `param_list` were produced in the "Plot Statistics" cell for
# a different sample index, so reusing them here would overlay another
# sample's patch classes on sample `idx`.
npc, pps = get_data_patch_stats(trainLen, seqlen, config)
desc_ids = get_all_patch_params(idx, npc, pps)
desc_samples = get_all_patch(desc_ids, trainX, trainLen, seqlen, config, zero, attach, notemp)
desc_patch_dists = patch_model.predict(desc_samples)
desc_param_list = get_patch_params_list(desc_ids, trainLen, seqlen, config)

plot_description(idx, trainX[idx], trainY[idx], clf_train_pred[idx], desc_patch_dists, desc_param_list, only_classes=only_classes, label_names=chars, save=image_path)