In [2]:
%load_ext autoreload
%autoreload 2

import re
import numpy as np
import pandas as pd
import subprocess

from matplotlib import pyplot as plt
from scipy.io import arff
from scipy.stats import zscore

from DBA_multivariate import performDBA
from search_subsequence import search_dtw, decision, search_ed, OUPUT_STARTS_REGEXP, OUPUT_BASE_REGEXP

# Эксперимент 1
## Сравнение производительности с оптимизациями и без них
**Датасет**: рукописные символы (траектории написания, 20 классов)

In [3]:
# Label table for the character-trajectories series. The cells below read its
# `labels` and `start` columns to pick the annotated starts of each class.
keys = pd.read_csv(
    "../data/search_subseries/character/character_trajectories_labels.csv",
    index_col=0,
)

# Search parameters, passed positionally to search_dtw in this order.
closest_series_num = 80
subseq_len = 100
warp_window = 0.05

# NOTE(review): presumably the full length of one labelled character segment
# (as opposed to the shorter search window `subseq_len`) — confirm.
true_subseq_len = 182

# Single class label; the loop cells below rebind `character` on every iteration.
character = 9

### Усреднение: DBA

In [4]:
# For each of the 20 class labels, run search_dtw over the full trajectories
# file with that class's DBA-averaged template as the query, then print
# "label | decision() score | elapsed value returned by search_dtw".
# `optimize` is not passed here, so search_dtw's own default applies.
# NOTE(review): the meaning of the sixth positional argument (1 here, 2 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        1)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.932 | 1.869 
character:   2 | 0.793 | 2.114 
character:   3 | 0.932 | 1.675 
character:   4 | 0.924 | 1.514 
character:   5 | 0.975 | 2.656 
character:   6 | 0.912 | 1.814 
character:   7 | 0.843 | 1.350 
character:   8 | 0.787 | 1.134 
character:   9 | 1.000 | 2.523 
character:  10 | 0.566 | 1.223 
character:  11 | 0.983 | 2.007 
character:  12 | 0.887 | 2.066 
character:  13 | 0.860 | 1.764 
character:  14 | 0.511 | 2.160 
character:  15 | 0.981 | 2.337 
character:  16 | 0.755 | 1.418 
character:  17 | 1.000 | 2.288 
character:  18 | 0.720 | 1.632 
character:  19 | 0.804 | 2.149 
character:  20 | 0.973 | 2.688 


In [5]:
# For each of the 20 class labels, run search_dtw over the full trajectories
# file with that class's DBA-averaged template as the query, then print
# "label | decision() score | elapsed value returned by search_dtw".
# `optimize` is not passed here, so search_dtw's own default applies.
# NOTE(review): the meaning of the sixth positional argument (2 here, 1 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        2)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.877 | 1.258 
character:   2 | 0.741 | 1.374 
character:   3 | 0.898 | 1.072 
character:   4 | 0.924 | 0.754 
character:   5 | 0.975 | 1.103 
character:   6 | 0.877 | 1.255 
character:   7 | 0.804 | 0.917 
character:   8 | 0.853 | 0.848 
character:   9 | 1.000 | 1.547 
character:  10 | 0.528 | 0.840 
character:  11 | 0.948 | 1.379 
character:  12 | 0.792 | 1.486 
character:  13 | 0.740 | 1.214 
character:  14 | 0.447 | 1.457 
character:  15 | 0.981 | 1.564 
character:  16 | 0.623 | 1.021 
character:  17 | 1.000 | 1.606 
character:  18 | 0.660 | 1.043 
character:  19 | 0.804 | 1.562 
character:  20 | 0.890 | 1.807 


### Усреднение: mean

In [6]:
# For each of the 20 class labels, run search_dtw over the full trajectories
# file with that class's mean-averaged template (averaged_<label>.csv) as the
# query, then print "label | decision() score | elapsed value from search_dtw".
# `optimize` is not passed here, so search_dtw's own default applies.
# NOTE(review): the meaning of the sixth positional argument (1 here, 2 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        1)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.986 | 1.946 
character:   2 | 0.879 | 1.926 
character:   3 | 0.898 | 1.566 
character:   4 | 0.985 | 1.379 
character:   5 | 0.988 | 1.520 
character:   6 | 0.965 | 1.810 
character:   7 | 0.882 | 1.448 
character:   8 | 0.880 | 1.323 
character:   9 | 1.000 | 2.738 
character:  10 | 0.585 | 1.368 
character:  11 | 1.000 | 1.871 
character:  12 | 0.717 | 1.989 
character:  13 | 0.920 | 1.649 
character:  14 | 0.936 | 1.977 
character:  15 | 0.981 | 2.569 
character:  16 | 0.830 | 1.478 
character:  17 | 1.000 | 2.285 
character:  18 | 0.600 | 1.573 
character:  19 | 0.893 | 1.986 
character:  20 | 0.945 | 2.620 


In [7]:
# For each of the 20 class labels, run search_dtw over the full trajectories
# file with that class's mean-averaged template (averaged_<label>.csv) as the
# query, then print "label | decision() score | elapsed value from search_dtw".
# `optimize` is not passed here, so search_dtw's own default applies.
# NOTE(review): the meaning of the sixth positional argument (2 here, 1 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        2)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.863 | 1.201 
character:   2 | 0.828 | 1.353 
character:   3 | 0.881 | 1.077 
character:   4 | 0.970 | 0.995 
character:   5 | 0.988 | 1.034 
character:   6 | 0.947 | 1.376 
character:   7 | 0.863 | 0.956 
character:   8 | 0.893 | 0.952 
character:   9 | 0.980 | 1.608 
character:  10 | 0.547 | 0.925 
character:  11 | 0.948 | 1.251 
character:  12 | 0.717 | 1.447 
character:  13 | 0.780 | 1.163 
character:  14 | 0.872 | 1.377 
character:  15 | 1.000 | 1.632 
character:  16 | 0.660 | 1.017 
character:  17 | 1.000 | 1.557 
character:  18 | 0.540 | 1.020 
character:  19 | 0.893 | 1.343 
character:  20 | 0.918 | 1.829 


## Без оптимизаций

### Усреднение: DBA

In [8]:
# Baseline run with optimize=False: for each of the 20 class labels, search the
# full trajectories file with that class's DBA-averaged template as the query,
# then print "label | decision() score | elapsed value from search_dtw".
# NOTE(review): the meaning of the sixth positional argument (2 here, 1 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        2, optimize=False)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.877 | 1.289 
character:   2 | 0.741 | 1.379 
character:   3 | 0.898 | 1.080 
character:   4 | 0.924 | 0.746 
character:   5 | 0.975 | 1.095 
character:   6 | 0.877 | 1.263 
character:   7 | 0.804 | 0.924 
character:   8 | 0.853 | 0.843 
character:   9 | 1.000 | 1.559 
character:  10 | 0.528 | 0.843 
character:  11 | 0.948 | 1.384 
character:  12 | 0.792 | 1.489 
character:  13 | 0.740 | 1.210 
character:  14 | 0.447 | 1.459 
character:  15 | 0.981 | 1.573 
character:  16 | 0.623 | 1.034 
character:  17 | 1.000 | 1.608 
character:  18 | 0.660 | 1.040 
character:  19 | 0.804 | 1.569 
character:  20 | 0.890 | 1.798 


In [10]:
# Baseline run with optimize=False: for each of the 20 class labels, search the
# full trajectories file with that class's DBA-averaged template as the query,
# then print "label | decision() score | elapsed value from search_dtw".
# NOTE(review): the meaning of the sixth positional argument (1 here, 2 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        1, optimize=False)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.932 | 1.930 
character:   2 | 0.793 | 1.986 
character:   3 | 0.932 | 1.912 
character:   4 | 0.924 | 1.150 
character:   5 | 0.975 | 1.806 
character:   6 | 0.912 | 2.222 
character:   7 | 0.843 | 1.655 
character:   8 | 0.787 | 1.184 
character:   9 | 1.000 | 3.132 
character:  10 | 0.566 | 1.325 
character:  11 | 0.983 | 2.382 
character:  12 | 0.887 | 2.415 
character:  13 | 0.860 | 1.798 
character:  14 | 0.511 | 2.216 
character:  15 | 0.981 | 2.368 
character:  16 | 0.755 | 1.455 
character:  17 | 1.000 | 2.486 
character:  18 | 0.720 | 1.827 
character:  19 | 0.804 | 2.217 
character:  20 | 0.973 | 2.742 


### Усреднение: mean

In [11]:
# Baseline run with optimize=False: for each of the 20 class labels, search the
# full trajectories file with that class's mean-averaged template
# (averaged_<label>.csv) as the query, then print
# "label | decision() score | elapsed value from search_dtw".
# NOTE(review): the meaning of the sixth positional argument (1 here, 2 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        1, optimize=False)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.986 | 1.787 
character:   2 | 0.879 | 2.242 
character:   3 | 0.898 | 1.980 
character:   4 | 0.985 | 1.668 
character:   5 | 0.988 | 1.992 
character:   6 | 0.965 | 1.950 
character:   7 | 0.882 | 1.669 
character:   8 | 0.880 | 1.410 
character:   9 | 1.000 | 2.819 
character:  10 | 0.585 | 1.407 
character:  11 | 1.000 | 2.116 
character:  12 | 0.717 | 2.480 
character:  13 | 0.920 | 1.748 
character:  14 | 0.936 | 2.454 
character:  15 | 0.981 | 2.704 
character:  16 | 0.830 | 1.519 
character:  17 | 1.000 | 2.377 
character:  18 | 0.600 | 1.763 
character:  19 | 0.893 | 2.281 
character:  20 | 0.945 | 2.932 


In [12]:
# Baseline run with optimize=False: for each of the 20 class labels, search the
# full trajectories file with that class's mean-averaged template
# (averaged_<label>.csv) as the query, then print
# "label | decision() score | elapsed value from search_dtw".
# NOTE(review): the meaning of the sixth positional argument (2 here, 1 in
# sibling cells) is defined by search_subsequence.search_dtw — confirm there.
for character in range(1, 21):
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        2, optimize=False)
    # The keys of `closest` are what decision() compares with the labelled starts.
    expected = np.array(list(closest.keys()))
    true_starts = keys[keys.labels == character].start.values

    # Pass the configured true_subseq_len rather than repeating the literal 182,
    # so the score follows the config cell if that value is ever changed.
    score = decision(true_starts, expected, true_subseq_len, subseq_len)
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(character, score, elapsed))

character:   1 | 0.863 | 1.510 
character:   2 | 0.828 | 1.520 
character:   3 | 0.881 | 1.185 
character:   4 | 0.970 | 1.031 
character:   5 | 0.988 | 1.079 
character:   6 | 0.947 | 1.471 
character:   7 | 0.863 | 1.145 
character:   8 | 0.893 | 1.331 
character:   9 | 0.980 | 2.044 
character:  10 | 0.547 | 1.146 
character:  11 | 0.948 | 1.389 
character:  12 | 0.717 | 1.615 
character:  13 | 0.780 | 1.431 
character:  14 | 0.872 | 1.553 
character:  15 | 1.000 | 1.662 
character:  16 | 0.660 | 1.246 
character:  17 | 1.000 | 1.902 
character:  18 | 0.540 | 1.023 
character:  19 | 0.893 | 1.584 
character:  20 | 0.918 | 1.857 


## Эпилепсия

In [15]:
# Epilepsy experiment configuration.
# NOTE(review): this cell rebinds `keys`, `closest_series_num`, `subseq_len` and
# `warp_window`, which the character cells above also use; re-running those
# cells after this one would pick up these values instead.
keys = np.genfromtxt(
    "../data/search_subseries/epi_series_epi_ranges.csv"
)
# First column of the ranges file; passed to decision() as the reference starts.
real_starts = keys[:, 0]
# NOTE(review): presumably the length of one annotated attack segment — confirm.
real_len = 206

# Search parameters, passed positionally to search_dtw in this order.
closest_series_num = 20
subseq_len = 140
warp_window = 0.5

In [17]:
# Single search_dtw run (optimize=False) over the epilepsy series, using the
# averaged attack file as the query. The printed line reuses the character-cell
# format with a constant 0 in the label column.
closest, elapsed = search_dtw(
    closest_series_num,
    subseq_len,
    warp_window,
    "../data/search_subseries/epi_series.csv",
    "../data/search_subseries/average_attack.csv",
    2,
    optimize=False,
)
# The keys of `closest` are what decision() compares with `real_starts`.
expected = np.array(list(closest.keys()))
score = decision(real_starts, expected, real_len, subseq_len)

print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(0, score, elapsed))

character:   0 | 0.750 | 19.129 
