In [4]:
import re
import numpy as np
import pandas as pd
import subprocess

from matplotlib import pyplot as plt
from scipy.io import arff
from scipy.stats import zscore

from DBA_multivariate import performDBA
from search_subsequence import search_dtw, decision, search_ed, OUPUT_STARTS_REGEXP, OUPUT_BASE_REGEXP

# Эксперимент 1
## Сравнение производительности с оптимизациями и без них
**Датасет**: рукописные цифры

In [5]:
# Configuration for the handwritten-character experiment.

# Ground-truth table; the search cells below read its `labels` and `start` columns.
keys = pd.read_csv(
    "../data/search_subseries/character/character_trajectories_labels.csv",
    index_col=0,
)

# Passed to search_dtw as its first three positional arguments, in this order.
# NOTE(review): warp_window is presumably a fraction of the subsequence length -- confirm
# against search_subsequence.search_dtw.
closest_series_num, subseq_len, warp_window = 80, 100, 0.05

# Length handed to decision() together with subseq_len when scoring the found starts.
true_subseq_len = 182

# Initial label value; the `for character in ...` loops in later cells rebind this name.
character = 9

### Усреднение: DBA

In [6]:
# DBA-averaged templates, fourth positional argument of search_dtw set to 1,
# `optimize` left at its default.
# For every label 1..20: run the search, score the found starts against the
# labelled starts, and print "label | score | elapsed".
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # search_dtw returns a mapping (only its keys are used here) and a value that is
    # printed as the third column -- presumably the run time in seconds; confirm.
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        1)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # Use the configured true_subseq_len instead of repeating the literal 182,
    # so the score follows the configuration cell.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.932 | 1.838 
character:   2 | 0.793 | 2.030 
character:   3 | 0.932 | 1.628 
character:   4 | 0.924 | 1.305 
character:   5 | 0.975 | 2.001 
character:   6 | 0.912 | 1.736 
character:   7 | 0.843 | 1.321 
character:   8 | 0.787 | 1.176 
character:   9 | 1.000 | 2.533 
character:  10 | 0.566 | 1.233 
character:  11 | 0.983 | 2.172 
character:  12 | 0.887 | 2.077 
character:  13 | 0.860 | 1.763 
character:  14 | 0.511 | 2.149 
character:  15 | 0.981 | 2.384 
character:  16 | 0.755 | 1.444 
character:  17 | 1.000 | 2.278 
character:  18 | 0.720 | 1.644 
character:  19 | 0.804 | 2.239 
character:  20 | 0.973 | 2.747 


In [7]:
# DBA-averaged templates, fourth positional argument of search_dtw set to 2,
# `optimize` left at its default.
# Prints one "label | score | elapsed" row per character label 1..20.
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # Only the keys of the returned mapping are used; the second value is printed
    # as the last column (presumably run time -- confirm in search_subsequence).
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        2)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # true_subseq_len comes from the configuration cell; it replaces the
    # duplicated literal 182 so both stay in sync.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.877 | 1.263 
character:   2 | 0.741 | 1.381 
character:   3 | 0.898 | 1.086 
character:   4 | 0.924 | 0.740 
character:   5 | 0.975 | 1.096 
character:   6 | 0.877 | 1.261 
character:   7 | 0.804 | 0.905 
character:   8 | 0.853 | 0.842 
character:   9 | 1.000 | 1.548 
character:  10 | 0.528 | 0.833 
character:  11 | 0.948 | 1.386 
character:  12 | 0.792 | 1.488 
character:  13 | 0.740 | 1.206 
character:  14 | 0.447 | 1.448 
character:  15 | 0.981 | 1.566 
character:  16 | 0.623 | 1.026 
character:  17 | 1.000 | 1.588 
character:  18 | 0.660 | 1.026 
character:  19 | 0.804 | 1.554 
character:  20 | 0.890 | 1.804 


### Усреднение: mean

In [8]:
# Mean-averaged templates (averaged_<label>.csv), fourth positional argument of
# search_dtw set to 1, `optimize` left at its default.
# Prints one "label | score | elapsed" row per character label 1..20.
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # Only the keys of the returned mapping are used; the second value is printed
    # as the last column (presumably run time -- confirm in search_subsequence).
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        1)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # Score with the configured true_subseq_len rather than a hard-coded 182.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.986 | 1.777 
character:   2 | 0.879 | 1.814 
character:   3 | 0.898 | 1.558 
character:   4 | 0.985 | 1.337 
character:   5 | 0.988 | 1.505 
character:   6 | 0.965 | 1.781 
character:   7 | 0.882 | 1.426 
character:   8 | 0.880 | 1.294 
character:   9 | 1.000 | 2.692 
character:  10 | 0.585 | 1.372 
character:  11 | 1.000 | 1.849 
character:  12 | 0.717 | 1.999 
character:  13 | 0.920 | 1.625 
character:  14 | 0.936 | 1.978 
character:  15 | 0.981 | 2.566 
character:  16 | 0.830 | 1.471 
character:  17 | 1.000 | 2.261 
character:  18 | 0.600 | 1.551 
character:  19 | 0.893 | 1.980 
character:  20 | 0.945 | 2.588 


In [9]:
# Mean-averaged templates (averaged_<label>.csv), fourth positional argument of
# search_dtw set to 2, `optimize` left at its default.
# Prints one "label | score | elapsed" row per character label 1..20.
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # Only the keys of the returned mapping are used; the second value is printed
    # as the last column (presumably run time -- confirm in search_subsequence).
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        2)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # Score with the configured true_subseq_len rather than a hard-coded 182.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.863 | 1.188 
character:   2 | 0.828 | 1.345 
character:   3 | 0.881 | 1.082 
character:   4 | 0.970 | 0.983 
character:   5 | 0.988 | 1.017 
character:   6 | 0.947 | 1.339 
character:   7 | 0.863 | 0.976 
character:   8 | 0.893 | 0.961 
character:   9 | 0.980 | 1.610 
character:  10 | 0.547 | 0.924 
character:  11 | 0.948 | 1.257 
character:  12 | 0.717 | 1.451 
character:  13 | 0.780 | 1.167 
character:  14 | 0.872 | 1.367 
character:  15 | 1.000 | 1.603 
character:  16 | 0.660 | 1.010 
character:  17 | 1.000 | 1.575 
character:  18 | 0.540 | 1.012 
character:  19 | 0.893 | 1.305 
character:  20 | 0.918 | 1.830 


## Без оптимизаций

### Усреднение: DBA

In [10]:
# Baseline run with optimize=False: DBA-averaged templates, fourth positional
# argument of search_dtw set to 1.
# Prints one "label | score | elapsed" row per character label 1..20.
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # Only the keys of the returned mapping are used; the second value is printed
    # as the last column (presumably run time -- confirm in search_subsequence).
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        1, optimize=False)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # Score with the configured true_subseq_len rather than a hard-coded 182.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.932 | 9.312 
character:   2 | 0.793 | 9.555 
character:   3 | 0.932 | 10.118 
character:   4 | 0.924 | 10.494 
character:   5 | 0.975 | 13.221 
character:   6 | 0.912 | 11.374 
character:   7 | 0.843 | 10.596 
character:   8 | 0.787 | 11.748 
character:   9 | 1.000 | 10.428 
character:  10 | 0.566 | 10.272 
character:  11 | 0.983 | 10.975 
character:  12 | 0.887 | 11.123 
character:  13 | 0.860 | 11.498 
character:  14 | 0.511 | 10.121 
character:  15 | 0.981 | 10.024 
character:  16 | 0.755 | 9.693 
character:  17 | 1.000 | 9.724 
character:  18 | 0.720 | 10.645 
character:  19 | 0.804 | 9.809 
character:  20 | 0.973 | 10.148 


In [11]:
# Baseline run with optimize=False: DBA-averaged templates, fourth positional
# argument of search_dtw set to 2.
# Prints one "label | score | elapsed" row per character label 1..20.
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # Only the keys of the returned mapping are used; the second value is printed
    # as the last column (presumably run time -- confirm in search_subsequence).
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/dba_averaged_{0}.csv".format(character),
        2, optimize=False)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # Score with the configured true_subseq_len rather than a hard-coded 182.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.877 | 9.177 
character:   2 | 0.741 | 9.319 
character:   3 | 0.898 | 9.651 
character:   4 | 0.924 | 10.228 
character:   5 | 0.975 | 8.943 
character:   6 | 0.877 | 9.390 
character:   7 | 0.804 | 9.119 
character:   8 | 0.853 | 9.057 
character:   9 | 1.000 | 9.123 
character:  10 | 0.528 | 9.061 
character:  11 | 0.948 | 9.012 
character:  12 | 0.792 | 9.257 
character:  13 | 0.740 | 8.960 
character:  14 | 0.447 | 9.127 
character:  15 | 0.981 | 9.064 
character:  16 | 0.623 | 9.080 
character:  17 | 1.000 | 9.060 
character:  18 | 0.660 | 9.021 
character:  19 | 0.804 | 9.100 
character:  20 | 0.890 | 9.010 


### Усреднение: mean

In [12]:
# Baseline run with optimize=False: mean-averaged templates (averaged_<label>.csv),
# fourth positional argument of search_dtw set to 1.
# Prints one "label | score | elapsed" row per character label 1..20.
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # Only the keys of the returned mapping are used; the second value is printed
    # as the last column (presumably run time -- confirm in search_subsequence).
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        1, optimize=False)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # Score with the configured true_subseq_len rather than a hard-coded 182.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.986 | 9.442 
character:   2 | 0.879 | 9.389 
character:   3 | 0.898 | 9.367 
character:   4 | 0.985 | 9.411 
character:   5 | 0.988 | 9.379 
character:   6 | 0.965 | 9.390 
character:   7 | 0.882 | 9.368 
character:   8 | 0.880 | 9.352 
character:   9 | 1.000 | 9.446 
character:  10 | 0.585 | 9.386 
character:  11 | 1.000 | 9.424 
character:  12 | 0.717 | 9.413 
character:  13 | 0.920 | 9.426 
character:  14 | 0.936 | 9.384 
character:  15 | 0.981 | 9.414 
character:  16 | 0.830 | 9.400 
character:  17 | 1.000 | 9.517 
character:  18 | 0.600 | 9.548 
character:  19 | 0.893 | 9.423 
character:  20 | 0.945 | 9.426 


In [13]:
# Baseline run with optimize=False: mean-averaged templates (averaged_<label>.csv),
# fourth positional argument of search_dtw set to 2.
# Prints one "label | score | elapsed" row per character label 1..20.
# NOTE: the loop variable rebinds the notebook-level `character`.
for character in range(1, 21):
    # Only the keys of the returned mapping are used; the second value is printed
    # as the last column (presumably run time -- confirm in search_subsequence).
    closest, elapsed = search_dtw(
        closest_series_num, subseq_len, warp_window,
        "../data/search_subseries/character/character_trajectories.csv",
        "../data/search_subseries/character/averaged_{0}.csv".format(character),
        2, optimize=False)
    expected = np.array(list(closest.keys()))
    # Labelled start positions for the current character.
    true_starts = keys[keys.labels == character].start.values

    # Score with the configured true_subseq_len rather than a hard-coded 182.
    print("character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        character,
        decision(true_starts, expected, true_subseq_len, subseq_len),
        elapsed))

character:   1 | 0.863 | 9.082 
character:   2 | 0.828 | 9.008 
character:   3 | 0.881 | 9.189 
character:   4 | 0.970 | 9.059 
character:   5 | 0.988 | 9.001 
character:   6 | 0.947 | 8.985 
character:   7 | 0.863 | 9.010 
character:   8 | 0.893 | 8.976 
character:   9 | 0.980 | 9.078 
character:  10 | 0.547 | 9.032 
character:  11 | 0.948 | 8.982 
character:  12 | 0.717 | 9.008 
character:  13 | 0.780 | 9.016 
character:  14 | 0.872 | 9.145 
character:  15 | 1.000 | 9.150 
character:  16 | 0.660 | 9.008 
character:  17 | 1.000 | 8.984 
character:  18 | 0.540 | 9.033 
character:  19 | 0.893 | 8.962 
character:  20 | 0.918 | 8.916 


## Эпилепсия

In [14]:
# Configuration for the epilepsy experiment.
# NOTE: this cell rebinds `keys`, `closest_series_num`, `subseq_len` and `warp_window`,
# which the character-experiment cells above also use; re-running those cells after
# this one would pick up these values.

# Search parameters, passed to search_dtw as its first three positional arguments.
closest_series_num, subseq_len, warp_window = 20, 140, 0.5

# Length handed to decision() together with subseq_len when scoring.
real_len = 206

# Ground-truth ranges loaded with numpy's default (whitespace) delimiter;
# the first column is taken as the start positions.
keys = np.genfromtxt("../data/search_subseries/epi_series_epi_ranges.csv")
real_starts = keys[:, 0]

In [15]:
# Single search over the epilepsy series with the averaged-attack template,
# using 2 as the fourth positional argument and optimize=False.
closest, elapsed = search_dtw(
    closest_series_num,
    subseq_len,
    warp_window,
    "../data/search_subseries/epi_series.csv",
    "../data/search_subseries/average_attack.csv",
    2,
    optimize=False,
)
# Collect the keys of the returned mapping into an array for decision().
expected = np.array([found for found in closest.keys()])

# Same row layout as the character experiment; the label column is the constant 0.
print(
    "character: {0:>3d} | {1:.3f} | {2:.3f} ".format(
        0, decision(real_starts, expected, real_len, subseq_len), elapsed
    )
)

character:   0 | 0.750 | 14.506 
