# EMD algorithm - Parameter experiments with a single trip

## Maria Inês Silva
## 13/01/2019

***

## Data and library imports

In [1]:
import pandas as pd
import time
import pickle

# extendedMD code
import sys
import os
sys.path.insert(0, os.path.abspath('../'))
from extendedMD.emd import find_motifs_from_emd
from extendedMD.motif_viz import create_motif_table

In [2]:
# Resolve the input and output directories relative to the parent of the
# current working directory.
cwd = os.getcwd()
parent_dir = os.path.join(cwd, os.pardir)
data_folder = os.path.abspath(os.path.join(parent_dir, 'data'))
output_folder = os.path.abspath(os.path.join(parent_dir, 'outputs', 'experiments_v1'))

In [3]:
# Load the pickled trip DataFrame and keep only the 'ax', 'ay' and 'az'
# columns as the multi-dimensional time series used by every experiment.
trip_pickle_path = os.path.join(data_folder, 'trip_df.pkl')
trip_df = pd.read_pickle(trip_pickle_path)
multi_dim_ts = trip_df[['ax', 'ay', 'az']]

***

## Experiment 1: Standard approach

In [4]:
# Experiment 1 (standard approach): parameter values handed to
# find_motifs_from_emd in the next cell.
R = 0.5
win_size = 12
paa_size = 3
alphabet_size = 3

# Dedicated output directory for this experiment. exist_ok=True replaces the
# separate os.path.exists() test, which could race with a concurrent creation.
save_folder = os.path.join(output_folder, 'trip_exp1')
os.makedirs(save_folder, exist_ok=True)

In [5]:
start_time = time.time()

# Run the extendedMD motif search on the trip data with the parameters set
# in the previous cell.
ts_1d, mdl_cost_list, motif_point_list, motif_center_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Pickle every result into save_folder. Each file is opened in a `with` block
# so the handle is flushed and closed as soon as its dump finishes.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration of the run plus the saving step, in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully extracted

In [6]:
# Build the motif summary table from this experiment's results and display
# its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
4,[abb],1,231,0.120194,38310.4,0.07225
3,[bbb],1,526,0.437332,38448.0,0.069424
14,[bba],1,223,0.121166,39069.6,0.070995
7,[cbb],1,191,0.089565,39905.7,0.075185
22,[bbc],1,185,0.082585,40319.6,0.066618
13,[bab],1,207,0.130343,40326.5,0.068155
15,[baa],1,172,0.098633,41493.1,0.091619
20,[aab],1,182,0.142507,41726.2,0.085382
19,[bcb],1,165,0.098361,42147.8,0.067451
215,"[abb, bbb]",2,118,0.045721,42408.3,0.073877


***

## Experiment 2: Increase alphabet size

In [7]:
# Experiment 2: same values as Experiment 1 except alphabet_size, raised
# from 3 to 4.
R = 0.5
win_size = 12
paa_size = 3
alphabet_size = 4

# Dedicated output directory for this experiment. exist_ok=True replaces the
# separate os.path.exists() test, which could race with a concurrent creation.
save_folder = os.path.join(output_folder, 'trip_exp2')
os.makedirs(save_folder, exist_ok=True)

In [8]:
start_time = time.time()

# Run the extendedMD motif search on the trip data with the parameters set
# in the previous cell.
ts_1d, mdl_cost_list, motif_point_list, motif_center_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Pickle every result into save_folder. Each file is opened in a `with` block
# so the handle is flushed and closed as soon as its dump finishes.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration of the run plus the saving step, in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
ExtendedMD algorithm run in 12.82 minutes

In [9]:
# Build the motif summary table from this experiment's results and display
# its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
8,[bcc],1,222,0.121743,43896.9,0.059178
54,[ccb],1,225,0.116446,44037.0,0.059878
55,[bbc],1,234,0.148726,44570.7,0.05962
20,[cbb],1,214,0.123997,45120.6,0.061435
11,[bbb],1,294,0.295264,45220.1,0.062387
31,[ccc],1,275,0.353452,46231.4,0.060373
52,[bcb],1,204,0.168473,46892.6,0.059241
32,[cbc],1,193,0.161611,47638.9,0.060128
421,"[bbb, bbc]",2,96,0.019565,50740.4,0.067579
77,"[bcc, ccc]",2,84,0.050955,52063.1,0.059798


***

## Experiment 3: Increase window size

In [10]:
# Experiment 3: same values as Experiment 1 except win_size, raised
# from 12 to 24.
R = 0.5
win_size = 24
paa_size = 3
alphabet_size = 3

# Dedicated output directory for this experiment. exist_ok=True replaces the
# separate os.path.exists() test, which could race with a concurrent creation.
save_folder = os.path.join(output_folder, 'trip_exp3')
os.makedirs(save_folder, exist_ok=True)

In [11]:
start_time = time.time()

# Run the extendedMD motif search on the trip data with the parameters set
# in the previous cell.
ts_1d, mdl_cost_list, motif_point_list, motif_center_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Pickle every result into save_folder. Each file is opened in a `with` block
# so the handle is flushed and closed as soon as its dump finishes.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration of the run plus the saving step, in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
ExtendedMD algorithm run in 15.15 minutes


In [12]:
# Build the motif summary table from this experiment's results and display
# its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
3,[bbb],1,330,0.508995,31699.1,0.106782
4,[abb],1,147,0.230209,35881.8,0.107476
14,[bba],1,144,0.232239,37112.4,0.106154
13,[bab],1,139,0.220797,37363.3,0.105413
7,[cbb],1,111,0.141968,38026.3,0.109845
19,[bcb],1,104,0.136806,38776.4,0.099282
22,[bbc],1,113,0.173138,39031.3,0.099398
155,"[abb, bbb]",2,82,0.12419,39357.2,0.116339
20,[aab],1,113,0.18041,39870.9,0.133514
15,[baa],1,102,0.209053,40899.2,0.138146


***

## Experiment 4: Decrease R

In [13]:
# Experiment 4: same values as Experiment 1 except R, lowered
# from 0.5 to 0.2.
R = 0.2
win_size = 12
paa_size = 3
alphabet_size = 3

# Dedicated output directory for this experiment. exist_ok=True replaces the
# separate os.path.exists() test, which could race with a concurrent creation.
save_folder = os.path.join(output_folder, 'trip_exp4')
os.makedirs(save_folder, exist_ok=True)

In [14]:
start_time = time.time()

# Run the extendedMD motif search on the trip data with the parameters set
# in the previous cell.
ts_1d, mdl_cost_list, motif_point_list, motif_center_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Pickle every result into save_folder. Each file is opened in a `with` block
# so the handle is flushed and closed as soon as its dump finishes.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration of the run plus the saving step, in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully extracted

In [15]:
# Build the motif summary table from this experiment's results and display
# its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
4,[abb],1,230,0.120735,38349.7,0.090145
3,[bbb],1,523,0.438216,38677.5,0.069424
14,[bba],1,222,0.121773,39113.7,0.070995
7,[cbb],1,190,0.090114,39946.2,0.082845
22,[bbc],1,185,0.082585,40319.6,0.066618
13,[bab],1,205,0.129321,40359.7,0.075045
20,[aab],1,178,0.13986,41771.2,0.107271
15,[baa],1,161,0.087782,41980.2,0.117908
19,[bcb],1,165,0.098361,42147.8,0.067451
210,"[abb, bbb]",2,118,0.045721,42408.3,0.073877


***

## Experiment 5: Increase R

In [16]:
# Experiment 5: same values as Experiment 1 except R, raised
# from 0.5 to 1.
R = 1
win_size = 12
paa_size = 3
alphabet_size = 3

# Dedicated output directory for this experiment. exist_ok=True replaces the
# separate os.path.exists() test, which could race with a concurrent creation.
save_folder = os.path.join(output_folder, 'trip_exp5')
os.makedirs(save_folder, exist_ok=True)

In [17]:
start_time = time.time()

# Run the extendedMD motif search on the trip data with the parameters set
# in the previous cell.
ts_1d, mdl_cost_list, motif_point_list, motif_center_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Pickle every result into save_folder. Each file is opened in a `with` block
# so the handle is flushed and closed as soon as its dump finishes.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration of the run plus the saving step, in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully extracted

In [18]:
# Build the motif summary table from this experiment's results and display
# its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
4,[abb],1,231,0.120194,38310.4,0.07225
3,[bbb],1,526,0.437332,38448.0,0.069424
14,[bba],1,223,0.121166,39069.6,0.070995
7,[cbb],1,191,0.089565,39905.7,0.075185
22,[bbc],1,185,0.082585,40319.6,0.066618
13,[bab],1,207,0.130343,40326.5,0.068155
15,[baa],1,172,0.098633,41493.1,0.091619
20,[aab],1,182,0.142507,41726.2,0.085382
19,[bcb],1,165,0.098361,42147.8,0.067451
215,"[abb, bbb]",2,118,0.045721,42408.3,0.073877
