# EMD algorithm - Parameter experiments with a single trip

## Maria Inês Silva
## 13/01/2019

***

## Data and library imports

In [1]:
import pandas as pd
import time
import pickle

# extendedMD code
import sys
import os
sys.path.insert(0, os.path.abspath('../'))
from extendedMD.emd import find_motifs_from_emd
from extendedMD.motif_viz import create_motif_table

In [2]:
# Locate the input-data and experiment-output folders, both of which live
# one level above the current working directory.
cwd = os.getcwd()
parent_dir = os.path.join(cwd, os.pardir)
data_folder = os.path.abspath(os.path.join(parent_dir, 'data'))
output_folder = os.path.abspath(
    os.path.join(parent_dir, 'outputs/experiments_v1')
)

In [3]:
# Read the pickled trip dataframe from the data folder and keep only the
# three columns that are later handed to the motif-discovery algorithm.
trip_path = os.path.join(data_folder, 'trip_df.pkl')
trip_df = pd.read_pickle(trip_path)
signal_columns = ['ax', 'ay', 'az']  # presumably the 3 acceleration axes - TODO confirm
multi_dim_ts = trip_df[signal_columns]

***

## Experiment 1: Standard approach

In [4]:
# Experiment 1 (baseline) settings. These four values are the arguments
# passed to find_motifs_from_emd when the experiment is run.
R = 0.5
win_size = 12
paa_size = 3
alphabet_size = 3

# Results of this experiment are stored under <output_folder>/trip_exp1.
# exist_ok=True creates the folder (and any missing parents) when needed and
# avoids the check-then-create race of a separate os.path.exists() test.
save_folder = os.path.join(output_folder, 'trip_exp1')
os.makedirs(save_folder, exist_ok=True)

In [None]:
# Run the extendedMD motif search on the trip data with the experiment 1
# parameters, save every returned object to save_folder and report the
# wall-clock duration.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# One pickle file per result. The `with` block guarantees that each file is
# flushed and closed, instead of leaving an open handle behind.
for file_name, result in (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

elapsed_minutes = round((time.time() - start_time)/60, 2)
print("ExtendedMD algorithm run in {} minutes".format(elapsed_minutes))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
ExtendedMD algorithm run in 21.7 minutes


In [None]:
# Build the motif summary table from the experiment 1 results and show its
# first 15 rows (the value of this expression is the cell's output).
create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist).head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost,mean_dist
8,[abc],1,516,41832.2,0.146786
12,[bac],1,417,42149.8,0.106965
15,[cba],1,486,42180.7,0.11048
10,[acb],1,422,42291.4,0.112651
16,[bca],1,386,42388.1,0.100452
9,[cab],1,367,42830.7,0.142409
5,[bbb],1,804,46082.8,0.111611
42,"[abc, acb]",2,151,48805.0,0.112783
14,[cbb],1,193,49882.9,0.101902
70,"[bac, abc]",2,131,50593.3,0.109283


***

## Experiment 2: Increase alphabet size

In [4]:
# Experiment 2 settings: same as experiment 1 except alphabet_size, which is
# raised from 3 to 4. The four values are passed to find_motifs_from_emd.
R = 0.5
win_size = 12
paa_size = 3
alphabet_size = 4

# Results of this experiment are stored under <output_folder>/trip_exp2.
# exist_ok=True creates the folder (and any missing parents) when needed and
# avoids the check-then-create race of a separate os.path.exists() test.
save_folder = os.path.join(output_folder, 'trip_exp2')
os.makedirs(save_folder, exist_ok=True)

In [5]:
# Run the extendedMD motif search on the trip data with the experiment 2
# parameters, save every returned object to save_folder and report the
# wall-clock duration.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# One pickle file per result. The `with` block guarantees that each file is
# flushed and closed, instead of leaving an open handle behind.
for file_name, result in (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

elapsed_minutes = round((time.time() - start_time)/60, 2)
print("ExtendedMD algorithm run in {} minutes".format(elapsed_minutes))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
ExtendedMD algorithm run in 20.55 minutes


In [6]:
# Build the motif summary table from the experiment 2 results and show its
# first 15 rows (the value of this expression is the cell's output).
create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist).head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost,mean_dist
5,[bbd],1,364,47996.0,0.103308
1,[acc],1,361,48079.0,0.115013
11,[dbb],1,335,48628.1,0.126539
3,[cca],1,325,49215.2,0.159847
14,[bbc],1,398,49773.2,0.091471
6,[cbc],1,357,49881.2,0.103139
13,[bcb],1,342,49927.1,0.106811
7,[cbb],1,397,50035.5,0.119521
0,[dca],1,263,50211.9,0.114092
21,[abd],1,268,50329.6,0.130421


***

## Experiment 3: Increase window size

In [7]:
# Experiment 3 settings: same as experiment 1 except win_size, which is
# doubled from 12 to 24. The four values are passed to find_motifs_from_emd.
R = 0.5
win_size = 24
paa_size = 3
alphabet_size = 3

# Results of this experiment are stored under <output_folder>/trip_exp3.
# exist_ok=True creates the folder (and any missing parents) when needed and
# avoids the check-then-create race of a separate os.path.exists() test.
save_folder = os.path.join(output_folder, 'trip_exp3')
os.makedirs(save_folder, exist_ok=True)

In [8]:
# Run the extendedMD motif search on the trip data with the experiment 3
# parameters, save every returned object to save_folder and report the
# wall-clock duration.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# One pickle file per result. The `with` block guarantees that each file is
# flushed and closed, instead of leaving an open handle behind.
for file_name, result in (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

elapsed_minutes = round((time.time() - start_time)/60, 2)
print("ExtendedMD algorithm run in {} minutes".format(elapsed_minutes))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
ExtendedMD algorithm run in 26.37 minutes


In [9]:
# Build the motif summary table from the experiment 3 results and show its
# first 15 rows (the value of this expression is the cell's output).
create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist).head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost,mean_dist
5,[abc],1,272,37224.3,0.162904
15,[cba],1,245,37313.8,0.159657
1,[bbb],1,565,37323.0,0.117205
8,[bac],1,211,38200.0,0.141388
6,[bca],1,186,39215.4,0.136148
10,[acb],1,195,39411.1,0.15507
14,[cab],1,182,40026.1,0.131585
3,[bbc],1,198,42084.8,0.120199
13,[bab],1,169,43043.0,0.11655
4,[abb],1,195,43076.7,0.147555


***

## Experiment 4: Decrease R

In [10]:
# Experiment 4 settings: same as experiment 1 except R, which is lowered
# from 0.5 to 0.2. The four values are passed to find_motifs_from_emd.
R = 0.2
win_size = 12
paa_size = 3
alphabet_size = 3

# Results of this experiment are stored under <output_folder>/trip_exp4.
# exist_ok=True creates the folder (and any missing parents) when needed and
# avoids the check-then-create race of a separate os.path.exists() test.
save_folder = os.path.join(output_folder, 'trip_exp4')
os.makedirs(save_folder, exist_ok=True)

In [11]:
# Run the extendedMD motif search on the trip data with the experiment 4
# parameters, save every returned object to save_folder and report the
# wall-clock duration.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# One pickle file per result. The `with` block guarantees that each file is
# flushed and closed, instead of leaving an open handle behind.
for file_name, result in (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

elapsed_minutes = round((time.time() - start_time)/60, 2)
print("ExtendedMD algorithm run in {} minutes".format(elapsed_minutes))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
ExtendedMD algorithm run in 21.84 minutes


In [12]:
# Build the motif summary table from the experiment 4 results and show its
# first 15 rows (the value of this expression is the cell's output).
create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist).head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost,mean_dist
5,[abc],1,453,43050.4,0.093512
8,[bac],1,380,43191.7,0.093076
10,[acb],1,382,43597.7,0.092995
3,[bca],1,352,43600.8,0.104092
15,[cba],1,415,43996.1,0.110272
14,[cab],1,280,46434.5,0.09297
1,[bbb],1,759,46973.9,0.087882
0,[cbb],1,180,50996.5,0.092226
81,"[abc, acb]",2,130,51443.4,0.100143
13,[bab],1,158,51571.3,0.096138


***

## Experiment 5: Increase R

In [13]:
# Experiment 5 settings: same as experiment 1 except R, which is raised
# from 0.5 to 1. The four values are passed to find_motifs_from_emd.
R = 1
win_size = 12
paa_size = 3
alphabet_size = 3

# Results of this experiment are stored under <output_folder>/trip_exp5.
# exist_ok=True creates the folder (and any missing parents) when needed and
# avoids the check-then-create race of a separate os.path.exists() test.
save_folder = os.path.join(output_folder, 'trip_exp5')
os.makedirs(save_folder, exist_ok=True)

In [14]:
# Run the extendedMD motif search on the trip data with the experiment 5
# parameters, save every returned object to save_folder and report the
# wall-clock duration.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# One pickle file per result. The `with` block guarantees that each file is
# flushed and closed, instead of leaving an open handle behind.
for file_name, result in (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

elapsed_minutes = round((time.time() - start_time)/60, 2)
print("ExtendedMD algorithm run in {} minutes".format(elapsed_minutes))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
ExtendedMD algorithm run in 23.49 minutes


In [15]:
# Build the motif summary table from the experiment 5 results and show its
# first 15 rows (the value of this expression is the cell's output).
create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist).head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost,mean_dist
5,[abc],1,519,41720.6,0.117401
10,[acb],1,429,42057.5,0.101715
8,[bac],1,418,42096.2,0.10356
15,[cba],1,494,42096.9,0.113635
3,[bca],1,392,42199.0,0.102529
14,[cab],1,378,42384.5,0.110321
1,[bbb],1,812,45921.8,0.098293
81,"[abc, acb]",2,153,48518.6,0.112783
0,[cbb],1,195,49710.9,0.100204
144,"[bac, abc]",2,133,50422.1,0.109283
