# EMD algorithm - Parameter experiments with a single trip

## Maria Inês Silva
## 11/01/2019

***

## Data and library imports

In [1]:
import pandas as pd
import time
import pickle

# extendedMD code
# The absolute path of the parent directory is pushed to the front of sys.path
# so that the `extendedMD` package can be imported from there.
# NOTE(review): '../' is resolved against the current working directory, so this
# presumably expects the notebook to be launched from its own folder - confirm.
import sys
import os
sys.path.insert(0, os.path.abspath('../'))
from extendedMD.emd import find_motifs_from_emd

In [2]:
# Resolve the 'data' and 'outputs' folders that sit next to the current working
# directory (i.e. inside its parent), as absolute paths.
cwd = os.getcwd()
data_folder, output_folder = [
    os.path.abspath(os.path.join(cwd, os.pardir, sibling))
    for sibling in ('data', 'outputs')
]

In [3]:
# Load the pickled trip DataFrame from the data folder and keep only the six
# columns that are handed to the motif-discovery algorithm.
# NOTE(review): presumably acceleration axes (ax, ay, az) plus orientation
# angles (roll, pitch, yaw) - confirm against the data source.
trip_df = pd.read_pickle(os.path.join(data_folder, 'trip_df.pkl'))
multi_dim_ts = trip_df.loc[:, ['ax', 'ay', 'az', 'roll', 'pitch', 'yaw']]

***

## Experiment 1: Standard approach

In [4]:
# Experiment 1 (baseline) parameters for find_motifs_from_emd.
# NOTE(review): the meanings below are inferred from the names only - confirm
# against the extendedMD.emd documentation.
R = 0.5            # presumably the distance threshold / radius
win_size = 9       # presumably the sliding-window length
paa_size = 3       # presumably the number of PAA segments per window
alphabet_size = 3  # presumably the number of symbols used for discretisation

# Each experiment writes its results to its own sub-folder of the outputs folder.
save_folder = os.path.join(output_folder, 'trip_exp1')
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(save_folder, exist_ok=True)

In [5]:
# Run the extendedMD motif discovery with the parameters of this experiment and
# persist the four returned objects to `save_folder`. The reported time covers
# both the algorithm and the writes.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size)

# Write each result to its own pickle file. The `with` block guarantees the
# file handle is flushed and closed even if pickling raises; the previous
# `pickle.dump(obj, open(...))` form never closed the handles explicitly.
for file_name, result in (("ts_1d.p", ts_1d),
                          ("mdl_cost.p", mdl_cost_list),
                          ("motif_point.p", motif_point_list),
                          ("patterns.p", pattern_list)):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully

In [6]:
# Summarise every motif: its pattern, the number of elements in the pattern,
# the number of entries in its motif-point list and its MDL cost.
motif_dic = dict(
    pattern=pattern_list,
    pattern_len=list(map(len, pattern_list)),
    n_members=list(map(len, motif_point_list)),
    mdl_cost=mdl_cost_list,
)

# Round the cost to one decimal, order the table by ascending cost and show
# the first 15 rows.
motif_df = pd.DataFrame(motif_dic)
motif_df['mdl_cost'] = motif_df['mdl_cost'].round(1)
motif_df = motif_df.sort_values('mdl_cost')
motif_df.head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost
3,[ccc],1,5,21552.3
14,"[bba, cca, cba, caa, cbb, aaa]",6,3,26572.2
13,"[cca, cba, caa, cbb, aaa]",5,3,26586.3
11,"[cba, caa, cbb, aaa]",4,3,26629.7
9,"[caa, cbb, aaa]",3,3,26644.6
0,[bba],1,3,27208.4
2,[cba],1,2,27770.8
4,[aaa],1,2,29372.7
1,[abc],1,4,30347.8
8,"[abc, bbb, abc]",3,2,31081.9


***

## Experiment 2: Increase alphabet size

In [7]:
# Experiment 2 parameters for find_motifs_from_emd: identical to Experiment 1
# except that alphabet_size is raised from 3 to 4.
# NOTE(review): the meanings below are inferred from the names only - confirm
# against the extendedMD.emd documentation.
R = 0.5            # presumably the distance threshold / radius
win_size = 9       # presumably the sliding-window length
paa_size = 3       # presumably the number of PAA segments per window
alphabet_size = 4  # presumably the number of symbols used for discretisation

# Each experiment writes its results to its own sub-folder of the outputs folder.
save_folder = os.path.join(output_folder, 'trip_exp2')
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(save_folder, exist_ok=True)

In [8]:
# Run the extendedMD motif discovery with the parameters of this experiment and
# persist the four returned objects to `save_folder`. The reported time covers
# both the algorithm and the writes.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size)

# Write each result to its own pickle file. The `with` block guarantees the
# file handle is flushed and closed even if pickling raises; the previous
# `pickle.dump(obj, open(...))` form never closed the handles explicitly.
for file_name, result in (("ts_1d.p", ts_1d),
                          ("mdl_cost.p", mdl_cost_list),
                          ("motif_point.p", motif_point_list),
                          ("patterns.p", pattern_list)):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully

In [9]:
# Summarise every motif: its pattern, the number of elements in the pattern,
# the number of entries in its motif-point list and its MDL cost.
motif_dic = dict(
    pattern=pattern_list,
    pattern_len=list(map(len, pattern_list)),
    n_members=list(map(len, motif_point_list)),
    mdl_cost=mdl_cost_list,
)

# Round the cost to one decimal, order the table by ascending cost and show
# the first 15 rows.
motif_df = pd.DataFrame(motif_dic)
motif_df['mdl_cost'] = motif_df['mdl_cost'].round(1)
motif_df = motif_df.sort_values('mdl_cost')
motif_df.head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost
4,[ddd],1,5,24228.0
21,"[abd, acd]",2,6,25793.2
5,[acd],1,7,25831.3
3,[abd],1,8,25861.0
15,"[dca, dba]",2,5,26055.9
10,[dca],1,5,26128.0
22,"[acd, abd]",2,4,26735.8
30,"[abd, acd, abd]",3,3,26739.4
11,"[bbb, abd]",2,2,26942.0
32,"[bbb, abd, acd]",3,2,26943.8


***

## Experiment 3: Increase window size

In [10]:
# Experiment 3 parameters for find_motifs_from_emd: identical to Experiment 1
# except that win_size is doubled from 9 to 18.
# NOTE(review): the meanings below are inferred from the names only - confirm
# against the extendedMD.emd documentation.
R = 0.5            # presumably the distance threshold / radius
win_size = 18      # presumably the sliding-window length
paa_size = 3       # presumably the number of PAA segments per window
alphabet_size = 3  # presumably the number of symbols used for discretisation

# Each experiment writes its results to its own sub-folder of the outputs folder.
save_folder = os.path.join(output_folder, 'trip_exp3')
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(save_folder, exist_ok=True)

In [11]:
# Run the extendedMD motif discovery with the parameters of this experiment and
# persist the four returned objects to `save_folder`. The reported time covers
# both the algorithm and the writes.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size)

# Write each result to its own pickle file. The `with` block guarantees the
# file handle is flushed and closed even if pickling raises; the previous
# `pickle.dump(obj, open(...))` form never closed the handles explicitly.
for file_name, result in (("ts_1d.p", ts_1d),
                          ("mdl_cost.p", mdl_cost_list),
                          ("motif_point.p", motif_point_list),
                          ("patterns.p", pattern_list)):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully

In [12]:
# Summarise every motif: its pattern, the number of elements in the pattern,
# the number of entries in its motif-point list and its MDL cost.
motif_dic = dict(
    pattern=pattern_list,
    pattern_len=list(map(len, pattern_list)),
    n_members=list(map(len, motif_point_list)),
    mdl_cost=mdl_cost_list,
)

# Round the cost to one decimal, order the table by ascending cost and show
# the first 15 rows.
motif_df = pd.DataFrame(motif_dic)
motif_df['mdl_cost'] = motif_df['mdl_cost'].round(1)
motif_df = motif_df.sort_values('mdl_cost')
motif_df.head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost
5,[ccc],1,9,33221.0
31,"[ccc, abc, ccc]",3,3,36050.6
22,"[abc, ccc]",2,3,37681.4
44,"[bba, cca, cba, caa, cbb, aaa]",6,3,39528.3
42,"[cca, cba, caa, cbb, aaa]",5,3,39571.1
35,"[cba, caa, cbb, aaa]",4,3,39631.0
30,"[caa, cbb, aaa]",3,3,39694.5
43,"[bba, cca, cba, caa, cbb]",5,3,39848.0
39,"[bba, cca, cba, caa]",4,3,39879.4
37,"[cca, cba, caa, cbb]",4,3,39900.0


***

## Experiment 4: Decrease R

In [13]:
# Experiment 4 parameters for find_motifs_from_emd: identical to Experiment 1
# except that R is lowered from 0.5 to 0.2.
# NOTE(review): the meanings below are inferred from the names only - confirm
# against the extendedMD.emd documentation.
R = 0.2            # presumably the distance threshold / radius
win_size = 9       # presumably the sliding-window length
paa_size = 3       # presumably the number of PAA segments per window
alphabet_size = 3  # presumably the number of symbols used for discretisation

# Each experiment writes its results to its own sub-folder of the outputs folder.
save_folder = os.path.join(output_folder, 'trip_exp4')
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(save_folder, exist_ok=True)

In [14]:
# Run the extendedMD motif discovery with the parameters of this experiment and
# persist the four returned objects to `save_folder`. The reported time covers
# both the algorithm and the writes.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size)

# Write each result to its own pickle file. The `with` block guarantees the
# file handle is flushed and closed even if pickling raises; the previous
# `pickle.dump(obj, open(...))` form never closed the handles explicitly.
for file_name, result in (("ts_1d.p", ts_1d),
                          ("mdl_cost.p", mdl_cost_list),
                          ("motif_point.p", motif_point_list),
                          ("patterns.p", pattern_list)):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully

In [15]:
# Summarise every motif: its pattern, the number of elements in the pattern,
# the number of entries in its motif-point list and its MDL cost.
motif_dic = dict(
    pattern=pattern_list,
    pattern_len=list(map(len, pattern_list)),
    n_members=list(map(len, motif_point_list)),
    mdl_cost=mdl_cost_list,
)

# Round the cost to one decimal, order the table by ascending cost and show
# the first 15 rows.
motif_df = pd.DataFrame(motif_dic)
motif_df['mdl_cost'] = motif_df['mdl_cost'].round(1)
motif_df = motif_df.sort_values('mdl_cost')
motif_df.head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost
8,"[bba, cca, cba, caa, cbb, aaa]",6,3,26572.2
7,"[cca, cba, caa, cbb, aaa]",5,3,26586.3
6,"[cba, caa, cbb, aaa]",4,3,26629.7
5,"[caa, cbb, aaa]",3,3,26644.6
0,[bba],1,3,27208.4
1,[abc],1,3,30745.6
4,"[abc, bbb]",2,2,31640.4
3,"[cba, bbb]",2,2,33761.5
2,[cba],1,2,33780.8


***

## Experiment 5: Increase R

In [16]:
# Experiment 5 parameters for find_motifs_from_emd: identical to Experiment 1
# except that R is raised from 0.5 to 1.
# NOTE(review): the meanings below are inferred from the names only - confirm
# against the extendedMD.emd documentation.
R = 1              # presumably the distance threshold / radius
win_size = 9       # presumably the sliding-window length
paa_size = 3       # presumably the number of PAA segments per window
alphabet_size = 3  # presumably the number of symbols used for discretisation

# Each experiment writes its results to its own sub-folder of the outputs folder.
save_folder = os.path.join(output_folder, 'trip_exp5')
# exist_ok=True creates the folder when it is missing and does nothing when it
# already exists, avoiding the check-then-create race of a separate
# os.path.exists() test.
os.makedirs(save_folder, exist_ok=True)

In [17]:
# Run the extendedMD motif discovery with the parameters of this experiment and
# persist the four returned objects to `save_folder`. The reported time covers
# both the algorithm and the writes.
start_time = time.time()

ts_1d, mdl_cost_list, motif_point_list, pattern_list = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size)

# Write each result to its own pickle file. The `with` block guarantees the
# file handle is flushed and closed even if pickling raises; the previous
# `pickle.dump(obj, open(...))` form never closed the handles explicitly.
for file_name, result in (("ts_1d.p", ts_1d),
                          ("mdl_cost.p", mdl_cost_list),
                          ("motif_point.p", motif_point_list),
                          ("patterns.p", pattern_list)):
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
motif candidates of size 17 successfully extracted
motif candidates of size 18 successfully extracted
motif candidates of size 19 successfully extracted
motif candidates of size 20 successfully

In [18]:
# Summarise every motif: its pattern, the number of elements in the pattern,
# the number of entries in its motif-point list and its MDL cost.
motif_dic = dict(
    pattern=pattern_list,
    pattern_len=list(map(len, pattern_list)),
    n_members=list(map(len, motif_point_list)),
    mdl_cost=mdl_cost_list,
)

# Round the cost to one decimal, order the table by ascending cost and show
# the first 15 rows.
motif_df = pd.DataFrame(motif_dic)
motif_df['mdl_cost'] = motif_df['mdl_cost'].round(1)
motif_df = motif_df.sort_values('mdl_cost')
motif_df.head(15)

Unnamed: 0,pattern,pattern_len,n_members,mdl_cost
10,[aaa],1,8,19067.0
8,[ccc],1,7,19142.9
33,"[aaa, bbc, aac]",3,5,21936.9
23,"[bbc, aac]",2,5,22521.6
7,[aac],1,5,22572.3
5,[bbc],1,5,22594.0
24,"[aaa, bbc]",2,4,22826.5
31,"[abc, acc, abb]",3,3,23674.2
19,"[acc, abb]",2,3,23723.1
48,"[bba, cca, cba, caa, cbb]",5,3,24477.9
