# EMD algorithm - Parameter experiments with a single trip

## Maria Inês Silva
## 13/01/2019

***

## Data and library imports

In [1]:
import pandas as pd
import time
import pickle

# extendedMD code
import sys
import os
sys.path.insert(0, os.path.abspath('../'))
from extendedMD.emd import find_motifs_from_emd
from extendedMD.motif_viz import create_motif_table

In [2]:
# Locate the data and output folders, both of which live one level above
# the directory the notebook is executed from.
cwd = os.getcwd()
parent_dir = os.path.join(cwd, os.pardir)
data_folder = os.path.abspath(os.path.join(parent_dir, 'data'))
output_folder = os.path.abspath(
    os.path.join(parent_dir, 'outputs/experiments_v2')
)

In [3]:
# Load the pickled trip DataFrame and keep only the three columns that are
# fed to the motif discovery algorithm as the multi-dimensional series.
trip_path = os.path.join(data_folder, 'trip_df.pkl')
trip_df = pd.read_pickle(trip_path)
signal_columns = ['ax', 'ay', 'az']
multi_dim_ts = trip_df[signal_columns]

***

## Experiment 1: Standard approach

In [4]:
# Parameters for Experiment 1 (the "standard approach" baseline); they are
# passed unchanged to find_motifs_from_emd in the next cell.
R = 0.3            # NOTE(review): presumably the motif distance radius - confirm in extendedMD.emd
win_size = 12      # window size
paa_size = 4       # PAA size
alphabet_size = 5  # alphabet size

# Folder where this experiment's pickled outputs are written.
# exist_ok=True creates the folder only when it is missing, without the
# race-prone "check, then create" sequence.
save_folder = os.path.join(output_folder, 'trip_exp1')
os.makedirs(save_folder, exist_ok=True)

In [5]:
start_time = time.time()

# Run the extendedMD motif discovery with the parameters set in the previous cell.
(ts_1d, mdl_cost_list, motif_point_list,
 motif_center_list, pattern_list, mean_dist) = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Persist every output of the run in save_folder. Each file is opened in a
# context manager so that it is flushed and closed as soon as it is written,
# instead of leaving the handle open until garbage collection.
outputs_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in outputs_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration (algorithm run plus pickling) in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
ExtendedMD algorithm run in 8.77 minutes


In [6]:
# Build the motif table from the Experiment 1 results and display its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
12,[bccc],1,89,0.0,60869.6,0.04414
95,[cccc],1,86,0.0,61253.0,0.041465
185,[cccd],1,86,0.0,61526.1,0.04679
252,[ccbb],1,76,0.0,61912.6,0.048898
360,[ccdc],1,72,0.0,62234.1,0.047134
402,[cbbc],1,71,0.0,62335.8,0.049081
293,[bbcc],1,78,0.0,62436.8,0.051131
303,[cbcc],1,76,0.0,62450.8,0.047349
21,[cbbb],1,71,0.0,62514.2,0.052263
386,[ccdd],1,73,0.0,62633.7,0.04687


***

## Experiment 2: Reduce window and PAA size

In [7]:
# Parameters for Experiment 2: same R and alphabet size as Experiment 1,
# with smaller window and PAA sizes (12 -> 9 and 4 -> 3).
R = 0.3            # NOTE(review): presumably the motif distance radius - confirm in extendedMD.emd
win_size = 9       # window size
paa_size = 3       # PAA size
alphabet_size = 5  # alphabet size

# Folder where this experiment's pickled outputs are written.
# exist_ok=True creates the folder only when it is missing, without the
# race-prone "check, then create" sequence.
save_folder = os.path.join(output_folder, 'trip_exp2')
os.makedirs(save_folder, exist_ok=True)

In [8]:
start_time = time.time()

# Run the extendedMD motif discovery with the parameters set in the previous cell.
(ts_1d, mdl_cost_list, motif_point_list,
 motif_center_list, pattern_list, mean_dist) = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Persist every output of the run in save_folder. Each file is opened in a
# context manager so that it is flushed and closed as soon as it is written,
# instead of leaving the handle open until garbage collection.
outputs_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in outputs_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration (algorithm run plus pickling) in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
ExtendedMD algorithm run in 10.42 minutes


In [9]:
# Build the motif table from the Experiment 2 results and display its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
4,[ccc],1,170,0.0,50327.0,0.042169
96,[dcc],1,153,0.0,51547.6,0.044599
77,[ccd],1,157,0.0,51612.8,0.044967
71,[bcc],1,156,0.0,51773.6,0.044653
111,[cbb],1,149,0.0,51988.0,0.047206
97,[ccb],1,155,0.0,52235.9,0.04531
19,[bbc],1,151,0.0,52312.2,0.046324
113,[ddc],1,135,0.0,52545.5,0.044946
22,[cdd],1,129,0.0,52748.3,0.043189
102,[cdc],1,130,0.0,53084.8,0.040527


***

## Experiment 3: Reduce window and PAA size and increase R

In [10]:
# Parameters for Experiment 3: the reduced window and PAA sizes of
# Experiment 2, with R increased from 0.3 to 0.6.
R = 0.6            # NOTE(review): presumably the motif distance radius - confirm in extendedMD.emd
win_size = 9       # window size
paa_size = 3       # PAA size
alphabet_size = 5  # alphabet size

# Folder where this experiment's pickled outputs are written.
# exist_ok=True creates the folder only when it is missing, without the
# race-prone "check, then create" sequence.
save_folder = os.path.join(output_folder, 'trip_exp3')
os.makedirs(save_folder, exist_ok=True)

In [11]:
start_time = time.time()

# Run the extendedMD motif discovery with the parameters set in the previous cell.
(ts_1d, mdl_cost_list, motif_point_list,
 motif_center_list, pattern_list, mean_dist) = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Persist every output of the run in save_folder. Each file is opened in a
# context manager so that it is flushed and closed as soon as it is written,
# instead of leaving the handle open until garbage collection.
outputs_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("motif_center.p", motif_center_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in outputs_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

# Report the wall-clock duration (algorithm run plus pickling) in minutes.
print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
ExtendedMD algorithm run in 10.47 minutes


In [12]:
# Build the motif table from the Experiment 3 results and display its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
4,[ccc],1,170,0.0,50327.0,0.042169
96,[dcc],1,153,0.0,51547.6,0.044599
77,[ccd],1,157,0.0,51612.8,0.044967
71,[bcc],1,156,0.0,51773.6,0.044653
111,[cbb],1,149,0.0,51988.0,0.047206
97,[ccb],1,155,0.0,52235.9,0.04531
19,[bbc],1,151,0.0,52312.2,0.046324
113,[ddc],1,135,0.0,52545.5,0.044946
22,[cdd],1,129,0.0,52748.3,0.043189
102,[cdc],1,130,0.0,53084.8,0.040527
