# ExtendedMD (EMD) algorithm - Parameter experiments with a single trip

## Maria Inês Silva
## 13/01/2019

***

## Data and library imports

In [1]:
import pandas as pd
import time
import pickle

# extendedMD code
# The package is imported from the directory one level above the current
# working directory, so that directory is put first on the module search path
# before the extendedMD imports run.
import sys
import os
sys.path.insert(0, os.path.abspath('../'))
from extendedMD.emd import find_motifs_from_emd
from extendedMD.motif_viz import create_motif_table

In [2]:
# Resolve the input and output folders as siblings of the current working
# directory (both live under its parent directory).
cwd = os.getcwd()
project_root = os.path.join(cwd, os.pardir)
data_folder = os.path.abspath(os.path.join(project_root, 'data'))
output_folder = os.path.abspath(os.path.join(project_root, 'outputs/experiments_v2'))

In [3]:
# Load the pickled trip data frame from the data folder and keep only the
# three columns that are fed to the algorithm as the multi-dimensional series.
# NOTE(review): 'ax', 'ay', 'az' are presumably accelerometer axes - confirm.
trip_path = os.path.join(data_folder, 'trip_df.pkl')
trip_df = pd.read_pickle(trip_path)
multi_dim_ts = trip_df[['ax', 'ay', 'az']]

***

## Experiment 1: Standard approach

In [4]:
# Experiment 1 parameters; they are passed positionally to
# find_motifs_from_emd in the next cell.
R = 0.3
win_size = 12
paa_size = 4
alphabet_size = 5

# Every experiment pickles its results into its own sub-folder.
save_folder = os.path.join(output_folder, 'trip_exp1')
# exist_ok=True creates any missing parent folders and is a no-op when the
# folder already exists, which removes the check-then-create race of testing
# os.path.exists() first.
os.makedirs(save_folder, exist_ok=True)

In [5]:
# Wall-clock timer for the whole cell (algorithm run plus saving the results).
start_time = time.time()

# Run the extendedMD motif search on the multi-dimensional series with the
# parameters of the current experiment.
ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Persist every result in the experiment folder. Each file is opened in a
# `with` block so the handle is flushed and closed as soon as the dump ends,
# instead of being left open until garbage collection.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
motif candidates of size 14 successfully extracted
motif candidates of size 15 successfully extracted
motif candidates of size 16 successfully extracted
ExtendedMD algorithm run in 10.67 minutes


In [6]:
# Build the motif summary table from the results of the run above and display
# its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
293,[bccc],1,104,0.080268,60820.6,0.043603
375,[cccd],1,102,0.082413,61457.0,0.046667
21,[cccc],1,173,0.371185,61742.7,0.041833
403,[ccbb],1,88,0.094854,61889.3,0.048537
77,[ccdc],1,88,0.064579,62015.5,0.046816
90,[cbbc],1,80,0.044681,62209.0,0.048694
291,[cbcc],1,95,0.096896,62366.4,0.047667
96,[bbcc],1,88,0.051506,62407.2,0.049742
200,[cbbb],1,84,0.098517,62521.6,0.053368
216,[ccdd],1,82,0.067708,62635.9,0.048074


***

## Experiment 2: Reduce window and PAA size

In [7]:
# Experiment 2 parameters: same R and alphabet size as Experiment 1, with the
# window size reduced from 12 to 9 and the PAA size from 4 to 3.
R = 0.3
win_size = 9
paa_size = 3
alphabet_size = 5

# Every experiment pickles its results into its own sub-folder.
save_folder = os.path.join(output_folder, 'trip_exp2')
# exist_ok=True creates any missing parent folders and is a no-op when the
# folder already exists, which removes the check-then-create race of testing
# os.path.exists() first.
os.makedirs(save_folder, exist_ok=True)

In [8]:
# Wall-clock timer for the whole cell (algorithm run plus saving the results).
start_time = time.time()

# Run the extendedMD motif search on the multi-dimensional series with the
# parameters of the current experiment.
ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Persist every result in the experiment folder. Each file is opened in a
# `with` block so the handle is flushed and closed as soon as the dump ends,
# instead of being left open until garbage collection.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
ExtendedMD algorithm run in 12.76 minutes


In [9]:
# Build the motif summary table from the results of the run above and display
# its first 15 rows.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
61,[ccc],1,306,0.306496,51525.7,0.041106
81,[dcc],1,189,0.08433,51658.9,0.044531
43,[ccd],1,187,0.077061,51732.2,0.045299
120,[bcc],1,188,0.079976,51865.8,0.044092
78,[cbb],1,181,0.088761,52138.2,0.04801
111,[ccb],1,182,0.065375,52338.2,0.046036
14,[bbc],1,182,0.079674,52353.5,0.047413
36,[ddc],1,145,0.037037,52564.5,0.044731
106,[cdd],1,155,0.081218,52854.7,0.04492
37,[cdc],1,165,0.092877,53146.0,0.04119


***

## Experiment 3: Reduce window and PAA size and increase R

In [10]:
# Experiment 3 parameters: same window, PAA and alphabet sizes as
# Experiment 2, with R doubled from 0.3 to 0.6.
R = 0.6
win_size = 9
paa_size = 3
alphabet_size = 5

# Every experiment pickles its results into its own sub-folder.
save_folder = os.path.join(output_folder, 'trip_exp3')
# exist_ok=True creates any missing parent folders and is a no-op when the
# folder already exists, which removes the check-then-create race of testing
# os.path.exists() first.
os.makedirs(save_folder, exist_ok=True)

In [11]:
# Wall-clock timer for the whole cell (algorithm run plus saving the results).
start_time = time.time()

# Run the extendedMD motif search on the multi-dimensional series with the
# parameters of the current experiment.
ts_1d, mdl_cost_list, motif_point_list, pattern_list, mean_dist = find_motifs_from_emd(
    multi_dim_ts, R, win_size, paa_size, alphabet_size
)

# Persist every result in the experiment folder. Each file is opened in a
# `with` block so the handle is flushed and closed as soon as the dump ends,
# instead of being left open until garbage collection.
results_to_save = (
    ("ts_1d.p", ts_1d),
    ("mdl_cost.p", mdl_cost_list),
    ("motif_point.p", motif_point_list),
    ("patterns.p", pattern_list),
    ("mean_dist.p", mean_dist),
)
for file_name, result in results_to_save:
    with open(os.path.join(save_folder, file_name), "wb") as out_file:
        pickle.dump(result, out_file)

print("ExtendedMD algorithm run in {} minutes".format(round((time.time() - start_time)/60, 2)))

motif candidates of size 1 successfully extracted
motif candidates of size 2 successfully extracted
motif candidates of size 3 successfully extracted
motif candidates of size 4 successfully extracted
motif candidates of size 5 successfully extracted
motif candidates of size 6 successfully extracted
motif candidates of size 7 successfully extracted
motif candidates of size 8 successfully extracted
motif candidates of size 9 successfully extracted
motif candidates of size 10 successfully extracted
motif candidates of size 11 successfully extracted
motif candidates of size 12 successfully extracted
motif candidates of size 13 successfully extracted
ExtendedMD algorithm run in 12.79 minutes


In [12]:
# Build the motif summary table from the results of the run above and display
# its first 15 rows.
# NOTE(review): the table recorded below is identical, row for row, to the one
# recorded for Experiment 2 even though R was doubled - confirm whether R has
# any effect on these columns or whether this cell showed stale results.
motif_table = create_motif_table(pattern_list, motif_point_list, mdl_cost_list, mean_dist)
motif_table.head(15)

Unnamed: 0,pattern,pattern_len,n_members,overlap_ratio,mdl_cost,mean_dist
61,[ccc],1,306,0.306496,51525.7,0.041106
81,[dcc],1,189,0.08433,51658.9,0.044531
43,[ccd],1,187,0.077061,51732.2,0.045299
120,[bcc],1,188,0.079976,51865.8,0.044092
78,[cbb],1,181,0.088761,52138.2,0.04801
111,[ccb],1,182,0.065375,52338.2,0.046036
14,[bbc],1,182,0.079674,52353.5,0.047413
36,[ddc],1,145,0.037037,52564.5,0.044731
106,[cdd],1,155,0.081218,52854.7,0.04492
37,[cdc],1,165,0.092877,53146.0,0.04119
