In [1]:
import os
import pickle
import time
from datetime import datetime

import iisignature
import numpy as np
from dtaidistance import dtw_ndim
from sklearn.metrics import pairwise_distances  # parallelized scipy's pdist
from sklearn.neighbors import LocalOutlierFactor

# Personal libraries
from utils import dyadic_sig

In [None]:
# Directory containing the simulated datasets TS1.txt ... TS1000.txt.
# The value below is a placeholder: replace it with the real path before running
# the notebook; files are read as f"{folder_data}/TS{idx_dataset}.txt".
folder_data = "Folder where TS1.txt to TS1000.txt are located"

In [3]:
%%time
## PARAMS ######################################################################
n_outliers = 10
n_clusters = 5

batch = 260
stream = 16
channels = 2
level = 4
dyadic_depth = 3
n_simu = 1000

lof_params = {'n_neighbors':10,  'metric':'precomputed'} 
# LOF: no need to set contamination if we want scores 
################################################################################
y_scores = {}
y_scores['sig'] = []
y_scores['dsig'] = []
y_scores['dtw'] = []
y_scores['euc'] = []
y_trues = []

for idx_dataset in range(1, n_simu+1): 
    if idx_dataset%100==0:
        print(idx_dataset)
    X_ = np.loadtxt(f"{folder_data}/TS{idx_dataset}.txt")
    X = np.zeros((batch, stream, channels))
    for i in range(batch):
        X[i, :, 0] = X_[i, :16]
        X[i, :, 1] = X_[i, 16:]
    y_true = np.concatenate((np.ones(250, dtype='int'), -1*np.ones(n_outliers, dtype='int')))

    num_batch = len(X) 
    inds_shuffle = np.arange(num_batch)
    np.random.shuffle(inds_shuffle)
    X = X[inds_shuffle]
    y_true = y_true[inds_shuffle]
    y_trues.append(y_true)
    
    # SIG
    dsig = dyadic_sig(X, level, 0)
    dsig = dsig.reshape(dsig.shape[0], dsig.shape[1]*dsig.shape[2])
    similarity_sig = pairwise_distances(dsig, metric = 'euclidean', n_jobs = -1)    
    clf = LocalOutlierFactor(**lof_params)
    clf.fit_predict(similarity_sig)
    y_scores['sig'].append(-clf.negative_outlier_factor_)    

    # MSIG
    dsig = dyadic_sig(X, level, dyadic_depth)
    dsig = dsig.reshape(dsig.shape[0], dsig.shape[1]*dsig.shape[2])
    similarity_sig = pairwise_distances(dsig, metric = 'euclidean', n_jobs = -1)    
    clf = LocalOutlierFactor(**lof_params)
    clf.fit_predict(similarity_sig)
    y_scores['dsig'].append(-clf.negative_outlier_factor_)

    # DTW
    similarity_dtw = dtw_ndim.distance_matrix_fast(X)
    clf = LocalOutlierFactor(**lof_params)
    clf.fit_predict(similarity_dtw)
    y_scores['dtw'].append(-clf.negative_outlier_factor_)

    # EUC
    similarity_euc = pairwise_distances(X_, metric = 'euclidean', n_jobs = -1)   
    clf = LocalOutlierFactor(**lof_params)
    clf.fit_predict(similarity_euc)
    y_scores['euc'].append(-clf.negative_outlier_factor_)
    

current_time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
with open(f'./outputs/dataset2/y_scores_{current_time}.pkl', 'wb') as f:
    pickle.dump(y_scores, f)
with open(f'./outputs/dataset2/y_trues_{current_time}.pkl', 'wb') as f:
    pickle.dump(y_trues, f)

100
200
300
400
500
600
700
800
900
1000
CPU times: user 11min 18s, sys: 10min 52s, total: 22min 11s
Wall time: 2min 21s


In [4]:
# Quick check: list the methods for which scores were collected in `y_scores`.
y_scores.keys()

dict_keys(['sig', 'dsig', 'dtw', 'euc'])