In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# UCR Datasets - Distances

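In this notebook we compute several pairwise distance matrices for the UCR datasets — DTW, FastDTW, and Itakura- and Sakoe-Chiba-constrained DTW, on both the raw series and their first differences — and save them to disk so they can be reused in later analysis. Note that the loops below currently process only the first 50 entries of the metadata table.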

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
from fastdtw import fastdtw
from sklearn.metrics import pairwise_distances
from tqdm.notebook import tqdm
from tslearn.metrics import dtw

In [None]:
# Directory holding the UCR archive and its metadata CSV, relative to this notebook.
DATA_PATH = Path("../UCRArchive_2018/")

In [None]:
# Metadata table with one row per dataset; the processing loops below read
# each row's `path` column to locate the dataset's CSV file.
files_frame = pd.read_csv(DATA_PATH / "ucr_metadata.csv")
files_frame

In [None]:
def fastdtw_dist(x, y):
    """Return the FastDTW distance between two 1-D series, ignoring NaN entries.

    NaN values are dropped from each series independently before the call,
    so the two inputs handed to ``fastdtw`` may differ in length.

    Parameters
    ----------
    x, y : numpy.ndarray
        Series to compare; they are indexed with boolean NaN masks.

    Returns
    -------
    The first element of the result returned by ``fastdtw`` (the distance).
    """
    x_valid = x[~np.isnan(x)]
    y_valid = y[~np.isnan(y)]

    # fastdtw returns a (distance, path) pair; only the distance is needed.
    distance, _warp_path = fastdtw(x_valid, y_valid)
    return distance


def itakura_dtw(x, y):
    """Return the DTW distance between two series under an Itakura constraint.

    NaN entries are removed from each series before ``dtw`` is called. The
    constraint parameter is 10% of the longer input's length (measured
    *before* NaN removal), rounded to the nearest integer.

    NOTE(review): that rounded value is forwarded as ``itakura_max_slope``,
    i.e. it is interpreted as a slope rather than a window width — confirm
    this is the intended parameterisation.

    Parameters
    ----------
    x, y : numpy.ndarray
        Series to compare; they are indexed with boolean NaN masks.

    Returns
    -------
    Whatever ``dtw`` returns for the constrained comparison.
    """
    longest = max(x.shape[0], y.shape[0])
    slope = np.rint(0.1 * longest).astype(int)

    constraint = {
        "global_constraint": "itakura",
        "itakura_max_slope": slope,
    }
    return dtw(x[~np.isnan(x)], y[~np.isnan(y)], **constraint)


def sakoe_chiba_dtw(x, y):
    """Return the DTW distance between two series under a Sakoe-Chiba band.

    NaN entries are removed from each series before ``dtw`` is called. The
    band radius is 10% of the longer input's length (measured *before* NaN
    removal), rounded to the nearest integer.

    Parameters
    ----------
    x, y : numpy.ndarray
        Series to compare; they are indexed with boolean NaN masks.

    Returns
    -------
    Whatever ``dtw`` returns for the constrained comparison.
    """
    longest = max(x.shape[0], y.shape[0])
    radius = np.rint(0.1 * longest).astype(int)

    constraint = {
        "global_constraint": "sakoe_chiba",
        "sakoe_chiba_radius": radius,
    }
    return dtw(x[~np.isnan(x)], y[~np.isnan(y)], **constraint)

In [None]:
def compute_and_save(X, save_path, metric="euclidean", n_jobs=4):
    """Compute the pairwise distance matrix of X and write it to disk as text.

    Parameters
    ----------
    X : array-like
        Input passed unchanged to ``pairwise_distances``.
    save_path : str or path-like
        Destination handed to ``np.savetxt``.
    metric : str or callable, default "euclidean"
        Metric forwarded to ``pairwise_distances``.
    n_jobs : int or None, default 4
        Parallelism setting forwarded to ``pairwise_distances``.

    Returns
    -------
    None. The matrix is written comma-separated using the ``%g`` format,
    so values are stored with limited significant digits.
    """
    distances = pairwise_distances(X, metric=metric, n_jobs=n_jobs)
    np.savetxt(save_path, distances, fmt="%g", delimiter=",")

In [None]:
n_jobs = 5

# Only the first 50 rows of the metadata table are processed in this cell.
datasets = files_frame[:50]

# itertuples() is a generator without a length, so the total is passed
# explicitly; otherwise the progress bar cannot show completion or an ETA.
for dataset in tqdm(datasets.itertuples(), total=len(datasets)):
    frame = pd.read_csv(dataset.path, header=None, index_col=None)

    # Output files sit next to the input CSV and share its base name.
    def_path = dataset.path.replace(".csv", "")

    # Column 0 is split off from the series values
    # (presumably the class label — verify against the archive format).
    X = frame.values[:, 1:]
    X_der = np.diff(X, axis=1)  # first differences along each series
    y = frame.values[:, 0]  # not used by the distance jobs below

    # (output suffix, input matrix, metric), executed in this order.
    # The ".gz" extension makes np.savetxt write gzip-compressed text.
    jobs = [
        ("_dtw.gz", X, dtw),
        ("_fdtw.gz", X, fastdtw_dist),
        ("_der_fdtw.gz", X_der, fastdtw_dist),
        ("_itakura.gz", X, itakura_dtw),
        ("_sakoe_chiba.gz", X, sakoe_chiba_dtw),
    ]

    try:
        for suffix, data, metric in jobs:
            compute_and_save(
                data, save_path=def_path + suffix, metric=metric, n_jobs=n_jobs
            )

    except Exception as e:
        # Best effort: report the failing dataset and move on to the next one.
        # The first failure skips the remaining jobs for this dataset.
        print(dataset.path, e, flush=True)

In [None]:
n_jobs = None

# Only the first 50 rows of the metadata table are processed in this cell.
datasets = files_frame[:50]

# itertuples() is a generator without a length, so the total is passed
# explicitly; otherwise the progress bar cannot show completion or an ETA.
for dataset in tqdm(datasets.itertuples(), total=len(datasets)):
    frame = pd.read_csv(dataset.path, header=None, index_col=None)

    # Output files sit next to the input CSV and share its base name.
    def_path = dataset.path.replace(".csv", "")

    # Column 0 is split off from the series values
    # (presumably the class label — verify against the archive format).
    X = frame.values[:, 1:]
    X_der = np.diff(X, axis=1)  # first differences along each series
    y = frame.values[:, 0]  # not used by the distance jobs below

    # (output suffix, metric), executed in this order on the differenced series.
    # The ".gz" extension makes np.savetxt write gzip-compressed text.
    jobs = [
        ("_der_dtw.gz", dtw),
        ("_der_itakura.gz", itakura_dtw),
        ("_der_sakoe_chiba.gz", sakoe_chiba_dtw),
    ]

    try:
        for suffix, metric in jobs:
            compute_and_save(
                X_der, save_path=def_path + suffix, metric=metric, n_jobs=n_jobs
            )

    except Exception as e:
        # Best effort: report the failing dataset and move on to the next one.
        # The first failure skips the remaining jobs for this dataset.
        print(dataset.path, e, flush=True)