# Computing the Dynamic Time Warping (DTW) Distance on UCR Datasets

In [None]:
import csv
import time
from pathlib import Path

import numpy as np
import pandas as pd
from dtw import dtw
from tqdm.notebook import tqdm

In [None]:
def find_train_files(root):
    """Return the paths of all ``*TRAIN.tsv`` files under ``root``, smallest first.

    The search is recursive and keeps regular files only. Results are ordered
    by file size (ties broken by path so the order is deterministic) and are
    returned as plain strings with ``/`` separators.

    Unlike parsing ``ls -al`` output, this does not depend on the column
    layout of ``ls`` and works for paths that contain spaces.

    Args:
        root: Directory to search.

    Returns:
        A list of path strings; empty when ``root`` is not a directory.
    """
    root_path = Path(root)
    if not root_path.is_dir():
        return []
    candidates = [path for path in root_path.rglob("*TRAIN.tsv") if path.is_file()]
    candidates.sort(key=lambda path: (path.stat().st_size, path.as_posix()))
    return [path.as_posix() for path in candidates]


files = find_train_files("../UCRArchive_2018/")
files

In [None]:
# Display how many "*TRAIN.tsv" files were collected in `files`.
len(files)

In [None]:
def _read_series_frame(path):
    """Load a tab-separated file without a header row and interpolate gaps.

    Interpolation runs along each row with ``limit_direction="backward"``.

    NOTE(review): interpolation is applied to the whole row, including
    column 0, which is dropped later as a non-series column (presumably the
    class label). NaNs at the very start of a series may therefore be
    interpolated against that value — confirm this is intended.
    """
    return pd.read_csv(path, delimiter="\t", header=None).interpolate(
        limit_direction="backward", axis=1
    )


def _series_without_nan(frame):
    """Return each row of ``frame`` as a 1-D array with column 0 and NaNs removed."""
    return [row[~np.isnan(row)] for row in frame.values[:, 1:]]


def _dtw_distance_frame(rows, columns, desc):
    """Build a DataFrame whose cell (i, j) is ``dtw(columns[j], rows[i]).distance``.

    Args:
        rows: Series that index the rows of the result (shown in a progress bar).
        columns: Series that index the columns of the result.
        desc: Label for the inner progress bar.
    """
    return pd.DataFrame(
        [
            [dtw(column, row).distance for column in columns]
            for row in tqdm(rows, desc=desc, leave=False)
        ]
    )


# For every dataset: compute the train-vs-train and test-vs-train DTW distance
# matrices, save them next to the input files, and log how long each took.
# newline="" is what the csv module expects of files handed to csv.writer;
# without it some platforms emit an extra blank line after every row.
with open("../logs/create_features_dtw.csv", "w", newline="") as log_file:
    writer = csv.writer(log_file, delimiter=",", quotechar='"')
    writer.writerow(["name", "train_distance_time", "test_distance_time"])
    for file_name in tqdm(files, desc="Files processing"):
        np.random.seed(42)

        name = file_name.split("/")[-1].replace("_TRAIN.tsv", "")

        # Strip NaNs once per series instead of once per pair of series.
        # This happens before the timers start, so the logged times cover
        # the DTW computation only.
        train_series = _series_without_nan(_read_series_frame(file_name))
        test_series = _series_without_nan(
            _read_series_frame(file_name.replace("TRAIN.tsv", "TEST.tsv"))
        )

        start_time = time.monotonic()
        train_dtw = _dtw_distance_frame(
            train_series, train_series, f"{name} Train frame"
        )
        train_timer = time.monotonic() - start_time

        train_dtw.to_csv(
            file_name.replace("TRAIN.tsv", "TRAIN_train_dtw.csv"),
            header=False,
            index=False,
        )

        start_time = time.monotonic()
        test_dtw = _dtw_distance_frame(
            test_series, train_series, f"{name} Test frame"
        )
        test_timer = time.monotonic() - start_time

        test_dtw.to_csv(
            file_name.replace("TRAIN.tsv", "TEST_train_dtw.csv"),
            header=False,
            index=False,
        )

        log = [name, train_timer, test_timer]
        writer.writerow(log)
        # Push the row to disk now so timings of finished datasets survive an
        # interruption of this long-running loop.
        log_file.flush()
        print(*log)