In [None]:
import csv
import time
from pathlib import Path

import numpy as np
import pandas as pd
from dtw import dtw
from tqdm.notebook import tqdm

In [None]:
# Collect every regular file named "*TRAIN.tsv" under the archive directory,
# ordered by ascending file size. This is done in pure Python instead of
# parsing `ls -al` columns through sort/sed/cut, which breaks on paths that
# contain whitespace and relies on GNU-specific sed escapes.
files = [
    path.as_posix()
    for path in sorted(
        (
            candidate
            for candidate in Path("../../UCRArchive_2018/").rglob("*TRAIN.tsv")
            if candidate.is_file()
        ),
        # The path string breaks ties between equally sized files so the
        # order is deterministic.
        key=lambda candidate: (candidate.stat().st_size, candidate.as_posix()),
    )
]
files

In [None]:
# Number of "*TRAIN.tsv" files found by the search above.
len(files)

In [None]:
def compute_distances(data_frame, anchors_frame, desc):
    """Compute Itakura-windowed DTW distances from each data row to each anchor row.

    Column 0 of both frames is skipped (``values[:, 1:]``); the remaining
    columns of a row form one series, and NaN entries are dropped from it
    before it is passed to ``dtw``.

    Args:
        data_frame: Frame whose rows become the rows of the result.
        anchors_frame: Frame whose rows become the columns of the result.
        desc: Label shown on the progress bar.

    Returns:
        A ``pd.DataFrame`` with one row per row of ``data_frame`` and one
        column per row of ``anchors_frame``. A cell is NaN when ``dtw``
        raises ``ValueError`` for that pair. If any other exception occurs,
        its message is printed and an empty ``pd.DataFrame`` is returned.
    """
    distances = []
    for point in tqdm(data_frame.values[:, 1:], desc=desc, leave=False):
        line = []
        for anchor in anchors_frame.values[:, 1:]:
            try:
                x = anchor[~np.isnan(anchor)]
                y = point[~np.isnan(point)]
                # The Itakura parallelogram is determined by the two series
                # lengths alone, so no window_args are passed: dtw-python's
                # Itakura window function takes no size argument, and
                # supplying a "window_size" makes the call fail.
                distance = dtw(x, y, window_type="itakura").distance
            except ValueError:
                # Raised by dtw e.g. when no warping path fits the window;
                # recorded as a missing distance rather than aborting.
                distance = np.nan
            except Exception as e:
                # Any other failure discards the partial result: callers get
                # an empty frame and the message on stdout.
                print(e)
                return pd.DataFrame()
            line.append(distance)
        distances.append(line)
    return pd.DataFrame(distances)

In [None]:
ress = []


def _itakura_dtw_matrix(queries, references, desc):
    """Return Itakura-windowed DTW distances as a frame (queries x references).

    Each row of the two 2-D arrays is one series; NaN entries are dropped
    before the series is handed to ``dtw``. A pair for which ``dtw`` raises
    ``ValueError`` (e.g. no warping path fits the window) is recorded as NaN
    so that one incompatible pair does not abort the whole run.

    Args:
        queries: 2-D array; one output row per row.
        references: 2-D array; one output column per row.
        desc: Label shown on the progress bar.
    """
    # Strip NaNs from the reference series once instead of once per query.
    reference_series = [row[~np.isnan(row)] for row in references]
    rows = []
    for query in tqdm(queries, desc=desc, leave=False):
        query_series = query[~np.isnan(query)]
        line = []
        for reference in reference_series:
            try:
                distance = dtw(
                    reference, query_series, window_type="itakura"
                ).distance
            except ValueError:
                distance = np.nan
            line.append(distance)
        rows.append(line)
    return pd.DataFrame(rows)


# newline="" is what the csv module expects of files handed to csv.writer.
with open("../logs/create_features_itakuradtw.csv", "w", newline="") as log_file:
    writer = csv.writer(log_file, delimiter=",", quotechar='"')
    writer.writerow(["name", "train_distance_time", "test_distance_time"])
    for file_name in tqdm(files, desc="Files processing"):
        np.random.seed(42)

        # Dataset name: the file's base name without the "_TRAIN.tsv" suffix.
        name = file_name.split("/")[-1].replace("_TRAIN.tsv", "")

        # Gaps are interpolated along each row. With limit_direction="backward"
        # NaNs that have no later valid value are left in place (presumably
        # the padding of shorter series - TODO confirm); they are dropped
        # before DTW.
        train_frame = pd.read_csv(file_name, delimiter="\t", header=None).interpolate(
            limit_direction="backward", axis=1
        )
        test_frame = pd.read_csv(
            file_name.replace("TRAIN.tsv", "TEST.tsv"), delimiter="\t", header=None
        ).interpolate(limit_direction="backward", axis=1)

        # Column 0 is skipped; the remaining columns of a row are the series.
        train_values = train_frame.values[:, 1:]
        test_values = test_frame.values[:, 1:]

        # Train-vs-train distances (rows: train series, columns: train series).
        start_time = time.monotonic()
        train_itakuradtw = _itakura_dtw_matrix(
            train_values, train_values, f"{name} Train frame"
        )
        train_timer = time.monotonic() - start_time

        train_itakuradtw.to_csv(
            file_name.replace("TRAIN.tsv", "TRAIN_train_itakuradtw.csv"),
            header=None,
            index=None,
        )

        # Test-vs-train distances (rows: test series, columns: train series).
        start_time = time.monotonic()
        test_itakuradtw = _itakura_dtw_matrix(
            test_values, train_values, f"{name} Test frame"
        )
        test_timer = time.monotonic() - start_time

        test_itakuradtw.to_csv(
            file_name.replace("TRAIN.tsv", "TEST_train_itakuradtw.csv"),
            header=None,
            index=None,
        )

        # Flush after every dataset so the timing log survives an interrupted run.
        log = [name, train_timer, test_timer]
        writer.writerow(log)
        log_file.flush()
        print(*log)