In [None]:
%load_ext lab_black
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

In [None]:
import warnings

# NOTE(review): this suppresses every warning category for the rest of the
# session, so warnings raised while fitting the classifiers below are hidden
# as well. Consider narrowing the filter to the specific categories that are
# known to be noise.
warnings.filterwarnings("ignore")

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier

from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score

In [None]:
# Directory that holds the archive; `ucr_metadata.csv` is read from here.
DATA_PATH = Path("../UCRArchive_2018/")
# Directory the classification result CSV files are written to.
RESULTS_PATH = Path("../results/")

In [None]:
# Dataset metadata table. The experiment cells below read its `path` column
# (location of each dataset CSV) and sort by its `samples` column.
files_frame = pd.read_csv(DATA_PATH / "ucr_metadata.csv")
files_frame

In [None]:
def load_y(path):
    """Return the first column of a header-less CSV file as a NumPy array.

    The file at `path` is parsed without a header row and without an index
    column; the whole table is converted to a single array and its first
    column is returned. Callers in this notebook use that column as the
    class labels of a dataset.
    """
    table = pd.read_csv(path, header=None, index_col=None)
    return table.to_numpy()[:, 0]

In [None]:
# Cross-validated accuracy of classifiers trained on precomputed pairwise
# distance matrices. For every dataset and every distance variant the square
# matrix stored next to the dataset (`<dataset>_<metric>.gz`) is loaded, split
# with K-fold, and each classifier is fitted on the train-vs-train sub-matrix
# and evaluated on the test-vs-train sub-matrix. The mean fold accuracy is
# appended to `results` and written as one CSV row
# (dataset, metric, method, accuracy).
methods = [
    ("LinearSVC", LinearSVC(random_state=42)),
    ("KNeighborsClassifier", KNeighborsClassifier(n_neighbors=1, metric="precomputed")),
]

# Distance variants whose matrices are looked up on disk.
metric_names = ("dtw", "fdtw", "itakura", "sakoe_chiba")

n_splits = 5

kfold = KFold(n_splits=n_splits, random_state=42, shuffle=True)

results = []

# Position of each classifier in `methods`; used to emit rows grouped by
# classifier first and distance variant second.
method_rank = {name: position for position, (name, _) in enumerate(methods)}

# First 50 metadata rows, processed from the smallest dataset to the largest.
datasets = files_frame[:50].sort_values("samples")

# Make sure the output directory exists before opening the result file.
RESULTS_PATH.mkdir(parents=True, exist_ok=True)

with open(RESULTS_PATH / "classification_feature_dtw.csv", "w") as res_file:
    # `total` gives the progress bar a length; a bare generator has none.
    for dataset in tqdm(datasets.itertuples(), total=len(datasets)):

        y = load_y(dataset.path)

        def_path = dataset.path.replace(".csv", "")
        dataset_name = def_path.split("/")[-1]

        dataset_records = []

        for metric in metric_names:
            # Load each distance matrix once and share it between all
            # classifiers instead of re-reading it for every classifier.
            try:
                X = np.loadtxt(f"{def_path}_{metric}.gz", delimiter=",")
            except OSError:
                # No matrix stored for this dataset/metric pair: skip it.
                continue

            for name, method in methods:
                record = {
                    "dataset": dataset_name,
                    "metric": metric,
                    "method": name,
                    "accuracy": 0,
                }

                for train_index, test_index in kfold.split(X):
                    # Rows select the samples, columns keep only the distances
                    # to training samples.
                    X_train = X[train_index][:, train_index]
                    y_train = y[train_index]

                    X_test = X[test_index][:, train_index]
                    y_test = y[test_index]

                    method.fit(X_train, y_train)
                    y_pred = method.predict(X_test)

                    # Accumulate the mean accuracy over the folds.
                    record["accuracy"] += (
                        accuracy_score(y_true=y_test, y_pred=y_pred) / n_splits
                    )

                dataset_records.append(record)

        # Stable sort: rows come out ordered by classifier, and within one
        # classifier in the order of `metric_names`.
        dataset_records.sort(key=lambda rec: method_rank[rec["method"]])

        for record in dataset_records:
            results.append(record)

            res_file.write(
                "{dataset},{metric},{method},{accuracy:.5g}\n".format(**record)
            )

        # Persist the finished dataset so partial results survive an interruption.
        res_file.flush()

In [None]:
# Preview up to 20 random result rows. The sample size is capped at the number
# of available records because `sample` raises when asked for more rows than
# exist (e.g. when most distance matrices were missing and skipped).
pd.DataFrame(results).sample(min(20, len(results)))

In [None]:
# Cross-validated accuracy of classifiers trained on a weighted blend of two
# precomputed distance matrices: the one stored as `<dataset>_<metric>.gz` and
# the one stored as `<dataset>_der_<metric>.gz` (presumably computed on the
# derivative of the series - confirm against the script that produced them).
# For a weight `a` the blended matrix is (1 - a) * X + a * X_der. Each result
# is appended to `results` and written as one CSV row
# (dataset, dd_<metric>_<a>, method, accuracy).
methods = [
    ("LinearSVC", LinearSVC(random_state=42)),
    ("KNeighborsClassifier", KNeighborsClassifier(n_neighbors=1, metric="precomputed")),
]

# Distance variants whose matrices are looked up on disk.
metric_names = ("dtw", "fdtw", "itakura", "sakoe_chiba")

n_splits = 5

kfold = KFold(n_splits=n_splits, random_state=42, shuffle=True)

results = []

# Position of each classifier in `methods`; used to emit rows grouped by
# classifier first, then distance variant, then weight.
method_rank = {name: position for position, (name, _) in enumerate(methods)}

# First 50 metadata rows, processed from the smallest dataset to the largest.
datasets = files_frame[:50].sort_values("samples")

# Make sure the output directory exists before opening the result file.
RESULTS_PATH.mkdir(parents=True, exist_ok=True)

with open(RESULTS_PATH / "classification_feature_dd_dtw.csv", "w") as res_file:
    # `total` gives the progress bar a length; a bare generator has none.
    for dataset in tqdm(datasets.itertuples(), total=len(datasets)):

        y = load_y(dataset.path)

        def_path = dataset.path.replace(".csv", "")
        dataset_name = def_path.split("/")[-1]

        dataset_records = []

        for metric in metric_names:
            # Both matrices are read once per metric and reused for every
            # weight and classifier instead of being re-read each time.
            try:
                X = np.loadtxt(f"{def_path}_{metric}.gz", delimiter=",")
                X_der = np.loadtxt(f"{def_path}_der_{metric}.gz", delimiter=",")
            except OSError:
                # One of the two matrices is missing: skip this metric.
                continue

            for a in (0.4, 0.6, 0.8):
                # The blend does not depend on the fold or the classifier,
                # so it is computed once per weight.
                X_c = (1 - a) * X + a * X_der

                for name, method in methods:
                    record = {
                        "dataset": dataset_name,
                        "metric": f"dd_{metric}_{a:g}",
                        "method": name,
                        "accuracy": 0,
                    }

                    for train_index, test_index in kfold.split(X):
                        # Rows select the samples, columns keep only the
                        # distances to training samples.
                        X_train = X_c[train_index][:, train_index]
                        y_train = y[train_index]

                        X_test = X_c[test_index][:, train_index]
                        y_test = y[test_index]

                        method.fit(X_train, y_train)
                        y_pred = method.predict(X_test)

                        # Accumulate the mean accuracy over the folds.
                        record["accuracy"] += (
                            accuracy_score(y_true=y_test, y_pred=y_pred) / n_splits
                        )

                    dataset_records.append(record)

        # Stable sort: rows come out ordered by classifier, and within one
        # classifier by metric and then by weight.
        dataset_records.sort(key=lambda rec: method_rank[rec["method"]])

        for record in dataset_records:
            results.append(record)

            res_file.write(
                "{dataset},{metric},{method},{accuracy:.5g}\n".format(**record)
            )

        # Persist the finished dataset so partial results survive an interruption.
        res_file.flush()


In [None]:
# NOTE(review): stand-alone scratch calculation; its result is not used by any
# other cell visible in this notebook.
np.sin(1.5), np.cos(1.5)