# Classification - One Prototype Per Class

In this notebook we compute the accuracy of a classification pipeline that uses the Prototyped Feature DTW transformation with one prototype from each class, evaluated on the UCR datasets.

In [None]:
import csv
import itertools
import os
from dataclasses import dataclass
from datetime import datetime

import numpy as np
import pandas as pd
from func_timeout import FunctionTimedOut, func_timeout
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
from tqdm.notebook import tqdm

In [None]:
import warnings

# Suppress every warning raised from this point on (the filter is process-wide).
warnings.filterwarnings("ignore")

In [None]:
# IPython shell capture: find every *TRAIN.tsv under ../UCRArchive_2018/ (at
# most two directory levels deep), run `ls -al` on each, sort numerically on
# the 5th column (the file size in `ls -l` output), collapse runs of spaces to
# tabs and keep only the 9th column (the path). `files` therefore holds one
# path per line, smallest file first.
# NOTE(review): paths containing spaces would be split by the sed/cut step.
files = !(find ../UCRArchive_2018/ -maxdepth 2 -type f -name "*TRAIN.tsv" -exec ls -al {} \; | sort -k 5 -n | sed 's/ \+/\t/g' | cut -f 9)

In [None]:
@dataclass
class FileNames:
    """Bundle of file paths that belong to a single dataset.

    All attributes are plain strings; the class performs no validation and
    does not touch the file system.
    """

    # Dataset identifier.
    name: str

    # Paths of the raw train / test splits.
    train_file: str
    test_file: str

    # Paths of the distance matrices associated with the train split.
    train_dtw: str
    train_fastdtw: str

    # Paths of the distance matrices associated with the test split.
    test_dtw: str
    test_fastdtw: str

In [None]:
# Gather every dataset for which all four distance-matrix files exist on disk,
# together with its row counts so the datasets can be ordered afterwards.
sort_files = []

for file_name in tqdm(files):
    test_file = file_name.replace("TRAIN.tsv", "TEST.tsv")
    name = file_name.split("/")[-1].replace("_TRAIN.tsv", "")

    # The matrix files sit next to the TSV splits. The test-side files carry
    # the "_train_" infix as well (e.g. <name>_TEST_train_dtw.csv).
    train_dtw = file_name.replace(".tsv", "_train_dtw.csv")
    test_dtw = test_file.replace(".tsv", "_train_dtw.csv")
    train_fastdtw = file_name.replace(".tsv", "_train_fastdtw.csv")
    test_fastdtw = test_file.replace(".tsv", "_train_fastdtw.csv")

    required = (train_dtw, train_fastdtw, test_dtw, test_fastdtw)
    if any(not os.path.exists(path) for path in required):
        # At least one matrix file is missing: leave this dataset out.
        continue

    fl = FileNames(
        name=name,
        train_file=file_name,
        test_file=test_file,
        train_dtw=train_dtw,
        train_fastdtw=train_fastdtw,
        test_dtw=test_dtw,
        test_fastdtw=test_fastdtw,
    )

    # Only the number of rows of each split is needed here.
    frame = pd.read_csv(file_name, delimiter="\t", header=None)
    frame_test = pd.read_csv(test_file, delimiter="\t", header=None)
    n_train, n_test = len(frame), len(frame_test)

    # Entry layout: [train + test rows, train rows, FileNames].
    sort_files.append([n_train + n_test, n_train, fl])

In [None]:
# Ascending by (train + test rows) * (train rows), i.e. the product of the two
# counts stored at the front of each entry.
sort_files = sorted(sort_files, key=lambda entry: entry[0] * entry[1])

In [None]:
# Display the ordered list of [total rows, train rows, FileNames] entries.
sort_files

In [None]:
# Seed NumPy's global RNG so anything drawing from it is repeatable.
np.random.seed(42)

# For every dataset, write one CSV row with the 1-NN accuracy on the DTW and
# FastDTW matrices and the accuracy of a LinearSVC trained on the FastDTW
# distances to one prototype per class.
#
# newline="" lets the csv module control line endings itself, as its
# documentation requires for files handed to csv.writer.
with open(
    f"../logs/classification-class-{datetime.now().isoformat()}.csv",
    "w",
    newline="",
) as out_file:
    writer = csv.writer(out_file, delimiter=",")
    # Keep the header in sync with `row` below: five values per dataset.
    writer.writerow(
        [
            "dataset",
            "n_samples",
            "1NN_dtw",
            "1NN_fastdtw",
            "one_from_class",
        ]
    )
    # Each entry is [train + test rows, train rows, FileNames]; the train row
    # count is not needed inside the loop.
    for n_samples, _n_train, file_name in tqdm(sort_files):
        name = file_name.name
        row = [name, n_samples]

        train_frame = pd.read_csv(file_name.train_file, delimiter="\t", header=None)
        test_frame = pd.read_csv(file_name.test_file, delimiter="\t", header=None)

        # Column 0 of each split is used as the class label.
        y_train = train_frame[0].values
        y_test = test_frame[0].values

        # Index of the first training row of every class: one prototype each.
        _, class_proto_index = np.unique(y_train, return_index=True)

        # Only the FastDTW training matrix feeds the classifier, so the plain
        # DTW training matrix is not loaded.
        train_fastdtw = pd.read_csv(file_name.train_fastdtw, delimiter=",", header=None)
        test_dtw = pd.read_csv(file_name.test_dtw, delimiter=",", header=None)
        test_fastdtw = pd.read_csv(file_name.test_fastdtw, delimiter=",", header=None)

        # 1-NN baselines: the column with the smallest value in each row picks
        # the training label to predict.
        # NOTE(review): assumes rows are test series and columns are training
        # series, in the same order as y_train - confirm against the files.
        for distances in (test_dtw, test_fastdtw):
            nearest = np.argmin(distances.values, axis=1)
            row.append(accuracy_score(y_true=y_test, y_pred=y_train[nearest]))

        # Prototype features: keep only the columns of the class prototypes.
        X_train = train_fastdtw.values[:, class_proto_index]
        X_test = test_fastdtw.values[:, class_proto_index]

        svc = LinearSVC(random_state=42, max_iter=1000)
        try:
            # Fitting and predicting are each capped at 600 seconds.
            func_timeout(600, svc.fit, args=(X_train, y_train))
            predicted = func_timeout(600, svc.predict, args=(X_test,))
        except FunctionTimedOut:
            # No row is written for a dataset that runs out of time; say so
            # instead of dropping it silently.
            print(f"{name} skipped: LinearSVC exceeded the 600 s limit")
            continue

        row.append(accuracy_score(y_true=y_test, y_pred=predicted))

        print(f"{row[0]} {row[2]:.3f} {row[3]:.3f} {row[4]:.3f}")
        writer.writerow(row)
        # Flush after every dataset so partial results survive an interruption.
        out_file.flush()