# SkTime based model

We found the SkTime package quite early on, which promised easy ways to train classifiers on time series data.
Unfortunately, we had a lot of issues in wrangling the data into a shape that SkTime would actually accept.
Finally, when we finally found a classifier that would take variable length time series, we weren't actually sure if it was training, due to the lack of training messages.

We ended up dropping this avenue for a PyTorch based alternative.

In [1]:
from pathlib import Path

import pandas as pd
from sktime.datatypes import mtype
from sktime.classification.kernel_based import TimeSeriesSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from innovaid.dataloading import load_set

In [2]:
dataset = load_set(Path(r"../../data/preprocessed/samples/"))
dataset.head()

Found 3102 files


Loading ZZY7qwXSS8STjlthBKoK: 100%|██████████| 3102/3102 [02:41<00:00, 19.26it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,IMAGE_POSITION,IMAGE_TYPE,SCENE_INDEX,RX,RY,RANGE_BDI
SESSIONID,TIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,right,neutral,1.0,0.555453,0.448529,mild
0,1,right,neutral,1.0,0.549212,0.449412,mild
0,2,right,neutral,1.0,0.546846,0.452118,mild
0,3,right,neutral,1.0,0.544481,0.454824,mild
0,4,right,neutral,1.0,0.551084,0.471765,mild


In [3]:
type_mapping = {
    "NONE": 0,
    "positive": 1,
    "negative": 2,
    "neutral": 3,
}
side_mapping = {
    "NONE": 0,
    "left": 1,
    "right": 2,
}
bdi_mapping = {
    "min": 0,
    "mild": 1,
    "moderate": 2,
    "mod_severe": 3,
}

print(type_mapping)
print(side_mapping)
print(bdi_mapping)

{'NONE': 0, 'positive': 1, 'negative': 2, 'neutral': 3}
{'NONE': 0, 'left': 1, 'right': 2}
{'min': 0, 'mild': 1, 'moderate': 2, 'mod_severe': 3}


In [4]:
dataset["IMAGE_TYPE"] = dataset["IMAGE_TYPE"].map(type_mapping)
dataset["IMAGE_POSITION"] = dataset["IMAGE_POSITION"].map(side_mapping)
dataset["RANGE_BDI"] = dataset["RANGE_BDI"].map(bdi_mapping)
dataset["SCENE_INDEX"] = dataset["SCENE_INDEX"].astype(int)
dataset.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,IMAGE_POSITION,IMAGE_TYPE,SCENE_INDEX,RX,RY,RANGE_BDI
SESSIONID,TIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,2,3,1,0.555453,0.448529,1
0,1,2,3,1,0.549212,0.449412,1
0,2,2,3,1,0.546846,0.452118,1
0,3,2,3,1,0.544481,0.454824,1
0,4,2,3,1,0.551084,0.471765,1


In [5]:
print("Finding unique sessions...")
sessions = list(set([x[0] for x in dataset.index.unique()]))

print("Splitting dataset into train and test sets...")
train_sessions, test_sessions = train_test_split(
    sessions, test_size=0.2, random_state=42
)

print("Generating train set...")
train_x = dataset.loc[train_sessions]
train_y = dataset.loc[train_sessions]

print("Generating test set...")
test_x = dataset.loc[test_sessions]
test_y = dataset.loc[test_sessions]

Finding unique sessions...
Splitting dataset into train and test sets...
Generating train set...
Generating test set...


In [6]:
mtype(train_x, as_scitype="Panel")
mtype(test_x, as_scitype="Panel")

'pd-multiindex'

In [7]:
# Compact the labels to only sessionid and collapse repeated ind
train_y = train_y.reset_index().groupby("SESSIONID").first().loc[:, "RANGE_BDI"].to_numpy()
test_y = test_y.reset_index().groupby("SESSIONID").first().loc[:, "RANGE_BDI"].to_numpy()

In [8]:
train_x = pd.DataFrame(train_x["IMAGE_TYPE"])
test_x = pd.DataFrame(test_x["IMAGE_TYPE"])

In [10]:
classifier = TimeSeriesSVC()
classifier.fit(train_x, train_y)

KeyboardInterrupt: 

In [11]:
y_pred = classifier.predict(test_x)

NotFittedError: This instance of TimeSeriesSVC has not been fitted yet; please call `fit` first.

In [None]:
accuracy_score(test_y, y_pred)

In [None]:
# Save the model
import pickle

with open("model.pkl", "wb") as f:
    pickle.dump(classifier, f)
