In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import sklearn.metrics as skm
import torch
from tsai.all import *
from sklearn.model_selection import train_test_split

from innovaid.dataloading import load_set

# Make the CPU the default device for newly created tensors.
torch.set_default_device('cpu')
# computer_setup is pulled in by the tsai star import; presumably it reports the
# runtime environment (library versions, hardware) — confirm against the tsai docs.
computer_setup()

In [None]:
# Location of the raw samples, relative to the notebook's working directory.
samples_dir = Path(r"../../data/proto/samples/")

# load_set is the project loader; the result is used as a DataFrame below.
dataset = load_set(samples_dir)
dataset.head()

In [None]:
# Integer codes for the categorical columns: the position of a label in its
# tuple is the code it is assigned.
type_mapping = {
    label: code
    for code, label in enumerate(("NONE", "positive", "negative", "neutral"))
}
side_mapping = {
    label: code
    for code, label in enumerate(("NONE", "left", "right"))
}
bdi_mapping = {
    label: code
    for code, label in enumerate(("min", "mild", "moderate", "mod_severe"))
}

# One mapping per output line.
print(type_mapping, side_mapping, bdi_mapping, sep="\n")

In [None]:
# Encode the categorical string columns with the integer codes defined above.
#
# Series.map turns every value that is missing from the mapping into NaN without
# any warning, so two guards are applied per column:
#   * a column that is already numeric is left alone, which keeps this cell safe
#     to re-run (re-mapping integer codes through the string-keyed dicts would
#     silently replace the whole column with NaN);
#   * labels that are absent from the mapping are reported with a ValueError
#     instead of silently becoming NaN features/targets. Values that are already
#     missing (NaN) in the raw data are left as they are.
column_encodings = {
    "IMAGE_TYPE": type_mapping,
    "IMAGE_POSITION": side_mapping,
    "RANGE_BDI": bdi_mapping,
}
for column_name, encoding in column_encodings.items():
    raw_values = dataset[column_name]
    if pd.api.types.is_numeric_dtype(raw_values):
        continue  # already encoded by an earlier run of this cell
    unknown_labels = set(raw_values.dropna().unique()) - set(encoding)
    if unknown_labels:
        raise ValueError(
            f"{column_name} contains values missing from its mapping: "
            f"{sorted(unknown_labels, key=str)}"
        )
    dataset[column_name] = raw_values.map(encoding)

# astype(int) is idempotent, so it needs no guard.
dataset["SCENE_INDEX"] = dataset["SCENE_INDEX"].astype(int)
dataset.head()

In [None]:
print("Finding unique sessions...")
# The session id is the first level of the index. The ids are sorted because
# iterating a set has no guaranteed order (string hashes are salted per Python
# process), and train_test_split shuffles whatever order it is given: without a
# stable input order, random_state=42 would not reproduce the same split after
# a kernel restart.
sessions = sorted(dataset.index.get_level_values(0).unique())

print("Finding max sequence length...")
# size() counts rows; count() would skip NaN entries and could under-report the
# longest session, which later breaks the padding of the feature arrays.
max_seq_len = int(dataset["IMAGE_TYPE"].groupby("SESSIONID").size().max())
print(max_seq_len)

print("Splitting dataset into train and test sets...")
# Split on session ids (not rows) so every session lands entirely in one partition.
train_sessions, test_sessions = train_test_split(
    sessions, test_size=0.2, random_state=42
)

print("Generating train set...")
# Features and labels start from the same rows; later cells narrow each of them.
train_x = dataset.loc[train_sessions]
train_y = dataset.loc[train_sessions]

print("Generating test set...")
test_x = dataset.loc[test_sessions]
test_y = dataset.loc[test_sessions]

In [None]:
# Compact the labels to one value per session: the first RANGE_BDI entry of each
# SESSIONID.
#
# groupby returns its groups sorted by SESSIONID, whereas the feature arrays are
# filled in train_sessions / test_sessions order (the shuffled order produced by
# train_test_split). The labels are therefore re-ordered with .loc so that label
# i belongs to the same session as feature row i.
#
# The labels are derived from `dataset` rather than from train_y / test_y, which
# selects the same rows but keeps this cell re-runnable.
session_labels = dataset["RANGE_BDI"].groupby("SESSIONID").first()
train_y = session_labels.loc[train_sessions].to_numpy()
test_y = session_labels.loc[test_sessions].to_numpy()

In [None]:
# The model input is the IMAGE_TYPE column only, restricted to the sessions of
# the respective partition.
train_image_types = train_x["IMAGE_TYPE"]
train_x = train_image_types.loc[train_sessions, :]

test_image_types = test_x["IMAGE_TYPE"]
test_x = test_image_types.loc[test_sessions, :]

In [None]:
# Convert to numpy arrays shaped (number of samples, features, time steps).
def pad_sessions(series, session_ids, seq_len):
    """Stack per-session sequences into one zero-padded array.

    Args:
        series: Series whose first index level is the session id.
        session_ids: Session ids to extract; row i of the result holds
            session_ids[i].
        seq_len: Length of the time axis; shorter sessions are right-padded
            with zeros.

    Returns:
        Float array of shape (len(session_ids), 1, seq_len).
    """
    padded = np.zeros((len(session_ids), 1, seq_len))
    for row, session_id in enumerate(session_ids):
        # Look the session up once and reuse it for both length and values.
        values = series.loc[session_id].to_numpy()
        padded[row, 0, : len(values)] = values
    return padded


train_x_np = pad_sessions(train_x, train_sessions, max_seq_len)
test_x_np = pad_sessions(test_x, test_sessions, max_seq_len)

In [None]:
# Feature array shape, then label array shape, one per line.
print(train_x_np.shape, train_y.shape, sep="\n")

In [None]:
# Item transforms: nothing for the inputs, Categorize() for the targets.
transforms = [None, [Categorize()]]

# Merge both partitions and get the splits that tell them apart again.
# Note that the held-out "test" sessions are what ds.valid refers to below.
x, y, splits = combine_split_data([train_x_np, test_x_np], [train_y, test_y])

ds = TSDatasets(x, y, splits=splits, tfms=transforms, inplace=True)

# One batch size per loader: 64 for ds.train, 128 for ds.valid.
batch_sizes = [64, 128]
dls = TSDataLoaders.from_dsets(
    ds.train,
    ds.valid,
    bs=batch_sizes,
    batch_tfms=[TSStandardize()],
    num_workers=0,
)

dls.show_batch(sharey=True)

In [None]:
# Size the network from the dataloaders: input variables and number of classes.
n_vars, n_classes = dls.vars, dls.c
model = InceptionTime(n_vars, n_classes)
learn = Learner(dls, model, metrics=accuracy)

# Checkpoint the freshly initialised weights before any training happens.
learn.save('stage0')

In [None]:
# Restore the 'stage0' checkpoint (saved right after the learner was created) so
# the search starts from the initial weights even if this cell is re-run later.
learn.load('stage0')
# Last expression of the cell, so whatever lr_find returns is displayed
# (presumably the learning-rate range test with its suggestion — confirm in the fastai docs).
learn.lr_find()

In [None]:
# Training schedule: number of epochs and the peak learning rate of the cycle.
n_epochs = 25
peak_lr = 1e-3

learn.fit_one_cycle(n_epochs, lr_max=peak_lr)

# Checkpoint the trained weights.
learn.save('stage1')

In [None]:
# Plot what the recorder collected during training (plot_metrics is presumably
# supplied by the tsai star import — confirm; expected to show loss/metric curves).
learn.recorder.plot_metrics()

In [None]:
# Export destination: output directory plus the file names used for the
# dataloaders, the model and the learner.
export_kwargs = {
    "path": 'export',
    "dls_fname": 'dls',
    "model_fname": 'model',
    "learner_fname": 'learner',
}
learn.save_all(**export_kwargs)