In [1]:
from pathlib import Path

from sktime.datatypes import check_is_mtype, MTYPE_REGISTER
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from innovaid.dataloading import load_set

In [2]:
dataset = load_set(Path(r"../../data/proto/samples/"))

Found 35 files


Loading 0aa0abPoXao4AYllJqUT: : 0it [00:00, ?it/s]

Loading 0ry2PkzfUruhOlbQQVVB: : 35it [00:54,  1.57s/it]


In [3]:
dataset.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,IMAGE_POSITION,IMAGE_TYPE,SCENE_INDEX,RX,RY,RANGE_BDI
SESSIONID,TIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,right,positive,1.0,0.722431,0.189333,min
0,1,NONE,NONE,1.0,0.720382,0.170944,min
0,2,NONE,NONE,1.0,0.718333,0.152556,min
0,3,right,positive,1.0,0.726667,0.195111,min
0,4,right,positive,1.0,0.735,0.237667,min


In [4]:
dataset["RANGE_BDI"]

SESSIONID  TIME
0          0        min
           1        min
           2        min
           3        min
           4        min
                   ... 
34         7490    mild
           7491    mild
           7492    mild
           7493    mild
           7494    mild
Name: RANGE_BDI, Length: 268007, dtype: object

In [5]:
type_mapping = {
    "NONE": 0,
    "positive": 1,
    "negative": 2,
    "neutral": 3,
}
side_mapping = {
    "NONE": 0,
    "left": 1,
    "right": 2,
}
bdi_mapping = {
    "min": 0,
    "mild": 1,
    "moderate": 2,
    "mod_severe": 3,
}

print(type_mapping)
print(side_mapping)
print(bdi_mapping)

{'NONE': 0, 'positive': 1, 'negative': 2, 'neutral': 3}
{'NONE': 0, 'left': 1, 'right': 2}
{'min': 0, 'mild': 1, 'moderate': 2, 'mod_severe': 3}


In [6]:
dataset["IMAGE_TYPE"] = dataset["IMAGE_TYPE"].map(type_mapping)
dataset["IMAGE_POSITION"] = dataset["IMAGE_POSITION"].map(side_mapping)
dataset["RANGE_BDI"] = dataset["RANGE_BDI"].map(bdi_mapping)
dataset["SCENE_INDEX"] = dataset["SCENE_INDEX"].astype(int)
dataset.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,IMAGE_POSITION,IMAGE_TYPE,SCENE_INDEX,RX,RY,RANGE_BDI
SESSIONID,TIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,2,1,1,0.722431,0.189333,0
0,1,0,0,1,0.720382,0.170944,0
0,2,0,0,1,0.718333,0.152556,0
0,3,2,1,1,0.726667,0.195111,0
0,4,2,1,1,0.735,0.237667,0


In [7]:
dataset["IMAGE_TYPE"]

SESSIONID  TIME
0          0       1
           1       0
           2       0
           3       1
           4       1
                  ..
34         7490    2
           7491    2
           7492    2
           7493    2
           7494    2
Name: IMAGE_TYPE, Length: 268007, dtype: int64

In [8]:
print("Finding unique sessions...")
sessions = list(set([x[0] for x in dataset.index.unique()]))

print("Splitting dataset into train and test sets...")
train_sessions, test_sessions = train_test_split(
    sessions, test_size=0.2, random_state=42
)

print("Splitting train set into train and val sets...")
train_sessions, val_sessions = train_test_split(
    train_sessions, test_size=0.2, random_state=42
)

print("Generating train set...")
train_x = dataset.loc[train_sessions, "IMAGE_TYPE"]
train_y = dataset.loc[train_sessions, "RANGE_BDI"]

print("Generating val set...")
val_x = dataset.loc[val_sessions, "IMAGE_TYPE"]
val_y = dataset.loc[val_sessions, "RANGE_BDI"]

print("Generating test set...")
test_x = dataset.loc[test_sessions, "IMAGE_TYPE"]
test_y = dataset.loc[test_sessions, "RANGE_BDI"]

print(train_x.shape)

Finding unique sessions...
Splitting dataset into train and test sets...
Splitting train set into train and val sets...
Generating train set...
Generating val set...
Generating test set...
(157636,)


In [16]:
np_x = train_x.to_numpy()

# check_is_mtype(train_x.squeeze(), mtype="pd.Series")

array([3, 3, 3, ..., 0, 0, 0], dtype=int64)

In [15]:
# Compact the labels to only sessionid and collapse repeated ind
train_y.droplevel(1)

SESSIONID
31    1
31    1
31    1
31    1
31    1
     ..
4     2
4     2
4     2
4     2
4     2
Name: RANGE_BDI, Length: 157636, dtype: int64

In [11]:
classifier = TimeSeriesForestClassifier()
classifier.fit(train_x, train_y)


TypeError: X is not of a supported input data type.X must be in a supported mtype format for Panel, found <class 'pandas.core.series.Series'>Use datatypes.check_is_mtype to check conformance with specifications.

In [None]:
y_pred = classifier.predict(test_x)

In [None]:
accuracy_score(test_y, y_pred)