In [None]:
import pandas as pd
from openml import OpenMLDataset
from openml.datasets import get_dataset as get_openml_dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# NOTE: pandasgui opens a GUI window, so this import does not work on headless Linux systems.
from pandasgui import show

In [None]:
# Download the dataset and its description from OpenML.
# The download is cached in ${HOME}/.openml.
data: OpenMLDataset = get_openml_dataset(
    dataset_id="GesturePhaseSegmentationProcessed",
    version=1,
    download_data=True,
    error_if_multiple=True,
)

# Separate the default target attribute (y) from the remaining attributes (x),
# requesting the data in dataframe format.
x, y, categorical_indicator, attributed_names = data.get_data(
    dataset_format="dataframe",
    target=data.default_target_attribute,
)

In [None]:
# Open the feature frame in the pandasgui viewer for manual inspection.
show(x)
# Uncomment to inspect the target column as well.
# show(y)

In [None]:
# Replace the raw class labels with integer codes produced by LabelEncoder,
# keeping the original index and series name.
y = pd.Series(
    LabelEncoder().fit_transform(y),
    name=y.name,
    index=y.index,
)

In [None]:
# Fractions of the FULL dataset reserved for the test and validation splits;
# the remainder (roughly 0.7) is used for training.
test_size = 0.2
validation_size = 0.1

# Split into train/valid/test according to paper.
# Step 1: hold out the test split from the full dataset.
train_x, test_x, train_y, test_y = train_test_split(
    x,
    y,
    test_size=test_size,
    shuffle=True,
    random_state=1,
)
# Step 2: hold out the validation split from what is left. The fraction is rescaled
# by the remaining share (1 - test_size) so that validation is `validation_size`
# of the full dataset rather than of the reduced training set.
train_x, valid_x, train_y, valid_y = train_test_split(
    train_x,
    train_y,
    test_size=validation_size / (1.0 - test_size),
    shuffle=True,
    random_state=1,
)

In [None]:
from sklearn.dummy import DummyClassifier

from xtime.datasets import Dataset, DatasetMetadata, DatasetSplit
from xtime.estimators import Estimator
from xtime.ml import ClassificationTask, Feature, FeatureType, TaskType

# Every feature in this dataset is continuous (float64). The number of distinct
# values observed in each column is recorded as that feature's cardinality.
features = [
    Feature(name, FeatureType.CONTINUOUS, cardinality=int(n_unique))
    for name, n_unique in x.nunique().items()
]

# Bundle the metadata (name, features, 5-class classification task) with the
# train/valid/test splits created earlier.
dataset = Dataset(
    metadata=DatasetMetadata(
        name="gesture_phase",
        version="NA",
        features=features,
        task=ClassificationTask(
            type_=TaskType.MULTI_CLASS_CLASSIFICATION,
            num_classes=5,
        ),
    ),
    splits={
        split_name: DatasetSplit(x=split_x, y=split_y)
        for split_name, split_x, split_y in (
            ("train", train_x, train_y),
            ("valid", valid_x, valid_y),
            ("test", test_x, test_y),
        )
    },
)

# Baseline check: wrap a DummyClassifier (strategy="prior") fitted on the train
# split in an Estimator, then evaluate it on the dataset.
estimator = Estimator()
estimator.model = DummyClassifier(strategy="prior").fit(
    dataset.splits["train"].x,
    dataset.splits["train"].y,
)

# Evaluate the fitted baseline and print the resulting metrics.
metrics = estimator.evaluate(dataset)
print(metrics)