In [7]:
import os


import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline




from sklearn.metrics import (
    accuracy_score,
    recall_score,
    roc_auc_score,
    roc_curve,
    average_precision_score,
    f1_score,
    make_scorer,
)

from sktime.benchmarking.data import UEADataset, make_datasets
from sktime.benchmarking.evaluation import Evaluator
from sktime.benchmarking.metrics import PairwiseMetric, AggregateMetric
from sktime.benchmarking.orchestration import Orchestrator
from sktime.benchmarking.results import HDDResults
from sktime.benchmarking.strategies import TSCStrategy
from sktime.benchmarking.tasks import TSCTask
from sktime.series_as_features.model_selection import PresplitFilesCV



from sktime.classification.compose import (
    ColumnEnsembleClassifier,
    TimeSeriesForestClassifier,
)

from sktime.classification.dictionary_based import (
    IndividualBOSS,
    BOSSEnsemble,
    ContractableBOSS,
    TemporalDictionaryEnsemble,
    IndividualTDE,
    WEASEL,
    MUSE,
)

from sktime.classification.shapelet_based import (
    MrSEQLClassifier,
    ShapeletTransformClassifier,
)

from sktime.classification.interval_based import TimeSeriesForest, RandomIntervalSpectralForest


from sktime.classification.distance_based import (
    ElasticEnsemble,
    ProximityTree,
    ProximityForest,
    ProximityStump,
)

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier


from sktime.classification.shapelet_based import ShapeletTransformClassifier


from sktime.classification.hybrid import HIVECOTEV1

In [8]:
import sktime
from sktime.utils.data_io import load_from_tsfile_to_dataframe, load_from_arff_to_dataframe

# Root directory that holds the UEA multivariate datasets (one sub-folder per
# dataset, e.g. "AtrialFibrillation/AtrialFibrillation_TRAIN.ts").
# Set the UEA_DATA_PATH environment variable to point the notebook at another
# location without editing this cell; when it is unset, the author's local
# Desktop folder below is used as the fallback.
# Alternative: the datasets bundled with sktime.
#DATA_PATH = os.path.join(os.path.dirname(sktime.__file__), "datasets/data")
DATA_PATH = os.environ.get(
    "UEA_DATA_PATH",
    os.path.join(os.path.dirname("C:\\Users\\rbabayev\\Desktop\\"), "Multivariate_ts"),
)
#RESULTS_PATH = "results"

In [9]:
def is_multivariate(X):
    """Tell whether ``X`` is a pandas DataFrame with more than one column.

    Parameters
    ----------
    X : object
        Candidate data container. Anything that is not a pandas DataFrame is
        reported as not multivariate.

    Returns
    -------
    bool
        ``True`` when ``X`` is a DataFrame with at least two columns,
        ``False`` for a DataFrame with fewer columns or for any other object.
    """
    # Imported locally so the helper works regardless of which names the
    # notebook's import cell happens to provide.
    import pandas

    # A DataFrame's shape is always (n_rows, n_columns), so only the column
    # count needs checking once the type is confirmed.
    return isinstance(X, pandas.DataFrame) and X.shape[1] > 1

In [10]:
# List of the multivariate datasets from UEA that are related to the medical domain:
# AtrialFibrillation, EigenWorms, Epilepsy, FingerMovements, HandMovementDirection, Heartbeat, 
# MotorImagery, StandWalkJump, FaceDetection, BasicMotions, ERing, SelfRegulationSCP1, SelfRegulationSCP2
# Non ts file datasets: EyesOpenShut


# AtrialFibrillation -- shapes of X_train, y_train, X_test, y_test:
# (15, 2) (15,) (15, 2) (15,)
train_ts_file = os.path.join(DATA_PATH, "AtrialFibrillation/AtrialFibrillation_TRAIN.ts")
test_ts_file = os.path.join(DATA_PATH, "AtrialFibrillation/AtrialFibrillation_TEST.ts")

# Each .ts file is read into a (features, labels) pair.
X_train, y_train = load_from_tsfile_to_dataframe(train_ts_file)
X_test, y_test = load_from_tsfile_to_dataframe(test_ts_file)


# # (30, 4) (30,) (270, 4) (270,) length of the series is 65
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "ERing/ERing_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "ERing/ERing_TEST.ts")
# )


# # (268, 6) (268,) (293, 6) (293,) length of the series is 896
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP1/SelfRegulationSCP1_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP1/SelfRegulationSCP1_TEST.ts")
# )



# # (200, 7) (200,) (180, 7) (180,) length of the series is 1152
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP2/SelfRegulationSCP2_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP2/SelfRegulationSCP2_TEST.ts")
# )



# Sanity-check the loaded splits: multivariate flag first, then the shapes of
# all four arrays on one line, then the training frame's shape on its own.
train_is_multivariate = is_multivariate(X_train)
print("Multivariate dataset -> ", train_is_multivariate)

split_shapes = (X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print(*split_shapes)
print(X_train.shape)

Multivariate dataset ->  True
(15, 2) (15,) (15, 2) (15,)
(15, 2)


In [11]:
# Preview the first five training cases (five is also head()'s default).
X_train.head(n=5)

Unnamed: 0,dim_0,dim_1
0,0 -0.34086 1 -0.38038 2 -0.34580 3...,0 0.14820 1 0.13338 2 0.10868 3...
1,0 -0.11362 1 -0.07410 2 -0.05928 3...,0 -0.00988 1 -0.02470 2 -0.00494 3...
2,0 -0.2079 1 -0.1683 2 -0.1980 3 ...,0 -0.02632 1 -0.04606 2 -0.08554 3...
3,0 -0.11805 1 -0.08657 2 -0.09444 3...,0 0.03510 1 0.04680 2 0.06435 3...
4,0 -0.11362 1 -0.06422 2 -0.05928 3...,0 -0.04940 1 0.01482 2 0.03952 3...


In [12]:
# The target is multi-class: show the distinct labels present in the training split.
train_classes = np.unique(y_train)
train_classes

array(['n', 's', 't'], dtype='<U1')

In [13]:
# Column ensembling
# We can also fit one classifier for each time series column and then aggregate their predictions.
# The interface is similar to the familiar ColumnTransformer from sklearn.


def _per_dimension_ensemble(prefix, make_estimator, n_dims):
    """Build a ColumnEnsembleClassifier with one fresh estimator per column.

    Parameters
    ----------
    prefix : str
        Prefix of every estimator name; column ``i`` is named ``prefix + str(i)``.
    make_estimator : callable
        Zero-argument factory returning a new, unfitted classifier. It is
        called once per column so that no estimator instance is shared.
    n_dims : int
        Number of columns (dimensions) to cover, starting at column 0.

    Returns
    -------
    ColumnEnsembleClassifier
        Ensemble whose ``i``-th estimator is restricted to column ``[i]``.
    """
    return ColumnEnsembleClassifier(
        estimators=[(prefix + str(i), make_estimator(), [i]) for i in range(n_dims)]
    )


# Number of dimensions in the loaded dataset; every univariate classifier below
# is replicated once per dimension.
n_dims = X_train.shape[1]

# Further candidates, currently disabled (settings kept for reference; each was
# written against column [0] only):
#   TimeSeriesForestClassifier(n_estimators=100, random_state=1)
#   BOSSEnsemble(max_ensemble_size=5, random_state=1)
#   TemporalDictionaryEnsemble(n_parameter_samples=250, max_ensemble_size=50,
#                              randomly_selected_params=50, random_state=1)
#   CanonicalIntervalForest(n_estimators=100, att_subsample_size=8, random_state=1)
#       NOTE(review): CanonicalIntervalForest does not appear in the import
#       cell -- confirm/import it before enabling.
#   ShapeletTransformClassifier(time_contract_in_mins=1, random_state=1)
#   ElasticEnsemble(random_state=1)
#   ProximityForest(n_estimators=100, random_state=1)
clf_list = [
    _per_dimension_ensemble(
        "TSF_dim_",
        lambda: TimeSeriesForest(n_estimators=100, random_state=1),
        n_dims,
    ),
    _per_dimension_ensemble(
        "RISF_dim_",
        lambda: RandomIntervalSpectralForest(n_estimators=100, random_state=1),
        n_dims,
    ),
    _per_dimension_ensemble(
        "kNN_dim_",
        lambda: KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw"),
        n_dims,
    ),
    _per_dimension_ensemble(
        "cBOSS_dim_",
        lambda: ContractableBOSS(n_parameter_samples=250, max_ensemble_size=50, random_state=1),
        n_dims,
    ),
    _per_dimension_ensemble(
        "WSL_dim_",
        lambda: WEASEL(binning_strategy="equi-depth", anova=False, random_state=1),
        n_dims,
    ),
    # These two are given the full multi-column frame directly, without
    # per-column wrapping.
    MrSEQLClassifier(),
    MUSE(random_state=1),
]


# Labels seen during training, in sorted order (np.unique sorts). They do not
# change between classifiers, so they are computed once outside the loop.
class_labels = np.unique(y_train)
is_binary_task = len(class_labels) == 2

# Fit every candidate on the training split and report test-set metrics.
for clf in clf_list:
    print("\n-------------------------------------------")
    print(clf)
    clf.fit(X_train, y_train)
    y_test_prob = clf.predict_proba(X_test)
    y_test_pred = clf.predict(X_test)
    print("accuracy: ", accuracy_score(y_test, y_test_pred))
    print("f1 score: ", f1_score(y_test, y_test_pred, average='macro'))

    if is_binary_task:
        # for binary classification make_scorer should be used: https://github.com/scikit-learn/scikit-learn/issues/10247
        print("auroc: ", make_scorer(roc_auc_score, needs_proba=True)(clf, X_test, y_test))
    else:
        # Pass the training labels explicitly so the columns of y_test_prob are
        # matched against all trained classes rather than against whichever
        # labels happen to occur in y_test (the default when labels is omitted).
        # Assumes predict_proba columns follow sorted label order -- the same
        # assumption the default inference relies on; confirm per classifier.
        print(
            "auroc: ",
            roc_auc_score(
                y_test,
                y_test_prob,
                average='macro',
                multi_class="ovo",
                labels=class_labels,
            ),
        )

    # AUPRC is not reported: average_precision_score rejects multiclass targets.
    #print("auprc: ", average_precision_score(y_test, y_test_prob)) # multiclass format is not supported
    #print("auprc: ", make_scorer(average_precision_score, needs_proba=True)(clf, X_test, y_test)) # multiclass format is not supported
    print("recall: ", recall_score(y_test, y_test_pred, average='macro'))


-------------------------------------------
ColumnEnsembleClassifier(estimators=[('TSF_dim_0',
                                      TimeSeriesForest(n_estimators=100,
                                                       random_state=1),
                                      [0]),
                                     ('TSF_dim_1',
                                      TimeSeriesForest(n_estimators=100,
                                                       random_state=1),
                                      [1])])
accuracy:  0.26666666666666666
f1 score:  0.22377622377622375
auroc:  0.41333333333333333
recall:  0.26666666666666666

-------------------------------------------
ColumnEnsembleClassifier(estimators=[('RISF_dim_0',
                                      RandomIntervalSpectralForest(n_estimators=100,
                                                                   random_state=1),
                                      [0]),
                                     ('RISF_d