In [None]:
# built with sktime 0.5.2
# Create the conda environment from environment.yml on the command line: conda env create -f environment.yml
# Install sktime with: conda install -c conda-forge sktime
# or install the latest sktime development version by following the instructions in the file "sktime installation from git.txt"

import os


import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline




from sklearn.metrics import (
    accuracy_score,
    recall_score,
    roc_auc_score,
    roc_curve,
    average_precision_score,
    f1_score,
    make_scorer
)

from sktime.benchmarking.data import UEADataset, make_datasets
from sktime.benchmarking.evaluation import Evaluator
from sktime.benchmarking.metrics import PairwiseMetric, AggregateMetric
from sktime.benchmarking.orchestration import Orchestrator
from sktime.benchmarking.results import HDDResults
from sktime.benchmarking.strategies import TSCStrategy
from sktime.benchmarking.tasks import TSCTask
from sktime.series_as_features.model_selection import PresplitFilesCV



from sktime.classification.compose import (
    ColumnEnsembleClassifier,
    TimeSeriesForestClassifier,
)

from sktime.classification.dictionary_based import (
    IndividualBOSS,
    BOSSEnsemble,
    ContractableBOSS,
    TemporalDictionaryEnsemble,
    IndividualTDE,
    WEASEL,
    MUSE,
)

from sktime.classification.shapelet_based import (
    MrSEQLClassifier,
    ShapeletTransformClassifier,
)

from sktime.classification.interval_based import TimeSeriesForest, RandomIntervalSpectralForest


from sktime.classification.distance_based import (
    ElasticEnsemble,
    ProximityTree,
    ProximityForest,
    ProximityStump,
)

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

In [None]:
import sktime
from sktime.utils.data_io import load_from_tsfile_to_dataframe

# Download http://www.timeseriesclassification.com/Downloads/Archives/Multivariate2018_ts.zip and extract it to a path of your choice.
# DATA_PATH must point at the extracted "Multivariate_ts" folder.
# To use a different location without editing this cell, set the MULTIVARIATE_TS_PATH
# environment variable; when it is unset or empty the original default below is used.
DATA_PATH = os.environ.get("MULTIVARIATE_TS_PATH") or os.path.join(
    os.path.dirname("C:\\Users\\rbabayev\\Desktop\\"), "Multivariate_ts"
)
#RESULTS_PATH = "results"

In [None]:
def is_multivariate(X):
    """Tell whether X is a pandas DataFrame with more than one column.

    Parameters
    ----------
    X : object
        Candidate data container. Only pandas DataFrames (including
        subclasses) can be reported as multivariate.

    Returns
    -------
    bool
        True when X is a DataFrame with at least two columns, False for a
        single-column (or empty) DataFrame and for any non-DataFrame input.
    """
    import pandas

    # A DataFrame is always two-dimensional, so the column count alone decides.
    if isinstance(X, pandas.DataFrame):
        return X.shape[1] > 1
    # The original returned the undefined name `false` here, which raised
    # NameError for every non-DataFrame input.
    return False

In [None]:
# Run the test for one dataset at a time: uncomment the loading block of the dataset you want and comment out the others.

# Epilepsy dataset: length of the series is 207
# Build both file locations first, then read the train and test splits.
epilepsy_train_file = os.path.join(DATA_PATH, "Epilepsy/Epilepsy_TRAIN.ts")
epilepsy_test_file = os.path.join(DATA_PATH, "Epilepsy/Epilepsy_TEST.ts")

X_train, y_train = load_from_tsfile_to_dataframe(epilepsy_train_file)
X_test, y_test = load_from_tsfile_to_dataframe(epilepsy_test_file)


# # length of the series is 50
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "FingerMovements/FingerMovements_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "FingerMovements/FingerMovements_TEST.ts")
# )


# # length of the series is 400
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "HandMovementDirection/HandMovementDirection_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "HandMovementDirection/HandMovementDirection_TEST.ts")
# )


# # length of series is 405
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "Heartbeat/Heartbeat_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "Heartbeat/Heartbeat_TEST.ts")
# )




# # length of the series is 100
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "BasicMotions/BasicMotions_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "BasicMotions/BasicMotions_TEST.ts")
# )



# # length of the series is 896
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP1/SelfRegulationSCP1_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP1/SelfRegulationSCP1_TEST.ts")
# )


# # length of the series is 1152
# X_train, y_train = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP2/SelfRegulationSCP2_TRAIN.ts")
# )
# X_test, y_test = load_from_tsfile_to_dataframe(
#     os.path.join(DATA_PATH, "SelfRegulationSCP2/SelfRegulationSCP2_TEST.ts")
# )


# download http://www.timeseriesclassification.com/Downloads/EyesOpenShut.zip 
# and extract EyesOpenShut_TRAIN.arff and EyesOpenShut_TEST.arff to your desired path
# # path for multivariate datasets which are not available in the UEA and UCR zip files
# o_path = os.path.join(os.path.dirname("C:\\Users\\rbabayev\\Desktop\\"), "Other_datasets_arff\\multivariate")


# # length of the time series is 128
# X_train, y_train = load_from_arff_to_dataframe(
#     os.path.join(o_path, "EyesOpenShut/EyesOpenShut_TRAIN.arff")
# )
# X_test, y_test = load_from_arff_to_dataframe(
#     os.path.join(o_path, "EyesOpenShut/EyesOpenShut_TEST.arff")
# )




# Report whether the training frame has more than one column, then the shape of
# every split, then the training shape once more on its own line.
multivariate_flag = is_multivariate(X_train)
print("Multivariate dataset -> ", multivariate_flag)

split_shapes = [split.shape for split in (X_train, y_train, X_test, y_test)]
print(*split_shapes)
print(split_shapes[0])

In [None]:
# Preview the first rows of the training data loaded above; as the cell's last
# expression, the result is rendered as the cell output.
X_train.head()

In [None]:
# multi-class target variable
# np.unique returns the sorted distinct values of y_train, i.e. the class labels
# present in the training split; the evaluation loop below uses their count to
# choose between the binary and the multi-class AUROC computation.
np.unique(y_train)

In [None]:
# Column ensembling
# We can also fit one classifier per time series column and then aggregate their predictions.
# The interface is similar to the familiar ColumnTransformer from sklearn.
# Every wrapper built here is given the column selector [0] only.

def _first_column_ensemble(label, estimator):
    """Wrap `estimator` in a ColumnEnsembleClassifier with one (label, estimator, [0]) entry."""
    return ColumnEnsembleClassifier(estimators=[(label, estimator, [0])])


clf_list = [
    # _first_column_ensemble("TSFC0", TimeSeriesForestClassifier(n_estimators=100, random_state=1)),
    _first_column_ensemble("TSF0", TimeSeriesForest(n_estimators=100, random_state=1)),
    _first_column_ensemble(
        "RandomIntervalSpectralForest0",
        RandomIntervalSpectralForest(n_estimators=100, random_state=1),
    ),
    # _first_column_ensemble("BOSSEnsemble0", BOSSEnsemble(max_ensemble_size=5, random_state=1)),
    # _first_column_ensemble(
    #     "TemporalDictionaryEnsemble0",
    #     TemporalDictionaryEnsemble(n_parameter_samples=250, max_ensemble_size=50,
    #                                randomly_selected_params=50, random_state=1),
    # ),
    _first_column_ensemble(
        "KNeighborsTimeSeriesClassifier0",
        KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw"),
    ),
    _first_column_ensemble(
        "ContractableBOSS0",
        ContractableBOSS(n_parameter_samples=250, max_ensemble_size=50, random_state=1),
    ),
    # NOTE(review): CanonicalIntervalForest is not among the imports visible in this notebook.
    # _first_column_ensemble(
    #     "CanonicalIntervalForest0",
    #     CanonicalIntervalForest(n_estimators=100, att_subsample_size=8, random_state=1),
    # ),
    # _first_column_ensemble("STC0", ShapeletTransformClassifier(time_contract_in_mins=1, random_state=1)),
    _first_column_ensemble(
        "WSL0",
        WEASEL(binning_strategy="equi-depth", anova=False, random_state=1),
    ),
    # _first_column_ensemble("EE0", ElasticEnsemble(random_state=1)),
    # _first_column_ensemble("PF0", ProximityForest(n_estimators=100, random_state=1)),
    # The last two classifiers are passed the full frame, without a column wrapper.
    MrSEQLClassifier(),
    MUSE(random_state=1),
]


# Fit every classifier in clf_list on the training split and print accuracy,
# macro F1, AUROC and macro recall measured on the test split.

# Loop-invariant: the number of classes decides which AUROC form is used.
is_binary = len(np.unique(y_train)) == 2

for clf in clf_list:
    print("\n-------------------------------------------")
    print(clf)
    clf.fit(X_train, y_train)
    y_test_prob = clf.predict_proba(X_test)
    y_test_pred = clf.predict(X_test)
    print("accuracy: ", accuracy_score(y_test, y_test_pred))
    print("f1 score: ", f1_score(y_test, y_test_pred, average='macro'))

    if is_binary:
        # In the binary case roc_auc_score needs a 1-D score for the class with the
        # greater label, not the full two-column probability matrix
        # (see https://github.com/scikit-learn/scikit-learn/issues/10247).
        # Slicing the probabilities already computed above avoids the second
        # predict_proba pass that make_scorer(roc_auc_score, needs_proba=True)
        # triggered, and avoids the `needs_proba` keyword that newer scikit-learn
        # releases deprecate.
        # Assumes predict_proba returns an (n_samples, 2) array whose columns follow
        # the sorted class labels -- the same assumption the scorer relied on.
        print("auroc: ", roc_auc_score(y_test, y_test_prob[:, 1]))
    else:
        print("auroc: ", roc_auc_score(y_test, y_test_prob, average='macro', multi_class="ovo"))

    #print("auprc: ", average_precision_score(y_test, y_test_prob)) # multiclass format is not supported
    #print("auprc: ", make_scorer(average_precision_score, needs_proba=True)(clf, X_test, y_test)) # multiclass format is not supported
    print("recall: ", recall_score(y_test, y_test_pred, average='macro'))
    #break

In [None]:
# Bespoke classification algorithms
# Another approach is to use bespoke (or classifier-specific) methods for multivariate time series data.
# Here, we try out the MrSEQL algorithm in multidimensional space.

# clf = MrSEQLClassifier()


# clf.fit(X_train, y_train)
# print("accuracy: ", clf.score(X_test, y_test))
# y_test_pred = clf.predict(X_test)
# y_test_prob = clf.predict_proba(X_test)
# print("f1 score: ", f1_score(y_test, y_test_pred, average='macro'))
# print("auroc: ", roc_auc_score(y_test, y_test_prob, average='macro', multi_class="ovo"))
# #print("auprc: ", average_precision_score(y_test, y_test_prob)) # multiclass format is not supported
# print("recall: ", recall_score(y_test, y_test_pred, average='macro'))

In [None]:
# clf = MUSE(random_state=1)


# clf.fit(X_train, y_train)
# print("accuracy: ", clf.score(X_test, y_test))
# y_test_pred = clf.predict(X_test)
# y_test_prob = clf.predict_proba(X_test)
# print("f1 score: ", f1_score(y_test, y_test_pred, average='macro'))
# print("auroc: ", roc_auc_score(y_test, y_test_prob, average='macro', multi_class="ovo"))
# #print("auprc: ", average_precision_score(y_test, y_test_prob)) # multiclass format is not supported
# print("recall: ", recall_score(y_test, y_test_pred, average='macro'))