In [2]:
import numpy as np
import pandas as pd

from sktime.datasets import load_italy_power_demand

# widen the text display so that wide data frames are not wrapped
pd.set_option("display.width", 1000)

# example panel of time series in the "pd-multiindex" mtype;
# the second return value is not needed here and is discarded
X, _ = load_italy_power_demand(return_type="pd-multiindex")

# descriptive column and index level names, purely for illustration
X = X.set_axis(["total_power_demand"], axis="columns").rename_axis(
    index=["day_ID", "hour_of_day"]
)

X

Unnamed: 0_level_0,Unnamed: 1_level_0,total_power_demand
day_ID,hour_of_day,Unnamed: 2_level_1
0,0,-0.710518
0,1,-1.183320
0,2,-1.372442
0,3,-1.593083
0,4,-1.467002
...,...,...
1095,19,0.180490
1095,20,-0.094058
1095,21,0.729587
1095,22,0.210995


In [3]:
from sktime.datasets import load_basic_motions

# example panel with several columns, in the "pd-multiindex" mtype;
# the second return value is not needed here and is discarded
X, _ = load_basic_motions(return_type="pd-multiindex")

# descriptive names for the six columns and the two index levels,
# purely for illustration
X = X.set_axis(
    ["accel_1", "accel_2", "accel_3", "gyro_1", "gyro_2", "gyro_3"],
    axis="columns",
).rename_axis(index=["trial_no", "timepoint"])
X

Unnamed: 0_level_0,Unnamed: 1_level_0,accel_1,accel_2,accel_3,gyro_1,gyro_2,gyro_3
trial_no,timepoint,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0,0.079106,0.394032,0.551444,0.351565,0.023970,0.633883
0,1,0.079106,0.394032,0.551444,0.351565,0.023970,0.633883
0,2,-0.903497,-3.666397,-0.282844,-0.095881,-0.319605,0.972131
0,3,1.116125,-0.656101,0.333118,1.624657,-0.569962,1.209171
0,4,1.638200,1.405135,0.393875,1.187864,-0.271664,1.739182
...,...,...,...,...,...,...,...
79,95,28.459024,-16.633770,3.631869,8.978229,-3.611533,-1.491489
79,96,10.260094,0.102775,1.269261,-1.645964,-3.377157,1.283746
79,97,4.316471,-3.574319,2.063831,-1.717875,-1.843054,0.484734
79,98,0.704446,-4.920444,2.851857,-2.982977,-0.809665,-0.721774


In [4]:
from sktime.datasets import load_italy_power_demand

# load an example time series panel in numpy3D mtype;
# the second return value is not needed here and is discarded
X, _ = load_italy_power_demand(return_type="numpy3D")
# display the array
X

array([[[-0.71051757, -1.1833204 , -1.3724416 , ...,  0.58181015,
          0.1720477 , -0.26923494]],

       [[-0.99300935, -1.4267865 , -1.5798843 , ...,  0.69106647,
         -0.04890624, -0.38061813]],

       [[ 1.3190669 ,  0.56977448,  0.19512825, ...,  2.3493441 ,
          2.2556825 ,  1.6000516 ]],

       ...,

       [[-1.2627064 , -1.4497741 , -1.5199244 , ...,  0.81842084,
          0.28060142, -0.14030072]],

       [[ 0.71432123,  0.62902914,  0.11727662, ...,  2.420163  ,
          2.1642867 ,  1.9084104 ]],

       [[-0.58214415, -1.1312408 , -1.4973052 , ...,  0.72958677,
          0.21099547, -0.00254212]]])

In [6]:
# steps 1, 2 - prepare italy power demand dataset (train and new)
from sktime.datasets import load_italy_power_demand

# the train split is kept together with its labels; the test split plays the
# role of "new" data, so its second return value is discarded
X_train, y_train = load_italy_power_demand(split="train", return_type="numpy3D")
X_new, _ = load_italy_power_demand(split="test", return_type="numpy3D")

# shape of the training array
X_train.shape


(67, 1, 24)

In [8]:
# step 3 - specify the classifier
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.dists_kernels import FlatDist, ScipyDist

# example 1 - 3-NN with simple dynamic time warping distance (requires numba)
clf = KNeighborsTimeSeriesClassifier(n_neighbors=3)

# example 2 - custom distance:
# 3-nearest neighbour classifier with Euclidean distance (on flattened time series)
# (requires scipy)
# note: this rebinds clf, so the example 2 classifier is the one that
# the rest of this cell inspects
eucl_dist = FlatDist(ScipyDist())
clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)

# all classifiers are scikit-learn / scikit-base compatible!
# nested parameter interface via get_params, set_params
clf.get_params()

{'algorithm': 'brute',
 'distance': FlatDist(transformer=ScipyDist()),
 'distance_mtype': None,
 'distance_params': None,
 'leaf_size': 30,
 'n_jobs': None,
 'n_neighbors': 3,
 'pass_train_distances': False,
 'weights': 'uniform',
 'distance__transformer': ScipyDist(),
 'distance__transformer__colalign': 'intersect',
 'distance__transformer__metric': 'euclidean',
 'distance__transformer__metric_kwargs': None,
 'distance__transformer__p': 2,
 'distance__transformer__var_weights': None}

In [9]:
# fit the classifier on the training data, then predict on the new data
clf.fit(X_train, y_train)
y_pred = clf.predict(X_new)
# distinct predicted labels and how many times each one was predicted
unique, counts = np.unique(y_pred, return_counts=True)
unique, counts

(array(['1', '2'], dtype='<U1'), array([510, 519]))

In [10]:
# steps 1, 2 - prepare italy power demand dataset (train and new)
from sktime.datasets import load_italy_power_demand

# the test split plays the role of "new" data; its second return value is discarded
X_train, y_train = load_italy_power_demand(split="train", return_type="numpy3D")
X_new, _ = load_italy_power_demand(split="test", return_type="numpy3D")

# step 3 - specify the classifier
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.dists_kernels import FlatDist, ScipyDist

# 3-nearest-neighbour classifier using the FlatDist(ScipyDist()) distance
eucl_dist = FlatDist(ScipyDist())
clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)

# step 4 - fit/train the classifier
clf.fit(X_train, y_train)

# step 5 - predict labels on new data
y_pred = clf.predict(X_new)
y_pred

array(['2', '2', '2', ..., '2', '2', '2'], dtype='<U1')

In [11]:
# all imports for this cell, each listed once
from sklearn.metrics import accuracy_score

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.datasets import load_italy_power_demand
from sktime.dists_kernels import FlatDist, ScipyDist

# data should be split into train/test
# (unlike the earlier cells, the test labels are kept so they can be compared
# against the predictions)
X_train, y_train = load_italy_power_demand(split="train", return_type="numpy3D")
X_test, y_test = load_italy_power_demand(split="test", return_type="numpy3D")

# step 3-5 are the same
eucl_dist = FlatDist(ScipyDist())
clf = KNeighborsTimeSeriesClassifier(n_neighbors=3, distance=eucl_dist)

clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# for simplest evaluation, compare ground truth to predictions
accuracy_score(y_test, y_pred)

# steps 1, 2 - prepare dataset (train and new)
from sktime.datasets import load_covid_3month
from sktime.dists_kernels import FlatDist, ScipyDist
from sktime.regression.distance_based import KNeighborsTimeSeriesRegressor

X_train, y_train = load_covid_3month(split="train")
y_train = y_train.astype("float")
# the test split plays the role of "new" data; its second return value is discarded
X_new, _ = load_covid_3month(split="test")
X_new = X_new.loc[:2]  # smaller dataset for faster notebook runtime

# step 3 - specify the regressor
# the distance is constructed here instead of being taken from an earlier cell,
# so that this cell does not depend on leftover kernel state
eucl_dist = FlatDist(ScipyDist())
# note: the variable name clf is kept as-is, although this is a regressor
clf = KNeighborsTimeSeriesRegressor(n_neighbors=3, distance=eucl_dist)

# step 4 - fit/train the regressor
clf.fit(X_train, y_train)

# step 5 - predict target values on new data
y_pred = clf.predict(X_new)

In [12]:
# step 1 - prepare dataset (train split only; the second return value is discarded)
from sktime.datasets import load_italy_power_demand

X, _ = load_italy_power_demand(split="train", return_type="numpy3D")

# step 2 - specify the clusterer
from sktime.clustering.dbscan import TimeSeriesDBSCAN
from sktime.dists_kernels import FlatDist, ScipyDist

# DBSCAN clusterer using the FlatDist(ScipyDist()) distance, with eps set to 2
eucl_dist = FlatDist(ScipyDist())
clst = TimeSeriesDBSCAN(distance=eucl_dist, eps=2)

# step 3 - fit the clusterer to the data
clst.fit(X)

# step 4 - inspect the clustering
clst.get_fitted_params()

# list all classifiers in sktime
# that can classify panels of time series containing missing data
from sktime.registry import all_estimators

# restrict to estimators of type "classifier" whose tag
# "capability:missing_values" is True; return the result as a data frame
all_estimators(
    "classifier",
    as_dataframe=True,
    filter_tags={"capability:missing_values": True},
)

Unnamed: 0,name,object
0,BaggingClassifier,<class 'sktime.classification.ensemble._baggin...
1,DummyClassifier,<class 'sktime.classification.dummy._dummy.Dum...
2,KNeighborsTimeSeriesClassifier,<class 'sktime.classification.distance_based._...
3,SklearnClassifierPipeline,<class 'sktime.classification.compose._pipelin...
4,TimeSeriesSVC,<class 'sktime.classification.kernel_based._sv...
5,WeightedEnsembleClassifier,<class 'sktime.classification.ensemble._weight...


In [13]:
from sktime.datasets import load_italy_power_demand
from sktime.transformations.series.detrend import Detrender

# load some panel data (pd-multiindex mtype; second return value discarded)
X, _ = load_italy_power_demand(return_type="pd-multiindex")

# specify a linear detrender
detrender = Detrender()

# detrend X by removing linear trend from each instance
X_detrended = detrender.fit_transform(X)
X_detrended

Unnamed: 0_level_0,Unnamed: 1_level_0,dim_0
Unnamed: 0_level_1,timepoints,Unnamed: 2_level_1
0,0,0.267711
0,1,-0.290155
0,2,-0.564339
0,3,-0.870044
0,4,-0.829027
...,...,...
1095,19,-0.425904
1095,20,-0.781304
1095,21,-0.038512
1095,22,-0.637956


In [14]:
# example of a series-to-primitive transformer
from sktime.transformations.series.summarize import SummaryTransformer

# specify summary transformer
summary_trafo = SummaryTransformer()

# extract summary features - one row per instance in the panel
# note: X is the panel loaded in the previous cell
X_summaries = summary_trafo.fit_transform(X)
X_summaries

Unnamed: 0,mean,std,min,max,0.1,0.25,0.5,0.75,0.9
0,-1.041667e-09,1.0,-1.593083,1.464375,-1.372442,-0.805078,0.030207,0.936412,1.218518
1,-1.958333e-09,1.0,-1.630917,1.201393,-1.533955,-0.999388,0.384871,0.735720,1.084018
2,-1.775000e-09,1.0,-1.397118,2.349344,-1.003740,-0.741487,-0.132687,0.265374,1.515756
3,-8.541667e-10,1.0,-1.646458,1.344487,-1.476779,-0.898722,0.266022,0.776495,1.039641
4,-3.416667e-09,1.0,-1.620240,1.303502,-1.511644,-0.978061,0.405495,0.692648,1.061249
...,...,...,...,...,...,...,...,...,...
1091,-1.041667e-09,1.0,-1.817799,1.630397,-1.323058,-0.643414,0.081208,0.568453,1.390523
1092,-4.166666e-10,1.0,-1.550077,1.513605,-1.343747,-0.768526,0.075550,0.857101,1.276013
1093,4.166667e-09,1.0,-1.706992,1.052255,-1.498879,-1.139943,0.467669,0.713195,0.993797
1094,1.583333e-09,1.0,-1.673857,2.420163,-0.744173,-0.479768,-0.266538,0.159923,1.550184


In [15]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.transformations.series.exponent import ExponentTransformer

# chain a transformer and a classifier with the * operator
pipe = ExponentTransformer() * KNeighborsTimeSeriesClassifier()

# this constructs a ClassifierPipeline, which is also a classifier
pipe

In [17]:
# every name used in this cell is imported here, so the cell does not rely on
# imports made by an earlier cell
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.datasets import load_unit_test
from sktime.pipeline import make_pipeline
from sktime.transformations.series.exponent import ExponentTransformer

# same pipeline as the * operator form, built with an explicit function call
pipe = make_pipeline(ExponentTransformer(), KNeighborsTimeSeriesClassifier())

# the second return value of the test split is discarded
X_train, y_train = load_unit_test(split="TRAIN")
X_test, _ = load_unit_test(split="TEST")

# this is a ClassifierPipeline with the same interface as knn-classifier
# first applies exponent transform, then knn-classifier
pipe.fit(X_train, y_train)


In [18]:
from sklearn.ensemble import RandomForestClassifier

from sktime.transformations.series.summarize import SummaryTransformer

# chain the summary transformer with a scikit-learn random forest via *
summary_rf = SummaryTransformer() * RandomForestClassifier()

# note: X_train, y_train are the unit test data loaded in the previous cell
summary_rf.fit(X_train, y_train)

In [19]:
# all transformers that guarantee that the output is equal length and equal index
from sktime.registry import all_estimators

# restrict to estimators of type "transformer" whose tag
# "capability:unequal_length:removes" is True; return the result as a data frame
all_estimators(
    "transformer",
    as_dataframe=True,
    filter_tags={"capability:unequal_length:removes": True},
)

Unnamed: 0,name,object
0,ClearSky,<class 'sktime.transformations.series.clear_sk...
1,IntervalSegmenter,<class 'sktime.transformations.panel.segment.I...
2,PaddingTransformer,<class 'sktime.transformations.panel.padder.Pa...
3,RandomIntervalSegmenter,<class 'sktime.transformations.panel.segment.R...
4,SlopeTransformer,<class 'sktime.transformations.panel.slope.Slo...
5,TimeBinAggregate,<class 'sktime.transformations.series.binning....
6,TruncationTransformer,<class 'sktime.transformations.panel.truncatio...


In [20]:
from sktime.datasets import load_unit_test

# the second return value of the test split is discarded
X_train, y_train = load_unit_test(split="TRAIN")
X_test, _ = load_unit_test(split="TEST")

from sklearn.model_selection import KFold, cross_val_score

from sktime.classification.feature_based import SummaryClassifier

clf = SummaryClassifier()

# 4-fold cross-validation on the training split using the scikit-learn utility;
# the result holds one score per fold
cross_val_score(clf, X_train, y=y_train, cv=KFold(n_splits=4))



array([0.6, 0.8, 0.6, 0.8])

In [21]:
# KFold is imported here as well, so that this cell does not rely on the
# import made by the previous cell
from sklearn.model_selection import GridSearchCV, KFold

from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

knn = KNeighborsTimeSeriesClassifier()
# candidate values: two neighbour counts x two distances = four combinations,
# each evaluated with 4-fold cross-validation
param_grid = {"n_neighbors": [1, 5], "distance": ["euclidean", "dtw"]}
parameter_tuning_method = GridSearchCV(knn, param_grid, cv=KFold(n_splits=4))

# note: X_train, y_train, X_test are the unit test data loaded in the previous cell
parameter_tuning_method.fit(X_train, y_train)
y_pred = parameter_tuning_method.predict(X_test)