In [1]:
!pip install sktime
import numpy as np
import sklearn.ensemble as ensemble
import sklearn.neighbors as neighbors
import sklearn.gaussian_process as gp
import sklearn.svm as svm
import sklearn.neural_network as nn
from sklearn.metrics import accuracy_score, confusion_matrix

import time
def readucr(filename):
    """Load a tab-separated file into a feature matrix and integer labels.

    The first column of every row is taken as the label; all remaining
    columns are returned as the feature values of that row.

    Parameters
    ----------
    filename
        Anything ``np.loadtxt`` accepts as its first argument (path, URL
        string, or open text file object).

    Returns
    -------
    x : np.ndarray
        2-D array of shape ``(n_rows, n_columns - 1)`` with the feature values.
    y : np.ndarray
        1-D integer array of shape ``(n_rows,)`` with the labels.
    """
    # ndmin=2 keeps a one-row file two-dimensional; without it np.loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(filename, delimiter="\t", ndmin=2)
    y = data[:, 0]
    x = data[:, 1:]
    return x, y.astype(int)

root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"

# Fetch the FordA train and test splits (label column first, series values after).
x_train, y_train = readucr(f"{root_url}FordA_TRAIN.tsv")
x_test, y_test = readucr(f"{root_url}FordA_TEST.tsv")

# Relabel class -1 as 0, in place, on both label vectors.
np.putmask(y_train, y_train == -1, 0)
np.putmask(y_test, y_test == -1, 0)

# The sktime interface expects an extra axis after the sample axis that says
# how many features each series has; we have exactly one, so insert a
# length-1 axis at position 1.
X_train = x_train[:, np.newaxis, :]
X_test = x_test[:, np.newaxis, :]

Collecting sktime
[?25l  Downloading https://files.pythonhosted.org/packages/be/47/1cdd9342586b1695397bc72b8602cac2f9af7a49e1b51ddb5452d23d99c9/sktime-0.6.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.7MB)
[K     |████████████████████████████████| 5.7MB 26.9MB/s 
Collecting statsmodels>=0.12.1
[?25l  Downloading https://files.pythonhosted.org/packages/da/69/8eef30a6237c54f3c0b524140e2975f4b1eea3489b45eb3339574fc8acee/statsmodels-0.12.2-cp37-cp37m-manylinux1_x86_64.whl (9.5MB)
[K     |████████████████████████████████| 9.5MB 32.8MB/s 
Collecting scikit-learn>=0.24.0
[?25l  Downloading https://files.pythonhosted.org/packages/a8/eb/a48f25c967526b66d5f1fa7a984594f0bf0a5afafa94a8c4dbc317744620/scikit_learn-0.24.2-cp37-cp37m-manylinux2010_x86_64.whl (22.3MB)
[K     |████████████████████████████████| 22.3MB 2.4MB/s 
Collecting threadpoolctl>=2.0.0
  Downloading https://files.pythonhosted.org/packages/f7/12/ec3f2e203afa394a149911729357aa48affc59c20e2c1c8297a60f33f133/thre

In [10]:
from sktime.classification.shapelet_based import ROCKETClassifier

print("Trainiere ... ROCKET")
# ROCKET draws its convolution kernels at random; a fixed seed keeps the
# reported accuracy and confusion matrix stable across kernel restarts.
clf = ROCKETClassifier(random_state=42)
clf.fit(X_train, y_train)

# Score the fitted classifier on the test split.
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
conf_matrix = confusion_matrix(y_test, predictions)
print(f"Genauigkeit von ROCKET: {accuracy}")
print(conf_matrix)

Trainiere ... ROCKET
Genauigkeit von ROCKET: 0.9462121212121212
[[645  36]
 [ 35 604]]


In [6]:
# MiniRocket transform followed by a cross-validated ridge classifier
from sktime.transformations.panel.rocket import MiniRocket
from sklearn.linear_model import RidgeClassifierCV

# With default settings the transform yields roughly 10,000 features per series.
# The seed fixes the transform's random choices so results are reproducible.
minirocket = MiniRocket(random_state=42)
minirocket.fit(X_train)
X_train_transform = minirocket.transform(X_train)
X_test_transform = minirocket.transform(X_test)

# NOTE: normalize=True was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this call requires an older scikit-learn release.
clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
clf.fit(X_train_transform, y_train)

# Score the fitted classifier on the transformed test split.
predictions = clf.predict(X_test_transform)
accuracy = accuracy_score(y_test, predictions)
conf_matrix = confusion_matrix(y_test, predictions)
print(f"Genauigkeit von Mini-ROCKET: {accuracy}")
print(conf_matrix)

Genauigkeit von Mini-ROCKET: 0.946969696969697
[[651  30]
 [ 40 599]]


In [9]:
# Preview the first rows of the MiniRocket feature table; as the last
# expression of the cell it is rendered with the notebook's rich display.
X_train_transform.head()

    0      1      2      3      4     ...   9991   9992  9993  9994  9995
0  0.346  0.830  0.074  0.532  0.980  ...  0.574  0.968  0.50  1.00  0.25
1  0.370  0.836  0.082  0.540  0.958  ...  0.550  0.992  1.00  1.00  0.75
2  0.340  0.864  0.058  0.540  0.970  ...  0.580  1.000  0.00  0.25  0.00
3  0.374  0.832  0.068  0.534  0.940  ...  0.564  0.964  0.50  1.00  0.00
4  0.356  0.814  0.114  0.526  0.930  ...  0.574  0.984  0.25  0.75  0.00

[5 rows x 9996 columns]
