In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
df = pd.read_csv('./Dataset.csv')
df.columns

Index(['Unnamed', 'X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8', 'X9',
       ...
       'X170', 'X171', 'X172', 'X173', 'X174', 'X175', 'X176', 'X177', 'X178',
       'y'],
      dtype='object', length=180)

In [3]:
df.head()

Unnamed: 0,Unnamed,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X170,X171,X172,X173,X174,X175,X176,X177,X178,y
0,X21.V1.791,135,190,229,223,192,125,55,-9,-33,...,-17,-15,-31,-77,-103,-127,-116,-83,-51,4
1,X15.V1.924,386,382,356,331,320,315,307,272,244,...,164,150,146,152,157,156,154,143,129,1
2,X8.V1.1,-32,-39,-47,-37,-32,-36,-57,-73,-85,...,57,64,48,19,-12,-30,-35,-35,-36,5
3,X16.V1.60,-105,-101,-96,-92,-89,-95,-102,-100,-87,...,-82,-81,-80,-77,-85,-77,-72,-69,-65,5
4,X20.V1.54,-9,-65,-98,-102,-78,-48,-16,0,-21,...,4,2,-12,-32,-41,-65,-83,-89,-73,5


### Get features and labels

In [4]:
from sktime.datatypes._panel._convert import from_2d_array_to_nested

features = df.loc[:, "X1":'X178']
features = from_2d_array_to_nested(features)

In [5]:
def labels_transformer(label):
    return 1 if label == 1 else 0

labels = df['y'].apply(labels_transformer)

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, labels)

In [7]:
from sktime.transformations.panel.catch22 import Catch22

ct22 = Catch22(n_jobs=4)
ct22.fit(X_train, y_train)

Catch22(n_jobs=4)

In [8]:
transformed_22_train = ct22.transform(X_train)

In [9]:
transformed_22_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,-6.0,-10.3,49.0,0.140449,-0.162921,11.0,47.0,455.05241,0.073631,7.566146,...,23.0,0.920904,11.0,1.513737,0.208333,0.071098,0.853659,0.219512,0.006803,0.0
1,342.300018,223.349991,19.0,0.016854,-0.168539,3.0,6.0,283737.533443,0.515418,481.433753,...,2.0,1.0,9.0,1.804044,0.75,0.110131,0.609756,0.219512,0.006371,11.0
2,-13.7,-20.65,47.0,0.359551,-0.252809,11.0,30.0,996.198983,0.073631,10.907594,...,29.0,0.937853,10.0,1.477048,0.171429,0.099171,0.804878,0.170732,0.08,0.0
3,-13.5,-23.85,28.0,0.730337,-0.449438,4.0,6.0,1900.689483,0.171806,31.678487,...,5.0,0.954802,9.0,1.730513,0.166667,0.110178,0.853659,0.195122,0.00823,10.0
4,-34.400002,-25.300001,23.0,-0.382022,-0.02809,7.0,15.0,1924.504578,0.147262,17.006455,...,9.0,0.966102,16.0,1.452505,0.7,0.09858,0.487805,0.707317,0.004614,53.0


### Transform features by MINImally RandOm Convolutional KErnel Transform

In [10]:
from sktime.transformations.panel.rocket import MiniRocket

mr = MiniRocket(num_kernels=1000, n_jobs=4)

transformed_mr_train = mr.fit_transform(X_train)

In [11]:
transformed_mr_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,914,915,916,917,918,919,920,921,922,923
0,0.02809,1.0,0.0,0.676471,1.0,0.176471,0.713483,0.0,0.410112,1.0,...,0.685393,1.0,0.505618,0.0,0.0,0.0,1.0,0.0,0.477528,1.0
1,0.460674,0.573034,0.376404,0.488235,0.488235,0.482353,0.488764,0.44382,0.483146,0.623529,...,0.522472,1.0,0.52809,1.0,0.477528,0.5,0.550562,0.0,0.494382,1.0
2,0.106742,1.0,0.0,0.552941,0.970588,0.205882,0.595506,0.0,0.331461,1.0,...,0.640449,1.0,0.685393,0.0,0.0,0.0,0.88764,1.0,0.747191,1.0
3,0.252809,1.0,0.0,0.535294,0.788235,0.347059,0.573034,0.016854,0.44382,1.0,...,0.657303,1.0,0.55618,1.0,0.044944,1.0,0.932584,0.0,0.713483,1.0
4,0.174157,1.0,0.0,0.570588,0.876471,0.329412,0.623595,0.016854,0.455056,1.0,...,0.646067,1.0,0.438202,0.0,0.016854,0.0,0.926966,1.0,0.544944,1.0


### Train RandomForestClassifier

In [12]:
from sklearn.ensemble import RandomForestClassifier

rc = RandomForestClassifier(n_estimators=1000)
rc.fit(transformed_22_train, y_train)

RandomForestClassifier(n_estimators=1000)

In [14]:
transformed_22_test = ct22.transform(X_test)

### Test accuracy

In [15]:
from sklearn.metrics import accuracy_score

accuracy_score(y_test, rc.predict(transformed_22_test))

0.983304347826087

### Train catboost

In [16]:
from catboost import CatBoostClassifier

cb = CatBoostClassifier(iterations=50,
                        learning_rate=0.1)

cb.fit(transformed_22_train, y_train)

0:	learn: 0.4809019	total: 224ms	remaining: 11s
1:	learn: 0.3302361	total: 229ms	remaining: 5.49s
2:	learn: 0.2665176	total: 233ms	remaining: 3.65s
3:	learn: 0.2228918	total: 237ms	remaining: 2.72s
4:	learn: 0.1686958	total: 241ms	remaining: 2.17s
5:	learn: 0.1382185	total: 246ms	remaining: 1.8s
6:	learn: 0.1093829	total: 249ms	remaining: 1.53s
7:	learn: 0.0980791	total: 254ms	remaining: 1.33s
8:	learn: 0.0852446	total: 258ms	remaining: 1.18s
9:	learn: 0.0763056	total: 263ms	remaining: 1.05s
10:	learn: 0.0689818	total: 268ms	remaining: 949ms
11:	learn: 0.0621849	total: 272ms	remaining: 860ms
12:	learn: 0.0573800	total: 276ms	remaining: 787ms
13:	learn: 0.0549828	total: 281ms	remaining: 723ms
14:	learn: 0.0528779	total: 285ms	remaining: 666ms
15:	learn: 0.0498300	total: 290ms	remaining: 616ms
16:	learn: 0.0468018	total: 294ms	remaining: 571ms
17:	learn: 0.0452882	total: 298ms	remaining: 530ms
18:	learn: 0.0434052	total: 303ms	remaining: 495ms
19:	learn: 0.0420770	total: 308ms	remaining:

<catboost.core.CatBoostClassifier at 0x194dbeb9460>

In [17]:
accuracy_score(y_test, cb.predict(transformed_22_test))

0.9829565217391304

In [19]:
from tensorflow import keras

def make_model(input_shape):
    input_layer = keras.layers.Input(input_shape)
    
    dense_layer = keras.layers.Dense(256, activation="relu")(input_layer)
    
    dense_layer = keras.layers.Dense(128, activation="relu")(dense_layer)

    dense_layer = keras.layers.Dense(32, activation="relu")(dense_layer)
    
    output_layer = keras.layers.Dense(2, activation="softmax")(dense_layer)

    return keras.models.Model(inputs=input_layer, outputs=output_layer)

model = make_model(input_shape=transformed_22_train.iloc[1].shape)

In [20]:
epochs = 200
batch_size = 32

callbacks = [
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)

history = model.fit(
    transformed_22_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [21]:
model = keras.models.load_model("best_model.h5")

test_loss, test_acc = model.evaluate(transformed_22_test, y_test)

print("Test accuracy", test_acc)
print("Test loss", test_loss)

Test accuracy 0.9537391066551208
Test loss 2.7180848121643066


### Use MiniRocket transformed dataset

In [22]:
transformed_mr_test = mr.transform(X_test)

In [24]:
rfc = RandomForestClassifier(n_estimators=1000)

rfc.fit(transformed_mr_train, y_train)

RandomForestClassifier(n_estimators=1000)

In [26]:
accuracy_score(y_test, rfc.predict(transformed_mr_test))

0.9853913043478261

In [29]:
cb2 = CatBoostClassifier(iterations=50,
                         learning_rate=0.1)

cb2.fit(transformed_mr_train, y_train)

0:	learn: 0.4619851	total: 36.3ms	remaining: 1.78s
1:	learn: 0.3477541	total: 78.2ms	remaining: 1.88s
2:	learn: 0.2584965	total: 110ms	remaining: 1.72s
3:	learn: 0.1870093	total: 142ms	remaining: 1.64s
4:	learn: 0.1448217	total: 177ms	remaining: 1.59s
5:	learn: 0.1164832	total: 212ms	remaining: 1.55s
6:	learn: 0.0949558	total: 246ms	remaining: 1.51s
7:	learn: 0.0812881	total: 283ms	remaining: 1.48s
8:	learn: 0.0720234	total: 322ms	remaining: 1.46s
9:	learn: 0.0640147	total: 357ms	remaining: 1.43s
10:	learn: 0.0570303	total: 392ms	remaining: 1.39s
11:	learn: 0.0516095	total: 426ms	remaining: 1.35s
12:	learn: 0.0479761	total: 459ms	remaining: 1.31s
13:	learn: 0.0443297	total: 495ms	remaining: 1.27s
14:	learn: 0.0413124	total: 530ms	remaining: 1.24s
15:	learn: 0.0394240	total: 565ms	remaining: 1.2s
16:	learn: 0.0375479	total: 612ms	remaining: 1.19s
17:	learn: 0.0361016	total: 643ms	remaining: 1.14s
18:	learn: 0.0351825	total: 672ms	remaining: 1.1s
19:	learn: 0.0338705	total: 703ms	remaini

<catboost.core.CatBoostClassifier at 0x194f07bfcd0>

In [30]:
accuracy_score(y_test, cb2.predict(transformed_mr_test))

0.983304347826087