In [1]:
# package imports
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.kernel_based import RocketClassifier
from sklearn.metrics import accuracy_score
from sktime.dists_kernels import FlatDist, ScipyDist
from sklearn.model_selection import GridSearchCV

In [2]:
# load the data
data = np.load('data/ts_challenge.npz')

# separate the features from the labels
X = data['arr_0']
y = data['arr_1']

test_data = np.load('data/ts_test.npz')
X_test = test_data['arr_0']

In [35]:
# train a baseline KNN classifier 

# train test split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=12)

# iniciate the model 
eucl_dist = FlatDist(ScipyDist())
knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance=eucl_dist)

# fit and predict
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_val)
print(f'Accuracy of KNN (k=5):{round(accuracy_score(y_val, knn_pred),2)}')

Accuracy of KNN (k=5):0.79


In [37]:
# use GridSearch to tune the number of neighbours 
knn = KNeighborsTimeSeriesClassifier(distance=eucl_dist)

grid = GridSearchCV(
    estimator=knn,
    param_grid={'n_neighbors':[5, 8, 12]},
    cv=5
)

grid.fit(X,y)
print('best params:', grid.best_params_)

# fit a model with the selected k and test the accuracy 

# fit the model with selected param and evaluate 
knn_h = KNeighborsTimeSeriesClassifier(
    n_neighbors=grid.best_params_['n_neighbors'], 
    distance=eucl_dist
)
cross_h = cross_val_score(
    estimator=knn_h,
    X=X,
    y=y
)

print('mean accuracy:', round(np.mean(cross_h),2))

best params: {'n_neighbors': 12}
mean accuracy: 0.71


In [22]:
knn_h = KNeighborsTimeSeriesClassifier(
    n_neighbors=4, 
    distance=eucl_dist
)
knn_h.fit(X_train, y_train)
preds = knn_h.predict(X_val)
print(preds)
print('mean accuracy:', accuracy_score(y_val, preds))

[0 1 0 0 0 1 0 1 0 1 1 1 1 0 0 0 1 0 0 0 0 1 0 0]
mean accuracy: 0.7916666666666666


In [154]:
# train and tune a ROCKET classifier

# train test split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=11)

# fitting and predicting
roc = RocketClassifier()

roc.fit(X_train, y_train)
roc_pred = roc.predict(X_val)

print('ROCKET accuracy:', round(accuracy_score(y_val, roc_pred),2))

ROCKET accuracy: 0.75


In [149]:
# use GridSearch to tune the params
params = {
    'num_kernels':[5000,10000],
    'max_dilations_per_kernel':[16,24,32],
    'n_features_per_kernel':[2,4]
}

# initialize a new model instance
roc = RocketClassifier()

# start the grid search
grid = GridSearchCV(
    estimator=roc,
    param_grid=params,
    cv=3
)

grid.fit(X,y)
print('best params for ROCKET:', grid.best_params_)

best params for ROCKET: {'max_dilations_per_kernel': 16, 'n_features_per_kernel': 4, 'num_kernels': 10000}


In [155]:
#fit a new instance of the model with the best params selected
roc = RocketClassifier(
    num_kernels=10000,
    max_dilations_per_kernel=16,
    n_features_per_kernel=4
)
# fit to the combined train and val sets
roc.fit(X, y)

# predict on the hold out test  
roc_pred = roc.predict(X_test)

# save the preds to a csv file 
np.savetxt('data/ypred.csv', roc_pred, delimiter=',')