In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [2]:
def cm_plot(y_test, y_pred):
    """Plot a row-normalised confusion matrix as an annotated heatmap.

    Every row of the confusion matrix is divided by its row total, so each
    cell shows the fraction of samples of one true class that were assigned
    to each predicted class.

    Parameters
    ----------
    y_test : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_test``.

    Returns
    -------
    None
        The figure is rendered via ``plt.show()``.
    """
    # Configure seaborn *before* drawing. Calling sns.set() after the heatmap
    # only affected the next figure, so the first call looked different from
    # every later one.
    sns.set(font_scale=1.3)

    cm = confusion_matrix(y_test, y_pred).astype(float)
    row_totals = cm.sum(axis=1, keepdims=True)
    # A label that occurs only in y_pred yields an all-zero row; keep it at 0
    # instead of producing 0/0 = NaN (and a RuntimeWarning).
    cmn = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)

    fig, ax = plt.subplots(figsize=(5, 5))
    # Draw on the axes created above rather than relying on pyplot's
    # "current axes" state.
    sns.heatmap(cmn, cmap='Blues', annot=True, fmt='.2f', ax=ax)
    ax.set_title("Confusion Matrix")
    # confusion_matrix puts true classes on rows and predictions on columns.
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")

    plt.show()


In [3]:
# Read the competition arrays: labelled inputs, their labels, and the
# unlabelled inputs that predictions are submitted for.
X_train, y_train, X_test_real = (
    np.load(npy_path)
    for npy_path in (
        "/kaggle/input/neymark-tsc/train_x.npy",
        "/kaggle/input/neymark-tsc/train_y.npy",
        "/kaggle/input/neymark-tsc/test_x.npy",
    )
)


In [4]:
# Dimensions of the array loaded from train_x.npy
# (presumably samples x time steps -- confirm against the dataset description).
X_train.shape

(14667, 187)

In [5]:
# Dimensions of the array loaded from test_x.npy; its second dimension should
# match the training array's for the fitted model to accept it.
X_test_real.shape

(7225, 187)

In [6]:
!pip install sktime

Collecting sktime
  Downloading sktime-0.16.0-py3-none-any.whl (16.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m32.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting deprecated>=1.2.13
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: deprecated, sktime
Successfully installed deprecated-1.2.13 sktime-0.16.0
[0m

In [7]:
from sklearn.model_selection import train_test_split
# Set 33% of the labelled samples aside as a hold-out split; the fixed seed
# makes the partition repeatable.
# NOTE(review): the result is written back onto X_train / y_train, so
# re-running this cell splits the already-reduced arrays a second time.
X_train, X_test, y_train, y_test = train_test_split(
    X_train,
    y_train,
    test_size=0.33,
    random_state=42,
)

In [8]:
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sklearn.metrics import f1_score

from sklearn.model_selection import GridSearchCV
# Hyper-parameter grid for the k-NN time-series classifier:
# 3 x 2 x 2 x 3 = 36 combinations, each evaluated with GridSearchCV's
# default cross-validation and scored by macro-averaged F1.
parameters = {
    "n_neighbors": (1, 2, 3),
    "weights": ("uniform", "distance"),
    "algorithm": ("ball_tree", "kd_tree"),
    "distance": ("euclidean", "squared", "dtw"),
}
estimator = KNeighborsTimeSeriesClassifier()
# The recorded run of this search was interrupted before it finished.
# n_jobs=-1 spreads the independent (candidate, fold) fits over all available
# cores; it does not change the scores or the selected parameters.
clf = GridSearchCV(estimator, parameters, scoring="f1_macro", n_jobs=-1)
clf.fit(X_train, y_train)

# Unpack the winning combination into the names later cells print.
best_n_neighbors = clf.best_params_["n_neighbors"]
best_weights = clf.best_params_["weights"]
best_algorithm = clf.best_params_["algorithm"]
best_distance = clf.best_params_["distance"]

KeyboardInterrupt: 

In [None]:
# Report the selected hyper-parameters, one value per line.
print(best_n_neighbors, best_weights, best_algorithm, best_distance, sep="\n")

In [None]:
# No `classifier` variable is created in the visible cells, so the original
# line fails on a fresh kernel. `clf` is the fitted GridSearchCV; with its
# default refit=True, predict() delegates to the best estimator refitted on
# the data passed to fit().
y_pred = clf.predict(X_test_real)


In [None]:
# Assemble the submission table: a running row number as "ID" and the
# predicted label (cast to int) as "Answer", both stored as strings.
y_pred_csv = pd.DataFrame(
    {
        "ID": np.arange(len(y_pred)).astype(int).astype(str),
        "Answer": np.asarray(y_pred).astype(int).astype(str),
    }
)
y_pred_csv.to_csv("testing_submission.csv", index=False)
y_pred_csv