In [26]:
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from dtaidistance import dtw
from scipy.spatial.distance import euclidean
from dtaidistance import dtw_visualisation as dtwvis
import seaborn as sns
import fastdtw as fastdtw
from tslearn.utils import to_time_series_dataset

In [33]:
def load_npy(filename):
    return np.load(filename)


dir_path = 'cleaned_time_series/'
len_threshold = 1280
X, y, ids = [], [], []

for file in os.listdir(dir_path):
    if os.path.splitext(file)[1] != '.npy':
        continue

    split = file.split("_")
    ids.append(split[0])  # track_id
    y.append(split[1][:-4])  # genre
    ts = load_npy(dir_path + file)

    if len(ts) > len_threshold:
        ts = ts[0:len_threshold]
    else:
        # pad = [np.mean(ts[:-5])] * (len_threshold-len(ts)) # fill by mean value of last n observations
        pad = [ts[-1]] * (len_threshold - len(ts))  # fill with last observation
        ts = np.append(ts, pad)

    X.append([ts])

X, y, ids = np.array(X), np.array(y), np.array(ids)
print(len(X))

10000


In [34]:
# Encode the class labels as integers
y = LabelEncoder().fit_transform(y)

In [35]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((8000, 1, 1280), (2000, 1, 1280), (8000,), (2000,))

In [36]:
scaler = TabularToSeriesAdaptor(MinMaxScaler(), fit_in_transform=True)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# KNN with DTW

In [19]:
%%time
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier

for i in range(15):
    knn = KNeighborsTimeSeriesClassifier(n_neighbors=i+1, distance="dtw", n_jobs=-1)
    knn.fit(X_train_scaled[:50], y_train[:50])

    y_pred = knn.predict(X_test_scaled[:50])
    print(f'Accuracy score:{accuracy_score(y_test[:50], y_pred[:50])} for n_neighbors={i+1}')
    
knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
knn.fit(X_train_scaled[:5], y_train[:5])

y_pred = knn.predict(X_test_scaled[:5])
print(accuracy_score(y_test[:5], y_pred[:5]))

Accuracy score:0.05 for n_neighbors=1
Accuracy score:0.15 for n_neighbors=2
Accuracy score:0.1 for n_neighbors=3
Accuracy score:0.1 for n_neighbors=4
Accuracy score:0.05 for n_neighbors=5
Accuracy score:0.1 for n_neighbors=6
Accuracy score:0.05 for n_neighbors=7
Accuracy score:0.0 for n_neighbors=8
Accuracy score:0.0 for n_neighbors=9
Accuracy score:0.05 for n_neighbors=10
0.2
CPU times: user 41.5 s, sys: 2.14 s, total: 43.6 s
Wall time: 44.4 s


# Shapelets

In [40]:
from tslearn.shapelets import ShapeletModel
from tslearn.shapelets import grabocka_params_to_shapelet_size_dict
from tslearn.utils import to_time_series_dataset
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tslearn.shapelets import LearningShapelets, grabocka_params_to_shapelet_size_dict
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from sklearn.preprocessing import LabelEncoder
from sktime.classification.shapelet_based import ShapeletTransformClassifier
from sklearn.metrics import classification_report

Shapelets are subsequences that can be used to represent a class. Matrix profiles make it possibile to identify these shapelets.


In [39]:
st = ShapeletTransformClassifier()

In [41]:
%%time
st.fit(X_train_scaled, y_train)
y_pred = st.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           2       0.00      0.00      0.00         3
           3       0.20      0.50      0.29         2
           4       0.33      0.25      0.29         4
           5       0.00      0.00      0.00         2
           6       0.00      0.00      0.00         3
           7       0.00      0.00      0.00         6
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         3
          10       1.00      0.25      0.40         4
          11       0.16      0.75      0.26         4
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         2
          14       1.00      1.00      1.00         1
          15       0.00      0.00      0.00         4
          16       0.00      0.00      0.00         1
          17       0.00      0.00      0.00         3
          18       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [42]:
from sklearn.tree import DecisionTreeClassifier
from sktime.transformations.panel.shapelet_transform import RandomShapeletTransform

In [43]:
rst = RandomShapeletTransform(n_shapelet_samples=10000, max_shapelets=None, min_shapelet_length=3, max_shapelet_length=None, n_jobs=-1) #n_jobs -1 uses all processors

In [ ]:
%%time
rst.fit(X_train_scaled, y_train)

In [ ]:
%%time
shapelets_distances_train = rst.transform(X_train_scaled)
shapelets_distances_test = rst.transform(X_test_scaled)