In [90]:
import numpy as np
from aeon.datasets import load_classification
from fastdtw import fastdtw
from scipy.optimize import minimize
from scipy.spatial.distance import euclidean
from skfda import FDataGrid
from skfda.misc.hat_matrix import (
    KNeighborsHatMatrix,
    LocalLinearRegressionHatMatrix,
    NadarayaWatsonHatMatrix,
)
from skfda.preprocessing.smoothing import KernelSmoother
from sklearn.kernel_ridge import KernelRidge
from sklearn.metrics.cluster import rand_score
from sklearn.model_selection import train_test_split
from sklearn_extra.cluster import KMedoids
from statsmodels.nonparametric.kernel_regression import KernelReg

In [91]:
# Load the "Coffee" classification dataset, then hold out 20% of the cases
# as a test split; the fixed random_state keeps the split reproducible.
X, y = load_classification("Coffee")
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [92]:
# Collapse every trailing axis so each case becomes one flat row; the label
# arrays are reshaped the same way (one row per case).
X_train, X_test = (
    X_train.reshape(len(X_train), -1),
    X_test.reshape(len(X_test), -1),
)
y_train, y_test = (
    y_train.reshape(len(y_train), -1),
    y_test.reshape(len(y_test), -1),
)

In [100]:
# Sanity check: shape of a single flattened training series.
X_train[0].shape

(286,)

In [119]:
def smooth(X, bandwidth):
    """Kernel-smooth every row of ``X`` with a Nadaraya-Watson smoother.

    All rows are wrapped in a single ``FDataGrid`` and smoothed with one
    ``fit_transform`` call, so the smoother is fitted once instead of once
    per row. No grid points are passed to ``FDataGrid``, so the library's
    default grid is used for every row.

    Parameters
    ----------
    X : array-like of shape (n_series, n_points)
        One series per row.
    bandwidth : float
        Bandwidth handed to ``NadarayaWatsonHatMatrix``.

    Returns
    -------
    numpy.ndarray of shape (n_series, n_points)
        The smoothed series, row-aligned with ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError(
            f"X must be 2-D (n_series, n_points), got shape {X.shape}"
        )
    if X.shape[0] == 0:
        # Nothing to smooth; keep the (0, n_points) shape callers expect.
        return np.zeros(X.shape)

    smoother = KernelSmoother(NadarayaWatsonHatMatrix(bandwidth=bandwidth))
    # A 2-D data matrix is read as (n_samples, n_points), so one call smooths
    # every row on the same grid.
    smoothed = smoother.fit_transform(FDataGrid(X))
    # data_matrix carries a trailing codomain axis; drop it by reshaping.
    return np.asarray(smoothed.data_matrix, dtype=float).reshape(X.shape)

def get_dissimilarity_matrix(X):
    """Build the pairwise FastDTW dissimilarity matrix between the rows of ``X``.

    Each unordered pair is evaluated once and mirrored across the diagonal,
    so the result is exactly symmetric and roughly half as many ``fastdtw``
    calls are made as a full double loop would need. The diagonal stays at
    zero (a series compared with itself).

    Parameters
    ----------
    X : numpy.ndarray of shape (n_series, n_points)
        One series per row.

    Returns
    -------
    numpy.ndarray of shape (n_series, n_series)
        Symmetric matrix whose ``[i, j]`` entry is the distance returned by
        ``fastdtw(X[i], X[j], dist=2)``.
    """
    n_series = X.shape[0]
    dissimilarity_matrix = np.zeros((n_series, n_series))
    for i in range(n_series):
        # Only the upper triangle is computed; the lower one is a mirror.
        for j in range(i + 1, n_series):
            distance, _ = fastdtw(X[i], X[j], dist=2)
            dissimilarity_matrix[i, j] = distance
            dissimilarity_matrix[j, i] = distance
    return dissimilarity_matrix

def kernel_clustering(X, lmbda, n_clusters):
    """Smooth the series, cluster them with k-medoids, and score the result.

    The rows of ``X`` are smoothed with ``smooth`` using ``lmbda`` as the
    bandwidth, pairwise dissimilarities are computed with
    ``get_dissimilarity_matrix``, and ``KMedoids`` is fitted on that
    precomputed matrix with a fixed ``random_state``.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_series, n_points)
        One series per row.
    lmbda : float
        Smoothing bandwidth; its absolute value is also added to the loss.
    n_clusters : int
        Number of clusters requested from ``KMedoids``.

    Returns
    -------
    loss
        ``inertia_`` of the fitted k-medoids model plus ``abs(lmbda)``.
    labels : numpy.ndarray
        ``labels_`` of the fitted model (one cluster index per series).
    centers : numpy.ndarray of shape (n_clusters, n_points)
        The smoothed series selected as medoids.
    """
    X_smooth = smooth(X, lmbda)
    dissimilarity_matrix = get_dissimilarity_matrix(X_smooth)
    kmedoids = KMedoids(n_clusters=n_clusters, metric="precomputed", random_state=42)
    kmedoids.fit(dissimilarity_matrix)

    # With metric="precomputed" the estimator never sees the series, and its
    # cluster_centers_ attribute is None. Recover the actual medoid series
    # from the medoid row indices instead.
    centers = X_smooth[kmedoids.medoid_indices_]
    loss = kmedoids.inertia_ + abs(lmbda)
    return loss, kmedoids.labels_, centers

def optimize_lambda(X, n_clusters, initial_lambda=0.2, bounds=(0.05, 2.0)):
    """Search for the bandwidth that minimises the ``kernel_clustering`` loss.

    Runs a bounded Nelder-Mead search over a single parameter, the
    smoothing bandwidth.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_series, n_points)
        Series to cluster, one per row.
    n_clusters : int
        Number of clusters forwarded to ``kernel_clustering``.
    initial_lambda : float, default 0.2
        Starting point of the search.
    bounds : tuple of (float, float), default (0.05, 2.0)
        Lower and upper limit for the bandwidth.

    Returns
    -------
    best_lambda : float
        Bandwidth at which the optimiser stopped.
    best_loss : float
        Loss value at ``best_lambda``.
    """
    lower, upper = bounds

    def objective(lmbda):
        # The optimiser passes its parameter vector as a 1-element array.
        # Unwrap it so the bandwidth and the returned loss are plain scalars.
        bandwidth = float(np.ravel(lmbda)[0])
        loss = kernel_clustering(X, bandwidth, n_clusters)[0]
        return float(np.ravel(loss)[0])

    result = minimize(
        objective,
        initial_lambda,
        method='Nelder-Mead',
        bounds=[(lower, upper)],
    )

    return float(result.x[0]), float(result.fun)

In [120]:
# Tune the smoothing bandwidth on the training series for a 2-cluster solution.
optimal_lambda, optimal_loss = optimize_lambda(X_train, n_clusters=2)

In [122]:
# Re-run the clustering once at the tuned bandwidth to obtain the final labels.
loss, clustering, centers = kernel_clustering(
    X_train,
    lmbda=optimal_lambda,
    n_clusters=2,
)

In [124]:
# Rand index between the true training labels and the cluster assignment.
# ravel() supplies the flat label vector without rebinding y_train, and the
# arguments follow the (labels_true, labels_pred) order of rand_score.
rand_score(y_train.ravel(), clustering)

0.6162790697674418