# Perform $k$-means on the data

In [1]:
# Load packages
import numpy as np
import pickle

from FDApy.representation.functional_data import DenseFunctionalData
from FDApy.representation.functional_data import MultivariateFunctionalData

from skfda import FDataGrid
from skfda.ml.clustering import KMeans

In [2]:
# Read the pre-smoothed data set back from disk.
# NOTE: pickle.load can execute arbitrary code, so only open files you trust.
# The later cells index the result (data_fd[0], data_fd[1]) and read
# `.values` / `.argvals` from each component.
with open('./data/canadian_smooth.pkl', 'rb') as pickle_file:
    data_fd = pickle.load(pickle_file)

In [3]:
# Extract the raw value arrays of the two components for skfda
temperature = data_fd[0].values
precipitation = data_fd[1].values

# skfda only accepts components that have the same shape, so the
# precipitation array is widened by one column: its last column is repeated.
# (presumably precipitation has one sampling point fewer than temperature
# -- TODO confirm)
new_prec = np.concatenate([precipitation, precipitation[:, -1:]], axis=1)

In [4]:
# Build the FDataGrid object used by the clustering cells below.
# The sampling points are taken from the first component of data_fd.
sample_points = data_fd[0].argvals['input_dim_0']
# Stack temperature and (padded) precipitation along a new trailing axis so
# that both variables are carried by a single array.
data_matrix = np.stack((temperature, new_prec), axis=-1)
fdata = FDataGrid(data_matrix, sample_points)

In [5]:
# Compute the first-order derivative of the functional data in `fdata`.
# The result is clustered separately in the "k-means on derivatives" cell.
fdata_derivatives = fdata.derivative(order=1)

In [6]:
# Run k-means on the curves for every number of clusters from 2 to 8.
# `res` maps the number of clusters to the label assigned to each observation.
res = {}
for n_clusters in np.arange(2, 9):
    # fit() returns the fitted estimator, so the prediction can be chained
    res[n_clusters] = KMeans(n_clusters=n_clusters).fit(fdata).predict(fdata)

In [7]:
# Display the labels predicted on the curves when 4 clusters are requested
res[4]

array([3, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 2, 2, 1, 3, 1, 0, 0, 0, 0, 0])

In [8]:
# Store the labels obtained on the curves (one entry per number of clusters).
# The ./results directory must already exist.
with open('./results/results_weather_kmeans_d1.pkl', 'wb') as output_file:
    pickle.dump(res, output_file)

In [9]:
# Run k-means on the first derivatives for every number of clusters from
# 2 to 8. `res_derivative` maps the number of clusters to the label assigned
# to each observation.
res_derivative = {}
for n_clusters in np.arange(2, 9):
    # fit() returns the fitted estimator, so the prediction can be chained
    model = KMeans(n_clusters=n_clusters).fit(fdata_derivatives)
    res_derivative[n_clusters] = model.predict(fdata_derivatives)

In [10]:
# Display the labels predicted on the derivatives when 4 clusters are requested
res_derivative[4]

array([0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 3, 1, 1, 1, 0, 0, 0])

In [11]:
# Store the labels obtained on the derivatives (one entry per number of
# clusters). The ./results directory must already exist.
with open('./results/results_weather_kmeans_d2.pkl', 'wb') as output_file:
    pickle.dump(res_derivative, output_file)