In [1]:
# Load packages
import numpy as np
import pickle

from FDApy.preprocessing.dim_reduction.fpca import MFPCA
from FDApy.representation.functional_data import MultivariateFunctionalData
from FDApy.clustering.fcubt import Node, FCUBT

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.metrics import adjusted_rand_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.utils import shuffle

In [2]:
# Load data
with open('./data/scenario_2_smooth.pkl', 'rb') as f:
    data_fd = pickle.load(f)
with open('./data/labels.pkl', 'rb') as f:
    labels = pickle.load(f)

In [3]:
# Split data into train/test set
x = np.arange(data_fd.n_obs)
np.random.shuffle(x)

data_shuffle = [data[x] for data in data_fd]
labels_shuffle = labels[x]
new_data = MultivariateFunctionalData(data_shuffle)

pct = 0.33
s = int(np.ceil((1 - pct) * new_data.n_obs))
train = MultivariateFunctionalData([data[:s] for data in new_data])
test = MultivariateFunctionalData([data[s:] for data in new_data])
labels_train = labels_shuffle[:s]
labels_test = labels_shuffle[s:]

## Perform MFPCA

In [8]:
# Do MFPCA on the data
fpca = MFPCA(n_components=[0.99, 0.99])
fpca.fit(train, method='NumInt')
    
# Compute scores
train_proj = fpca.transform(train)
test_proj = fpca.transform(test)

## Classification using GaussianProcessClassifier

In [9]:
# Fit GPC
gp = GaussianProcessClassifier(1.0 * RBF(1.0))
gp.fit(train_proj, labels_train)

GaussianProcessClassifier(kernel=1**2 * RBF(length_scale=1))

In [10]:
# Prediction for GPC
pred_gpc = gp.predict(test_proj)

## CART classification

In [11]:
# Fit CART
clf = RandomForestClassifier()
clf.fit(train_proj, labels_train)

RandomForestClassifier()

In [12]:
# Prediction for CART
pred_cart = clf.predict(test_proj)

## fCUBT clustering

In [13]:
# Initialization of the tree
root_node = Node(train, is_root=True)
fcubt = FCUBT(root_node=root_node)

In [14]:
# Grow and join the tree
fcubt.grow(n_components=[0.95, 0.95])
fcubt.join(n_components=[0.95, 0.95])

In [15]:
# Perform the prediction on the test set
pred_fcubt = fcubt.predict(test, step='join')

## Results

In [17]:
print(f'Adjusted Rand Index for FCUBT: {adjusted_rand_score(labels_test, pred_fcubt)}')
print(f'Adjusted Rand Index for GPC: {adjusted_rand_score(labels_test, pred_gpc)}')
print(f'Adjusted Rand Index for CART: {adjusted_rand_score(labels_test, pred_cart)}')

Adjusted Rand Index for FCUBT: 0.9113314232196531
Adjusted Rand Index for GPC: 0.877167367885907
Adjusted Rand Index for CART: 0.8936439496246166
