# Cluster the data using FCUBT

In [1]:
# Load packages
import os

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import pandas as pd
import seaborn as sns

from FDApy.clustering.fcubt import Node, FCUBT
from FDApy.misc import utils
from FDApy.preprocessing.dim_reduction.fpca import MFPCA, UFPCA
from FDApy.preprocessing.smoothing import local_polynomial
from FDApy.representation.functional_data import DenseFunctionalData
from FDApy.representation.functional_data import IrregularFunctionalData
from FDApy.representation.functional_data import MultivariateFunctionalData
from FDApy.visualization.plot import plot

from sklearn import metrics, preprocessing
from scipy.integrate import simps

# Colour palette: the values of matplotlib's XKCD named-colour table.
color = [*mcolors.XKCD_COLORS.values()]

# Recording identifier, kept as a string so the leading zero is preserved.
# NOTE(review): not referenced in the visible cells -- confirm it is still needed.
RECORDING_NUMBER = '00'

# Variable names, grouped as position / velocity / acceleration pairs.
# Presumably these match the six functions of the dataset -- TODO confirm.
VAR = [
    'x', 'y',
    'xVelocity', 'yVelocity',
    'xAcceleration', 'yAcceleration',
]

In [2]:
# Load the pickled functional data object from the local data directory.
# NOTE: unpickling can execute arbitrary code, so only load files that were
# produced by a trusted step of this project.
data_fd = pd.read_pickle('./data/tracks_smooth_fd.pkl')

In [3]:
# Display the loaded object; as the last expression of the cell, its repr is
# rendered as the cell output (a quick check of what was loaded).
data_fd

Multivariate functional data object with 6 functions of 310 observations.

## Perform clustering

In [4]:
# The whole dataset is placed in a single node flagged as the root of the tree.
root_node = Node(
    data_fd,
    is_root=True,
)

# The FCUBT model is built on top of that root node.
fcubt = FCUBT(
    root_node=root_node,
)

In [5]:
%%time
# Grow the tree with one component per entry of `n_components` (six entries).
# NOTE(review): `min_size` and `max_group` presumably bound the node size and
# the number of groups tried per split -- confirm against the FDApy docs.
fcubt.grow(
    n_components=[1] * 6,
    min_size=20,
    max_group=3,
)

CPU times: user 3min 52s, sys: 33.2 s, total: 4min 25s
Wall time: 2min 4s


In [6]:
# Show `mapping_grow`: a dict from leaf nodes of the tree to integer labels
# (see the output below). Presumably filled in by the grow step above.
fcubt.mapping_grow

{Node(id=(4, 0), is_root=False, is_leaf=True): 0,
 Node(id=(4, 1), is_root=False, is_leaf=True): 1,
 Node(id=(4, 2), is_root=False, is_leaf=True): 2,
 Node(id=(4, 3), is_root=False, is_leaf=True): 3,
 Node(id=(4, 4), is_root=False, is_leaf=True): 4,
 Node(id=(4, 5), is_root=False, is_leaf=True): 5,
 Node(id=(4, 6), is_root=False, is_leaf=True): 6,
 Node(id=(4, 11), is_root=False, is_leaf=True): 7,
 Node(id=(4, 14), is_root=False, is_leaf=True): 8,
 Node(id=(4, 15), is_root=False, is_leaf=True): 9,
 Node(id=(5, 15), is_root=False, is_leaf=True): 10,
 Node(id=(5, 16), is_root=False, is_leaf=True): 11,
 Node(id=(5, 17), is_root=False, is_leaf=True): 12,
 Node(id=(5, 18), is_root=False, is_leaf=True): 13,
 Node(id=(5, 19), is_root=False, is_leaf=True): 14,
 Node(id=(5, 20), is_root=False, is_leaf=True): 15,
 Node(id=(5, 21), is_root=False, is_leaf=True): 16,
 Node(id=(5, 24), is_root=False, is_leaf=True): 17,
 Node(id=(5, 26), is_root=False, is_leaf=True): 18,
 Node(id=(5, 27), is_root=Fal

In [7]:
# Run the join step of the tree, using the same six-entry `n_components`
# (one component each) that was passed to `grow`.
fcubt.join(
    n_components=[1] * 6,
)

In [8]:
# Show `mapping_join`: a dict from leaf nodes of the tree to integer labels
# (see the output below). Presumably updated by the join step above.
fcubt.mapping_join

{Node(id=(4, 0), is_root=False, is_leaf=True): 0,
 Node(id=(4, 1), is_root=False, is_leaf=True): 1,
 Node(id=(4, 2), is_root=False, is_leaf=True): 2,
 Node(id=(4, 3), is_root=False, is_leaf=True): 3,
 Node(id=(4, 4), is_root=False, is_leaf=True): 4,
 Node(id=(4, 5), is_root=False, is_leaf=True): 5,
 Node(id=(4, 6), is_root=False, is_leaf=True): 6,
 Node(id=(4, 11), is_root=False, is_leaf=True): 7,
 Node(id=(4, 14), is_root=False, is_leaf=True): 8,
 Node(id=(4, 15), is_root=False, is_leaf=True): 9,
 Node(id=(5, 15), is_root=False, is_leaf=True): 10,
 Node(id=(5, 16), is_root=False, is_leaf=True): 11,
 Node(id=(5, 17), is_root=False, is_leaf=True): 12,
 Node(id=(5, 18), is_root=False, is_leaf=True): 13,
 Node(id=(5, 19), is_root=False, is_leaf=True): 14,
 Node(id=(5, 20), is_root=False, is_leaf=True): 15,
 Node(id=(5, 21), is_root=False, is_leaf=True): 16,
 Node(id=(5, 26), is_root=False, is_leaf=True): 17,
 Node(id=(6, 28), is_root=False, is_leaf=True): 18,
 Node(id=(6, 29), is_root=Fal

In [9]:
# Save the tree: pickle the whole FCUBT object into the local data directory.
pd.to_pickle(fcubt, './data/tree.pkl')

In [10]:
# Save clustering results: pickle the labels held in `fcubt.labels_join`
# into the local data directory.
pd.to_pickle(fcubt.labels_join, './data/fcubt_results.pkl')