# Convert the data into functional data format

In [1]:
# Load packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from FDApy.misc import utils
from FDApy.preprocessing.smoothing.bandwidth import Bandwidth
from FDApy.representation.functional_data import DenseFunctionalData
from FDApy.representation.functional_data import IrregularFunctionalData
from FDApy.representation.functional_data import MultivariateFunctionalData
from FDApy.visualization.plot import plot

# NOTE(review): presumably selects which recording to process, but no cell
# visible in this notebook reads it — confirm it is still needed.
RECORDING_NUMBER = '00'

In [2]:
# Load the pickled inputs from the local ./data directory.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# come from a trusted source.
tracks = pd.read_pickle('./data/tracks_sub.pkl')
info = pd.read_pickle('./data/info.pkl')
meta = pd.read_pickle('./data/meta.pkl')

Each element of `tracks` is a dictionary; its keys include `recordingId`, `trackId`, `xCenter`, `yCenter`, `xVelocity`, `yVelocity`, `xAcceleration`, `yAcceleration`, ...

In [4]:
# Helper to pull the value stored under one key out of every record
def get_key(l, key_name):
    """
    Extract the value stored under a given key from each dictionary of a list.
    :param l: a list of dictionaries (any objects subscriptable by ``key_name``).
    :param key_name: the key to look up in every element of ``l``.
    :return: a list with ``element[key_name]`` for each element, in input order.
    """
    values = [record[key_name] for record in l]
    return values

In [5]:
# Build one list per tracked quantity; names on the left line up positionally
# with the keys on the right.
x, y, xVelocity, yVelocity, xAcceleration, yAcceleration = (
    get_key(tracks, key_name)
    for key_name in (
        'xCenter',
        'yCenter',
        'xVelocity',
        'yVelocity',
        'xAcceleration',
        'yAcceleration',
    )
)

In [6]:
# Report the average number of sampling points per curve, measured on xVelocity.
print(
    "The mean number of sampling points in a curve is "
    f"{np.mean([len(curve) for curve in xVelocity])}."
)

The mean number of sampling points in a curve is 382.31935483870967.


In [7]:
# Helpers that build the argvals / values dictionaries passed to IrregularFunctionalData
def create_argvals(l):
    """
    Build one evenly spaced sampling grid on [0, 1] per element of a list.
    :param l: a list of sized objects (one per curve).
    :return: a dictionary mapping each position in ``l`` to
        ``np.linspace(0, 1, num=len(element))``.
    """
    argvals = {}
    for position, curve in enumerate(l):
        # Every curve is mapped onto the same [0, 1] interval, whatever its length.
        argvals[position] = np.linspace(0, 1, num=len(curve))
    return argvals

def create_values(l):
    """
    Index the elements of a list by their position.
    :param l: a list (one element per curve).
    :return: a dictionary mapping each position in ``l`` to the element itself
        (the elements are not copied).
    """
    return dict(enumerate(l))

In [8]:
# Wrap each list of curves as IrregularFunctionalData: the sampling grids come
# from create_argvals and the observations from create_values.
(
    x_fd,
    y_fd,
    xVelocity_fd,
    yVelocity_fd,
    xAcceleration_fd,
    yAcceleration_fd,
) = (
    IrregularFunctionalData(
        {'input_dim_0': create_argvals(curves)},
        create_values(curves),
    )
    for curves in (x, y, xVelocity, yVelocity, xAcceleration, yAcceleration)
)

## Save the data

In [15]:
%%time
# Write each univariate functional data object to its own pickle under ./data.
# Paths are plain literals (no interpolation needed), paired with their object.
for fd_object, pickle_path in (
    (x_fd, './data/x_fd.pkl'),
    (y_fd, './data/y_fd.pkl'),
    (xVelocity_fd, './data/xVelocity_fd.pkl'),
    (yVelocity_fd, './data/yVelocity_fd.pkl'),
    (xAcceleration_fd, './data/xAcceleration_fd.pkl'),
    (yAcceleration_fd, './data/yAcceleration_fd.pkl'),
):
    pd.to_pickle(fd_object, pickle_path)

CPU times: user 36.8 ms, sys: 28.5 ms, total: 65.3 ms
Wall time: 120 ms


## Multivariate functional data

In [16]:
# Combine the six univariate objects into one multivariate functional data
# object; the list order fixes the order of the components.
data_fd = MultivariateFunctionalData([
    x_fd, y_fd,
    xVelocity_fd, yVelocity_fd,
    xAcceleration_fd, yAcceleration_fd,
])

In [17]:
# Save the multivariate functional data object to ./data/tracks_fd.pkl
pd.to_pickle(data_fd, './data/tracks_fd.pkl')