[![Fixel Algorithms](https://fixelalgorithms.co/images/CCExt.png)](https://fixelalgorithms.gitlab.io)

# AI Program

## Machine Learning - Supervised Learning - Ensemble Methods - MNIST by 1D Features

The notebook is based on [Aaron Zuspan - Classifying MNIST as 1D Signals](https://www.aazuspan.dev/blog/classifying-mnist-as-1d-signals).

> Notebook by:
> - Royi Avital RoyiAvital@fixelalgorithms.com

## Revision History

| Version | Date       | User        |Content / Changes                                                   |
|---------|------------|-------------|--------------------------------------------------------------------|
| 1.0.000 | 25/08/2025 | Royi Avital | First version                                                      |

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/FixelAlgorithmsTeam/FixelCourses/blob/master/AIProgram/2024_02/0002PointLine.ipynb)

In [None]:
# Import Packages

# General Tools
import numpy as np
import scipy as sp
import pandas as pd

# Scientific Python

# Image Processing & Computer Vision
import skimage as ski

# Machine Learning
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
import pycatch22
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters

# Miscellaneous
import math
from platform import python_version
import random

# Typing 
from typing import Callable, Dict, List, Optional, Tuple
from numpy.typing import NDArray

# Visualization
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
import seaborn as sns

# Jupyter
from IPython import get_ipython

## Notations

* <font color='red'>(**?**)</font> Question to answer interactively.
* <font color='blue'>(**!**)</font> Simple task to add code for the notebook.
* <font color='green'>(**@**)</font> Optional / Extra self practice.
* <font color='brown'>(**#**)</font> Note / Useful resource / Food for thought.

Code Notations:

```python
someVar    = 2; #<! Notation for a variable
vVector    = np.random.rand(4) #<! Notation for 1D array
mMatrix    = np.random.rand(4, 3) #<! Notation for 2D array
tTensor    = np.random.rand(4, 3, 2, 3) #<! Notation for nD array (Tensor)
tuTuple    = (1, 2, 3) #<! Notation for a tuple
lList      = [1, 2, 3] #<! Notation for a list
dDict      = {1: 3, 2: 2, 3: 1} #<! Notation for a dictionary
oObj       = MyClass() #<! Notation for an object
dfData     = pd.DataFrame() #<! Notation for a data frame
dsData     = pd.Series() #<! Notation for a series
hObj       = plt.Axes() #<! Notation for an object / handler / function handler
```

### Code Exercise

 - Single line fill

```python
valToFill = ???
```

 - Multi Line to Fill (At least one)

 ```python
 # You need to start writing
 ?????
 ```

 - Section to Fill

```python
#===========================Fill This===========================#
# 1. Explanation about what to do.
# !! Remarks to follow / take under consideration.
mX = ???

?????
#===============================================================#
```

In [None]:
# Configuration
# %matplotlib inline

# warnings.filterwarnings("ignore")

# Seed both the NumPy and the Python global random generators with the same value
seedNum = 512
np.random.seed(seedNum)
random.seed(seedNum)

# Matplotlib default color palette
lMatPltLibclr = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
# sns.set_theme() #<! Apply SeaBorn theme
# sns.set_palette("tab10")

runInGoogleColab = 'google.colab' in str(get_ipython()) #<! `True` if the string form of the IPython shell contains 'google.colab'

In [None]:
# Constants

# Plotting defaults
# NOTE(review): Only `TU_MNIST_IMG_SIZE` is referenced by the code visible in this notebook
FIG_SIZE_DEF    = (8, 8)
ELM_SIZE_DEF    = 50
CLASS_COLOR     = ('b', 'r')
EDGE_COLOR      = 'k'
MARKER_SIZE_DEF = 10
LINE_WIDTH_DEF  = 2

TU_MNIST_IMG_SIZE = (28, 28) #<! Shape used to reshape a flattened MNIST sample (28 * 28 = 784) into an image

In [None]:
# Course Packages


In [None]:
# Auxiliary Functions

def ConvertDataSet( mX: NDArray, /, *, resampleRes: Optional[int] = None ) -> NDArray:
    """
    Converts flattened MNIST like images into 1D signals.  
    Each image is warped into polar coordinates and summed along the radius, 
    which yields a single value per angle.

    Parameters
    ----------
    mX : NDArray
        Flattened images arranged as rows, (numSamples, 784), where 784 = 28 * 28.
    resampleRes : int or None
        If given, each image is resized to (resampleRes, resampleRes) before the polar warp.

    Returns
    -------
    NDArray
        Matrix of (numSamples, numFeatures), one row per image and one column per angle.
    """

    tI = np.reshape(mX, (-1, 28, 28)) #<! (numSamples, 28, 28), the last dimension is contiguous
    tI = np.transpose(tI, (1, 2, 0))  #<! (28, 28, numSamples), the samples are treated as channels

    if resampleRes is not None:
        # Only the 2 spatial dimensions are given, the samples dimension is kept as is
        tI = ski.transform.resize(tI, (resampleRes, resampleRes))
    
    tP = ski.transform.warp_polar(tI, channel_axis = 2) #<! Polar (360, radius, numSamples)
    mP = np.sum(tP, axis = 1) #<! Sum along the radius -> (360, numSamples)

    return mP.T #<! Return a matrix of (numSamples, numFeatures)

def PlotMnistImages( mX: np.ndarray, vY: np.ndarray, numRows: int, numCols: Optional[int] = None, tuImgSize: Tuple = (28, 28), randomChoice: bool = True, lClasses: Optional[List] = None, hF: Optional[plt.Figure] = None ) -> plt.Figure:
    """
    Plots a grid of images, each titled with its index and label.

    Parameters
    ----------
    mX : np.ndarray
        Flattened images arranged as rows (numSamples, numPixels).
    vY : np.ndarray
        Label per sample, indexed with the same index as the rows of `mX`.
    numRows : int
        Number of rows in the grid.
    numCols : int or None
        Number of columns in the grid. If None, `numRows` is used.
    tuImgSize : Tuple
        Shape each row of `mX` is reshaped into. Length 2 is shown with a gray color map, length 3 as is.
    randomChoice : bool
        If True, each image is drawn at random (with repetitions) from the samples.
        Otherwise the first `numRows * numCols` samples are shown.
    lClasses : List or None
        If given, the title shows `lClasses[label]` instead of the raw label.
    hF : plt.Figure or None
        Figure to draw into (its existing axes are used). If None, a new figure is created.

    Returns
    -------
    plt.Figure
        The figure holding the grid.

    Raises
    ------
    ValueError
        If the length of `tuImgSize` is neither 2 nor 3.
    """

    # The image rank is loop invariant, validate it once before any figure is created
    if len(tuImgSize) not in (2, 3):
        raise ValueError(f'The length of the image size tuple is {len(tuImgSize)} which is not supported')
    colorMap = 'gray' if len(tuImgSize) == 2 else None #<! `None` keeps the default behavior of `imshow()`

    numSamples = mX.shape[0]

    if numCols is None:
        numCols = numRows

    tFigSize = (numCols * 3, numRows * 3)

    if hF is None:
        hF, hA = plt.subplots(numRows, numCols, figsize = tFigSize)
    else:
        hA = hF.axes
    
    hA = np.atleast_1d(hA) #<! To support numImg = 1
    hA = hA.flat
    
    for kk in range(numRows * numCols):
        idx = np.random.choice(numSamples) if randomChoice else kk
        mI  = np.reshape(mX[idx, :], tuImgSize)
    
        hA[kk].imshow(mI, cmap = colorMap)
        # Hide all ticks and tick labels
        hA[kk].tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
                           labelleft = False, labeltop = False, labelright = False, labelbottom = False)
        if lClasses is None:
            hA[kk].set_title(f'Index = {idx}, Label = {vY[idx]}')
        else:
            hA[kk].set_title(f'Index = {idx}, Label = {lClasses[vY[idx]]}')
    
    return hF

def PlotLabelsHistogram( vY: np.ndarray, hA: Optional[plt.Axes] = None, lClass: Optional[List] = None, xLabelRot: Optional[int] = None ) -> plt.Axes:
    """
    Plots a bar chart of the number of samples per unique label.

    Parameters
    ----------
    vY : np.ndarray
        Labels vector.
    hA : plt.Axes or None
        Axes to draw into. If None, a new (8, 6) figure is created.
    lClass : List or None
        If given, replaces the tick labels of the x axis.
    xLabelRot : int or None
        If given, rotation applied to each tick label of the x axis.

    Returns
    -------
    plt.Axes
        The axes holding the bar chart.
    """

    if hA is None:
        _, hA = plt.subplots(figsize = (8, 6))
    
    vClass, vCount = np.unique(vY, return_counts = True)
    lTickLabel     = [f'{classVal}' for classVal in vClass]

    hA.bar(vClass, vCount, width = 0.9, align = 'center')
    hA.set_title('Histogram of Classes / Labels')
    hA.set_xlabel('Class')
    hA.set_xticks(vClass, lTickLabel)
    hA.set_ylabel('Count')
    if lClass is not None:
        hA.set_xticklabels(lClass) #<! Overrides the numeric tick labels set above
    
    if xLabelRot is not None:
        for hTickLabel in hA.get_xticklabels():
            hTickLabel.set_rotation(xLabelRot)

    return hA

## Feature Engineering

The role of feature engineering in the Machine Learning pipeline is to transform the data into an optimal, task oriented representation.

![](https://i.imgur.com/28w7PJs.png)
<!-- ![](https://i.postimg.cc/6qtzY9YV/image.png) -->

The potential effect of _Feature Engineering_ on the quality of the process is significant.  
Specifically, in Classic _Machine Learning_, it is the most important step.

Feature Engineering processing may include:
 - Feature Generation / Extraction.  
   Generate new features from the given data.  
   <font color='magenta'>Example</font>: Extract the Day of Week from the date.  
   <font color='magenta'>Example</font>: Calculate the _Skewness_ and _Kurtosis_ of the sample.
 - Feature Transform.  
   Apply a function to generate a different representation for the feature.  
   <font color='magenta'>Example</font>: Normalization of the features.  
   <font color='magenta'>Example</font>: Use Polar coordinates.  
   <font color='magenta'>Example</font>: Use the _Kernel Trick_.
 - Feature Selection.  
   Reducing the number of features to a smaller set of features.  
   Either selecting a sub sample of the features ("Hard") or by a combination ("Soft").  
   The "Soft" approach is often applied by a Linear / Non Linear _Dimensionality Reduction_.  
   <font color='magenta'>Example</font>: Selection by Feature Importance analysis (_Permutation Importance_).  
   <font color='magenta'>Example</font>: Apply PCA or UMAP on the data.

<br>

* <font color='brown'>(**#**)</font> Mathematically "Feature Transform" generalizes all cases.
* <font color='brown'>(**#**)</font> If some transformation is applied during training, the same transformation should be applied in test (Production).
* <font color='brown'>(**#**)</font> _Data Leakage_ is a common mistake during the feature engineering phase.

### Features for 1D Signal Classification

One way to classify different features of 1D signals would be:

 - Statistical Features  
   Treat the data as a set of values.    
   Summarize data using descriptive statistics.  
   Features which are insensitive to the ordering of the observations are included in this set.  
   <font color='magenta'>Example</font>: Mean, Variance, Skewness, Kurtosis, Percentiles, Entropy.
 - Temporal Features  
   Features which analyze the changes and patterns in the data over time.  
   Sensitive to the order of the samples.  
   Captures temporal correlations, trends and rate of changes.  
   <font color='magenta'>Example</font>: Auto Correlation, Mean Absolute Change, Linear Trend (Slope), Number of Mean Crossings.
 - Spectral Features
 - Structural Features


In [None]:
# Parameters

numSamplesTrain = 9_000 #<! Number of samples in the train split
numSamplesTest  = 1_000 #<! Number of samples in the test split

numImg = 3 #<! Passed as the number of rows of the grid of sample images

# Features
resampleRes = 56 #<! Images are resized to (resampleRes, resampleRes) before the polar transform

# Visualization
exportFig = False #<! If `True`, the generated figures are saved to files

## Generate Data


### The MNIST Dataset

The MNIST Data Set is the "Hello World" dataset of Machine Learning.


* <font color='red'>(**?**)</font> Will the solution of the Squared Euclidean Distance be the same as the Euclidean Distance?

In [None]:
# Generate / Load Data 

# Fetch the data from OpenML as NumPy arrays (Not a data frame): Features matrix and labels vector
mX, vY = fetch_openml('mnist_784', version = 1, return_X_y = True, as_frame = False, parser = 'auto')
vY = vY.astype(np.int_) #<! The labels are strings, convert to integer

print(f'The features data shape: {mX.shape}')
print(f'The labels data shape: {vY.shape}')
print(f'The unique values of the labels: {np.unique(vY)}')

In [None]:
# Pre Processing

# The image is in the range {0, 1, ..., 255}
# We scale it into [0, 1]

#===========================Fill This===========================#
# 1. Scale the values into the [0, 1] range.
mX = mX / 255.0 #<! Creates a new array, the loaded array is not modified in place

#===============================================================#

In [None]:
# Train Test Split

#===========================Fill This===========================#
# 1. Split the data such that the Train Data has `numSamplesTrain`.
# 2. Split the data such that the Test Data has `numSamplesTest`.
# 3. The distribution of the classes must match the original data.

numClass = len(np.unique(vY)) #<! Number of unique labels
# `stratify = vY` keeps the proportions of the classes in both splits
# NOTE(review): No `random_state` is given, presumably the shuffle depends on the global NumPy seed set in the configuration cell - confirm
mXTrain, mXTest, vYTrain, vYTest = train_test_split(mX, vY, test_size = numSamplesTest, train_size = numSamplesTrain, shuffle = True, stratify = vY)

#===============================================================#

print(f'The training features data shape: {mXTrain.shape}')
print(f'The training labels data shape  : {vYTrain.shape}')
print(f'The test features data shape    : {mXTest.shape}')
print(f'The test labels data shape      : {vYTest.shape}')
print(f'The unique values of the labels : {np.unique(vY)}') #<! Unique labels of the whole data (Not of a specific split)

### Explore the Data

In [None]:
# Plot the Data

# Grid of `numImg` x `numImg` samples chosen at random from the whole data (Defaults of the function)
hF = PlotMnistImages(mX, vY, numImg)

In [None]:
# Distribution of Labels

# Number of samples per label over the whole data (Before the split)
hA = PlotLabelsHistogram(vY)
plt.show()

In [None]:
# Mean Image per Class

tI = np.zeros(shape = (numClass, ) + TU_MNIST_IMG_SIZE) #<! (numClass, 28, 28)

# The loop index is used as the label value (Assumes the labels are 0, 1, ..., numClass - 1)
for ii in range(numClass):
    vIdx = vY == ii #<! Boolean mask of the samples of the class
    vF = np.mean(mX[vIdx], axis = 0) #<! (numFeatures, )
    tI[ii] = np.reshape(vF, TU_MNIST_IMG_SIZE)

In [None]:
# Plot Mean Images

# A single row with an axes per class
hF, vHa = plt.subplots(nrows = 1, ncols = numClass, figsize = (18, 2))
vHa = vHa.flat

for ii, hA in enumerate(vHa):
    hA.imshow(tI[ii], cmap = 'gray')
    # Hide all ticks and tick labels
    hA.tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
                   labelleft = False, labeltop = False, labelright = False, labelbottom = False)
    hA.set_title(f'Label {ii}')

hF.suptitle('Mean Class Image');

### Cartesian and Polar Coordinate Systems

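The main motivation is to transform rotations into translations: a rotation of the image around its center becomes a (circular) shift along the θ axis in polar coordinates.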

In [None]:
# Cartesian and Polar Coordinates for an Image
# Illustration only: Draws a Cartesian pixel grid next to a polar grid, with a single highlighted cell in each

hF = plt.figure(figsize = (8, 4))

hAxCart  = hF.add_subplot(1, 2, 1)
hAxPolar = hF.add_subplot(1, 2, 2, projection = 'polar')

# Cartesian Grid
Nx, Ny = 8, 8                   # grid size
hAxCart.set_xlim(0, Nx)
hAxCart.set_ylim(Ny, 0)         # y grows downward (like images)
hAxCart.set_aspect('equal')

# Grid
for x in range(Nx + 1):
    hAxCart.axvline(x, color = '0.75', lw = 1)
for y in range(Ny + 1):
    hAxCart.axhline(y, color = '0.75', lw = 1)

# Image in (x, y) Discrete Coordinates (1 based)
# Highlight pixel (6, 3) 
px, py = 6, 3
# The `- 1` converts the 1 based pixel index into the corner of its unit square
patchRect = Rectangle((px - 1, py - 1), 1, 1, facecolor = '0.7', edgecolor = 'k')
hAxCart.add_patch(patchRect)

# Annotations
# The arrow points to the center of the highlighted pixel
hAxCart.annotate('Pixel (6, 3)', xy = (px - 0.5, py - 0.5),
                 xytext = (px - 2.5, py + 1.3),
                 arrowprops = dict(arrowstyle = "->", lw = 1.2),
                 ha = 'center', va = 'center')

hAxCart.set_title('Cartesian Coordinates')
hAxCart.set_xlabel('x')
hAxCart.set_xticks(range(Nx + 1))
hAxCart.set_xticklabels([])
hAxCart.set_ylabel('y')
hAxCart.set_yticks(range(Ny + 1))
hAxCart.set_yticklabels([])

# Polar Grid
nr, nt = 6, 24 #<! Radial and Angular Grid resolution
R = nr         #<! Outer radius
hAxPolar.set_ylim(0, R)
hAxPolar.set_thetalim(0, 2 * math.pi)

# Grid: radial circles and spokes
hAxPolar.set_rticks(range(1, nr + 1))
hAxPolar.set_thetagrids(np.degrees(np.linspace(0, 2 * math.pi, nt, endpoint = False)))
hAxPolar.grid(True, lw = 0.8, color = '0.75')

# Image in (r, θ) Discrete Coordinates (1 based)
# Highlight sector / pixel (2, 4)
ri, ti = 2, 4
dr = R / nr               #<! Radial size of a cell
dth = 2 * math.pi / nt    #<! Angular size of a cell
theta0 = (ti - 1) * dth   #<! Start angle of the cell (1 based index)
bottom = (ri - 1) * dr    #<! Inner radius of the cell (1 based index)
# Use a polar bar to draw the annular sector
hAxPolar.bar(theta0, dr, width = dth, bottom = bottom, align = 'edge',
             color = '0.7', edgecolor = 'k')

hAxPolar.set_title('Polar Coordinates')
# The arrow points to the center of the highlighted sector
hAxPolar.annotate(f'Pixel ({ri}, {ti})',
                  xy = (theta0 + dth / 2, bottom + dr / 2),
                  xytext = (theta0 + 1.1 * dth, bottom + 2.2 * dr),
                  arrowprops = dict(arrowstyle = "->", lw = 1.2),
                  ha = 'center')

hAxPolar.set_yticklabels([]) 

hF.tight_layout()

if exportFig:
    hF.savefig('Coordinates.svg', transparent = True)

<!-- Should include the ExcaliDraw embedded in the image -->
![](https://i.imgur.com/hh5Hnhv.png)
<!-- ![](https://i.postimg.cc/9FP5HDQ9/Untitled-2025-05-03-2057-excalidraw.png) -->

### Aggregation per Angle

Summing the pixels along the radial axis yields, per angle θ, a profile of how the intensity is distributed around the image center.

In [None]:
# Polar and Aggregation Representation per Image

rndIdx = random.randrange(numSamplesTrain) #<! Random index of a train sample
mI = np.reshape(mXTrain[rndIdx], TU_MNIST_IMG_SIZE) #<! Image in Cartesian coordinates
mP = ski.transform.warp_polar(mI, output_shape = (280, 280)) #<! Image in polar coordinates, presumably (angle, radius) - confirm against `warp_polar()` documentation
vP = np.sum(mP, axis = 1) #<! Sum along the 2nd axis -> A single value per row of `mP`

In [None]:
# Plot Polar and Aggregation Representation per Image 
# Three panels: The image, its polar representation and the aggregated 1D signal

hF, vHa = plt.subplots(nrows = 1, ncols = 3, figsize = (12, 3))
vHa = vHa.flat

hA = vHa[0]
hA.imshow(mI, cmap = 'gray')
hA.tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
               labelleft = False, labeltop = False, labelright = False, labelbottom = False)
hA.set_xlabel('x')
hA.set_ylabel('y')
hA.set_title('Cartesian Coordinates')

hA = vHa[1]
# Transposed and with reversed row order to match the axes labels below (θ horizontal, r vertical)
hA.imshow(mP.T[::-1], cmap = 'gray')
hA.tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
               labelleft = False, labeltop = False, labelright = False, labelbottom = False)
hA.set_xlabel('θ')
hA.set_ylabel('r')
hA.set_title('Polar Coordinates')

hA = vHa[2]
hA.plot(vP)
hA.tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
               labelleft = False, labeltop = False, labelright = False, labelbottom = False)
hA.set_xlabel('θ')
hA.set_ylabel('Sum of Values')
hA.set_title('Aggregation per θ');

In [None]:
# Plot Animation
# Sweeps a ray over the image (Left) in sync with a marker over the aggregated signal (Right).
# One frame per degree is saved when `exportFig` is `True`.
mP = ski.transform.warp_polar(mI, output_shape = (360, 360))
vP = np.sum(mP, axis = 1) #<! Sum along the 2nd axis -> A single value per row of `mP`


hF, vHa = plt.subplots(nrows = 1, ncols = 2, figsize = (8, 4))
vHa = vHa.flat

# `TU_MNIST_IMG_SIZE` is (numRows, numCols) -> x is bounded by the columns, y by the rows
tuCenter  = (TU_MNIST_IMG_SIZE[1] // 2, TU_MNIST_IMG_SIZE[0] // 2) #<! (x, y)
valRadius = math.sqrt(2) * max(tuCenter) #<! Long enough to reach the corners of the image

hA = vHa[0]
hA.imshow(mI, cmap = 'gray')
lineAngle, *_ = hA.plot([tuCenter[0], tuCenter[0] + valRadius], [tuCenter[1], tuCenter[1]], color = 'r', lw = 2)
hA.tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
               labelleft = False, labeltop = False, labelright = False, labelbottom = False)
hA.set_xlabel('x')
hA.set_ylabel('y')
# The limits clip the ray to the image, using the same (x <-> columns, y <-> rows) convention as `tuCenter`
hA.set_xlim((0, TU_MNIST_IMG_SIZE[1] - 1))
hA.set_ylim((TU_MNIST_IMG_SIZE[0] - 1, 0))
hA.set_title(f'θ = {0:03d} [Deg]')

hA = vHa[1]
hA.plot(vP)
lineSum = hA.axvline(x = 0, color = 'r', lw = 2)
hA.set_xlim((0, 360))

figName = f'Figure{0:04d}.png'
if exportFig:
    hF.savefig(figName, dpi = 150)

for θ in range(360):
    θRad = -math.radians(θ)  #<! Convert degrees to radians (Negated, see `yEnd`)
    xEnd = tuCenter[0] + valRadius * math.cos(θRad)
    yEnd = tuCenter[1] - valRadius * math.sin(θRad) #<! Inverted as Y direction is down, net effect: y + r * sin(θ)
    lineAngle.set_data([tuCenter[0], xEnd], [tuCenter[1], yEnd])
    lineSum.set_xdata([θ])
    vHa[0].set_title(f'θ = {θ:03d} [Deg]')
    
    hF.canvas.draw() #<! Update the canvas before exporting

    figName = f'Figure{(θ + 1):04d}.png' #<! Frame 0 is the initial figure saved before the loop
    if exportFig:
        hF.savefig(figName, dpi = 150)
# ffmpeg -framerate 10 -i Figure%04d.png -c:v libx264 -pix_fmt yuv420p -an -movflags faststart -loop 1 output.mp4

### Projection Visualization

<iframe width="853" height="480" src="//sendvid.com/embed/kh2a1tox" frameborder="0" allowfullscreen></iframe>
<!-- <iframe allow="fullscreen" allowfullscreen height="480" src="https://streamable.com/e/g1g71s?" width="800" style="border:none;"></iframe> -->

### Analysis of the 1D Signals

In [None]:
# Transform Data into 1D
mPTrain = ConvertDataSet(mXTrain, resampleRes = resampleRes) #<! (numSamplesTrain, numFeatures)
# Data Normalization
# Per feature (Column) statistics, kept in order to apply the same transform to the test data
vMean = np.mean(mPTrain, axis = 0)
vStd  = np.std(mPTrain, axis = 0)
mPTrain -= vMean
mPTrain /= vStd #<! NOTE(review): Assumes no feature has zero standard deviation

In [None]:
# Mean Curve per Class

mM = np.zeros(shape = (numClass, mPTrain.shape[1])) #<! (numClass, numFeatures)

# The loop index is used as the label value
for ii in range(numClass):
    vIdx = vYTrain == ii #<! Boolean mask of the train samples of the class
    vF = np.mean(mPTrain[vIdx], axis = 0) #<! (360, )
    mM[ii] = vF

In [None]:
# Plot the Mean Curve per Class

# Grid of 2 rows, an axes per class (Covers all classes when `numClass` is even)
hF, vHa = plt.subplots(nrows = 2, ncols = numClass // 2, figsize = (12, 4))
vHa = vHa.flat

for ii, hA in enumerate(vHa):
    vIdx = np.flatnonzero(vYTrain == ii)
    # Up to 25 random samples of the class, without repetitions
    vIdx = np.random.choice(vIdx, size = min(25, len(vIdx)), replace = False)
    mL = mPTrain[vIdx]
    hA.plot(mL.T, lw = 0.5, color = 'k', alpha = 0.3) #<! Individual samples as faint lines
    hA.plot(mM[ii], lw = 2) #<! The mean curve of the class on top
    hA.tick_params(axis = 'both', left = False, top = False, right = False, bottom = False, 
                   labelleft = False, labeltop = False, labelright = False, labelbottom = False)
    hA.set_title(f'Label {ii}')

hF.suptitle('Mean Class Curve');

## Feature Extraction



In [None]:
# Data frame with zero valued placeholders for the features and the train labels
dfData = pd.DataFrame({'Mean': np.zeros(numSamplesTrain), 'STD': np.zeros(numSamplesTrain), 'Label': vYTrain})
dfData

In [None]:
# Extract the Mean, STD, Skewness and Kurtosis

def CalcMoments( mX: NDArray ) -> Tuple[NDArray, NDArray, NDArray, NDArray]:
    """
    Calculates per row statistics of the input: Mean, standard deviation, skewness and kurtosis.

    Parameters
    ----------
    mX : NDArray
        Signals arranged as rows (numSamples, numValues).

    Returns
    -------
    Tuple[NDArray, NDArray, NDArray, NDArray]
        The vectors `(vMean, vStd, vSkew, vKurt)`, each with a value per row of `mX`.
    """

    # All statistics are calculated along the 2nd axis -> A value per row
    vMean = np.mean(mX, axis = 1)
    vStd  = np.std(mX, axis = 1)
    vSkew = sp.stats.skew(mX, axis = 1)
    vKurt = sp.stats.kurtosis(mX, axis = 1)

    return vMean, vStd, vSkew, vKurt

In [None]:
# Per sample statistics of the 1D signals of the train data: (Mean, STD, Skewness, Kurtosis)
tuFeatures = CalcMoments(mPTrain)

In [None]:
# Fill the features by the order `CalcMoments()` returns them
dfData['Mean'] = tuFeatures[0]
dfData['STD']  = tuFeatures[1]
dfData['Skew'] = tuFeatures[2]
dfData['Kurt'] = tuFeatures[3]
# New columns are appended at the end, reorder so 'Label' is the last column
dfData = dfData.reindex(columns = ['Mean', 'STD', 'Skew', 'Kurt', 'Label'])
dfData

In [None]:
# Pair Plots
# Scatter plot per pair of columns colored by the label, KDE per column on the diagonal
oPairGrid = sns.pairplot(dfData, hue = 'Label', palette = 'tab10', diag_kind = 'kde', plot_kws = {'alpha': 0.5, 's': 10, 'edgecolor': 'k'}, diag_kws = {'fill': True})

In [None]:
# Scatter plot of the Mean vs. STD features colored by the label
hF, hA = plt.subplots(figsize = (8, 6))
sns.scatterplot(data = dfData, x = 'Mean', y = 'STD', hue = 'Label', ax = hA)
hA.set_title('Mean vs STD by Label')
hA.set_xlabel('Mean')
hA.set_ylabel('STD');

 - [SciKit Time](https://github.com/sktime/sktime).
 - [`tsfresh`](https://github.com/blue-yonder/tsfresh).
 - [Catch22](https://github.com/DynamicsAndNeuralSystems/catch22) ([`pycatch22`](https://github.com/DynamicsAndNeuralSystems/pycatch22)).
 - [`tsflex`](https://github.com/predict-idlab/tsflex).
 - [Time Series Feature Extraction Library (`tsfel`)](https://github.com/fraunhoferportugal/tsfel).
 - [Cesium](https://github.com/cesium-ml/cesium).
 - [Facebook Kats](https://github.com/facebookresearch/Kats).

In [None]:
def ExtractFeaturesTSFresh( mX: NDArray, /, *, dFeatureCalculator: Optional[Dict] = None, numProc: int = 0, showProgress: bool = True ) -> pd.DataFrame:
    """
    Extract tsfresh features from a (numSamples, numValues) array.

    Parameters
    ----------
    mX : np.ndarray
        Univariate time series arranged as rows (numSamples, numValues).
    dFeatureCalculator : dict or None
        tsfresh feature calculator dict. If None, uses EfficientFCParameters().
    numProc : int
        Number of processes, forwarded as is to the `n_jobs` argument of tsfresh's `extract_features()`.
        According to tsfresh's documentation, 0 means no parallelization.
    showProgress : bool
        Whether to show tsfresh's progress bar.

    Returns
    -------
    dfF : pd.DataFrame
        Index = sample id (0..numSamples-1). Columns = extracted features.
        Column names follow tsfresh's "value__feature__param" convention.

    Raises
    ------
    ValueError
        If `mX` is not 2D.
    """
    if np.ndim(mX) != 2:
        raise ValueError("`mX` must be 2D with shape (numSamples, numValues).")

    if dFeatureCalculator is None:
        # Built per call: A default built in the signature is a single mutable object shared by all calls
        dFeatureCalculator = EfficientFCParameters()

    mX = np.asarray(mX) #<! Supports array like input (Nested lists)
    numSamples, numValues = mX.shape

    # Build long format DataFrame expected by tsfresh: [id, time, value]
    # The row major flattening of `mX` matches the repeat (id) / tile (time) patterns
    vId = np.repeat(np.arange(numSamples), numValues) #<! Time Series identifier
    vT  = np.tile(np.arange(numValues), numSamples)   #<! Time Series time indices
    vV  = mX.reshape(-1)                              #<! Time Series values

    dfLong = pd.DataFrame({'id': vId.astype(int), 'time': vT.astype(int), 'value': vV})

    # Extract features
    dfF = extract_features(
        dfLong,
        column_id = 'id',
        column_sort = 'time',
        default_fc_parameters = dFeatureCalculator,
        n_jobs = numProc,
        disable_progressbar = not showProgress,
    )

    # Ensure rows are ordered by sample id 0..numSamples-1
    dfF = dfF.sort_index()
    
    return dfF

In [None]:
# Features of the train data using the default feature calculator of the function
dFF = ExtractFeaturesTSFresh(mPTrain, numProc = 8)

In [None]:
# Instance of the `EfficientFCParameters` settings, for inspection of the features it holds
aa = EfficientFCParameters()

In [None]:
# Hand picked subset of tsfresh feature calculators.
# Key: Name of the feature calculator. Value: `None` for no parameters, else a list of parameter sets (One feature per set).
dFeatureCalculator = {
    'abs_energy': None, 'absolute_maximum': None, 'absolute_sum_of_changes': None, 
    'benford_correlation': None, 'binned_entropy': [{'max_bins': 10}], 'c3': [{'lag': 1}, {'lag': 3}],
    'change_quantiles': [{'ql': 0.05, 'qh': 0.95, 'isabs': False, 'f_agg': 'mean'}, {'ql': 0.05, 'qh': 0.95, 'isabs': False, 'f_agg': 'var'}],
    'cid_ce': [{'normalize': True}], 'count_above_mean': None, 'count_below_mean': None, 'first_location_of_maximum': None,
    'first_location_of_minimum': None, 'fourier_entropy': [{'bins': 20}], 'kurtosis': None, 'large_standard_deviation': [{'r': 0.25}, {'r': 0.55}],
    'last_location_of_maximum': None, 'last_location_of_minimum': None, 'longest_strike_above_mean': None,
    'lempel_ziv_complexity': [{'bins': 5}, {'bins': 10}], 
    'linear_trend': [{'attr': 'pvalue'}, {'attr': 'rvalue'}, {'attr': 'intercept'}, {'attr': 'slope'}, {'attr': 'stderr'}],
    'longest_strike_below_mean': None, 'maximum': None, 'mean': None, 'mean_abs_change': None,
    'mean_change': None, 'mean_n_absolute_max': [{'number_of_maxima': 5}, {'number_of_maxima': 10}], 'mean_second_derivative_central': None,
    'median': None, 'minimum': None, 'number_crossing_m': [{'m': 0}], 'number_cwt_peaks': [{'n': 1}, {'n': 5}], 
    'quantile': [{'q': 0.1}, {'q': 0.25}, {'q': 0.5}, {'q': 0.75}, {'q': 0.9}], 'sample_entropy': None, 'skewness': None,
    'spkt_welch_density': [{'coeff': 2}, {'coeff': 5}, {'coeff': 8}], 'standard_deviation': None, 'sum_values': None,
    'symmetry_looking': [{'r': 0.05}, {'r': 0.1}, {'r': 0.25}], 'time_reversal_asymmetry_statistic': [{'lag': 1}, {'lag': 2}, {'lag': 3}],
    'variance': None, 'variation_coefficient': None,
}

In [None]:
# Features of the train data using the hand picked subset of feature calculators
dFF = ExtractFeaturesTSFresh(mPTrain, dFeatureCalculator = dFeatureCalculator, numProc = 8)

In [None]:
# Display the extracted features (A row per sample, a column per feature)
dFF

In [None]:
# Summary statistics per feature
dFF.describe()

Look at the Standard Deviation of 'value__symmetry_looking__r_0.25'. What does it mean?

In [None]:
# LightGBM classifier with its default hyper parameters
oCls = LGBMClassifier()

In [None]:
# Train on the tsfresh features (Rows of `dFF` are sorted by the sample id, hence aligned with `vYTrain`)
oCls = oCls.fit(dFF.to_numpy(), vYTrain)

In [None]:
# Score on the train data (Optimistic, as the model was fitted on the same samples)
oCls.score(dFF.to_numpy(), vYTrain)

In [None]:
# Transform Data into 1D
mPTest = ConvertDataSet(mXTest, resampleRes = resampleRes) #<! Same conversion as applied to the train data
# Data Normalization (Using the Train Data)
# The statistics are not recalculated on the test data, the transform must match the one used in training
mPTest -= vMean
mPTest /= vStd

In [None]:
# Features of the test data using the same feature calculators as for the train data
dFFTest = ExtractFeaturesTSFresh(mPTest, dFeatureCalculator = dFeatureCalculator, numProc = 8)

In [None]:
# Score on the test data (Samples not used for the fit)
oCls.score(dFFTest.to_numpy(), vYTest)

In [None]:
def ExtractFeaturesCatch22( mX: NDArray, /, *, shortNames: bool = True ) -> pd.DataFrame:
    """
    Extract Catch22 features from a (numSamples, numValues) array.  
    The features are calculated by `pycatch22.catch22_all()` with `catch24 = True`.

    Parameters
    ----------
    mX : np.ndarray
        Univariate time series arranged as rows (numSamples, numValues).
    shortNames : bool
        Whether to use short names for the features.

    Returns
    -------
    dfF : pd.DataFrame
        Index = sample id (0..numSamples-1). Columns = extracted features.
        Column names are the feature names reported by `pycatch22` (Short or full, according to `shortNames`).

    Raises
    ------
    ValueError
        If `mX` is not 2D or has no samples.
    """
    if np.ndim(mX) != 2:
        raise ValueError("`mX` must be 2D with shape (numSamples, numValues).")

    numSamples = np.size(mX, 0)
    if numSamples == 0:
        # The feature names are taken from the first result, hence at least one sample is required
        raise ValueError("`mX` must contain at least one sample.")

    # Calculate features (Should be done in parallel for large data)
    lRes = [pycatch22.catch22_all(mX[ii], catch24 = True, short_names = shortNames) for ii in range(numSamples)]

    # Each result holds the names next to the values, no need for a dedicated call to get them
    lName = lRes[0]['short_names'] if shortNames else lRes[0]['names']

    # Extract Catch22 features
    dfF = pd.DataFrame([dRes['values'] for dRes in lRes])
    dfF.columns = lName
    dfF.index = np.arange(numSamples)

    return dfF

In [None]:
# Catch22 features of the train data (Short feature names by default)
dfF = ExtractFeaturesCatch22(mPTrain)

In [None]:
# Display the extracted features (A row per sample, a column per feature)
dfF

In [None]:
# Fit the same classifier object on the Catch22 features of the train data, then score on the train data
oCls = oCls.fit(dfF, vYTrain)
oCls.score(dfF, vYTrain)

In [None]:
# Catch22 features of the test data (Overwrites the train features held by `dfF`), then score on the test data
dfF = ExtractFeaturesCatch22(mPTest)
oCls.score(dfF, vYTest)