# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [1]:
import numpy as np 
import matplotlib.pyplot as plt

# import pywt

# import pymultifracs.mfa as mfa
# from pymultifracs.utils import build_q_log
# from statsmodels.tsa.ar_model import AutoReg, ar_select_order


In [2]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


In [3]:
# %pip install import_ipynb
# %pip install  --user git+https://github.com/neurospin/pymultifracs

In [4]:
import import_ipynb
from transformations import DataTransform,TransformationRegistry, IdentityTransform, FourierTransform, LowFourierTransform, LowPsdTransform, WaveDecTransform, DwtTransform, CwtTransform, AutoRegTransform, ShannonEncodingTransform, WaveletLeadersTransform, CrossCorTransform, AutoCorTransform, MultiFracsTransform

importing Jupyter notebook from transformations.ipynb


In [5]:
# Initialize the registry
registry = TransformationRegistry()

# Register transformations
registry.register('identity', IdentityTransform)
registry.register('fourier', FourierTransform)
registry.register('low_fourier', LowFourierTransform)
registry.register('low_psd', LowPsdTransform)
registry.register('wavedec', WaveDecTransform)
registry.register('dwt', DwtTransform)
registry.register('cwt', CwtTransform)
registry.register('autoreg', AutoRegTransform)
registry.register('shannon_encoding', ShannonEncodingTransform)
registry.register('wavelet_leaders', WaveletLeadersTransform)
registry.register('multifracs', MultiFracsTransform)
registry.register('crosscor', CrossCorTransform)
registry.register('autocor', AutoCorTransform)


## Data Load

Link here [ecgs_labels.npy](https://drive.google.com/file/d/1cbUKH9qGOeIZD6Mf73plMkyXpq56mwIu/view?usp=sharing)

In [6]:
ecgs_labels = np.load('ecgs_labels.npy')

X, y = ecgs_labels[1:,:-1], ecgs_labels[1:,-1]

In [7]:
np.isnan(X).sum()

0

In [8]:
n,p = X.shape

print(f'X.shape : {n,p}')

X.shape : (1164, 65000)


In [9]:
data_transformer = DataTransform(registry,save_data=False)
for trans_names in registry.transformations.keys():
        trans_names_str = [str(name) for name in trans_names]
        trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
        kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
        trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
        
        # Apply transformation
        transformed_X = data_transformer.apply_transformation(np.random.randn((10*p)).reshape((10,p)), trans_names, **kwargs)
        
        print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}")

Transformation: identity, Shape: (10, 65000)
Computating  fourier{} ...

Transformation: fourier, Shape: (10, 65000)
Transformation: low_fourier, Shape: (10, 1092)
Transformation: low_psd, Shape: (10, 1092)
Transformation: wavedec, Shape: (10, 4063)
Transformation: dwt, Shape: (10, 32500)
Transformation: cwt, Shape: (10, 2)
Transformation: autoreg, Shape: (10, 3)
Transformation: shannon_encoding, Shape: (10, 8)
Transformation: wavelet_leaders, Shape: (10, 2)
Transformation: multifracs, Shape: (10, 3)
Transformation: crosscor, Shape: (10, 10)
Transformation: autocor, Shape: (10, 26000)


In [10]:
# Initialize the data transformer
data_transformer = DataTransform(registry)

In [11]:
# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = [
    # ['identity'],
    ['crosscor'],
    ['autocor', {'m':5000,'k':4}],
    ['fourier', {'new_dimension':40}],
    ['low_fourier'],
    ['low_psd'],
    ['wavedec'],
    ['autoreg', {'k': 3}],
    ['shannon_encoding'],
    ['wavelet_leaders'],
    ['multifracs'],
    ['multifracs', {'j1':1,'j2':12}],
    [['wavelet_leaders','shannon_encoding']],
    [['wavelet_leaders','multifracs']],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs','shannon_encoding'], {'new_dimension':40}],
    [['low_fourier','multifracs','autoreg'], {'k':3}],
    
]




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Loop over each transformation and each classifier
results = {}

for trans_names in transformations:
    # print()
    trans_names_str = [str(name) for name in trans_names]
    trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
    kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
    trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
    
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_names, **kwargs)
    print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}" )
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = (mean_accuracy, std_accuracy)
        print(f"Transformation: {trans_name_str}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

    print()
# Print the results
for trans_name, clf_results in results.items():
    print()
    for clf_name, (mean_accuracy, std_accuracy) in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")
        

Transformation: crosscor, Shape: (1164, 10)
Transformation: crosscor, Classifier: SVM, Mean Accuracy: 0.733, Std Dev: 0.058
Transformation: crosscor, Classifier: DecisionTree, Mean Accuracy: 0.656, Std Dev: 0.037
Transformation: crosscor, Classifier: RandomForest, Mean Accuracy: 0.747, Std Dev: 0.041

Transformation: autocor+{'m': 5000, 'k': 4}, Shape: (1164, 52)
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: SVM, Mean Accuracy: 0.472, Std Dev: 0.009
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: DecisionTree, Mean Accuracy: 0.339, Std Dev: 0.035
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: RandomForest, Mean Accuracy: 0.451, Std Dev: 0.022

Transformation: fourier+{'new_dimension': 40}, Shape: (1164, 40)
Transformation: fourier+{'new_dimension': 40}, Classifier: SVM, Mean Accuracy: 0.709, Std Dev: 0.027
Transformation: fourier+{'new_dimension': 40}, Classifier: DecisionTree, Mean Accuracy: 0.625, Std Dev: 0.032
Transformation: fourier+{'new_dimension': 

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: wavelet_leaders, Shape: (1164, 2)
Transformation: wavelet_leaders, Classifier: SVM, Mean Accuracy: 0.731, Std Dev: 0.090
Transformation: wavelet_leaders, Classifier: DecisionTree, Mean Accuracy: 0.686, Std Dev: 0.065
Transformation: wavelet_leaders, Classifier: RandomForest, Mean Accuracy: 0.709, Std Dev: 0.103

Computating  multifracs{} ...

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: multifracs, Shape: (1164, 3)
Transformation: multifracs, Classifier: SVM, Mean Accuracy: 0.780, Std Dev: 0.067
Transformation: multifracs, Classifier: DecisionTree, Mean Accuracy: 0.728, Std Dev: 0.071
Transformation: multifracs, Classifier: RandomForest, Mean Accuracy: 0.762, Std Dev: 0.069

Computating  multifracs{'j1': 1, 'j2': 12} ...

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: multifracs+{'j1': 1, 'j2': 12}, Shape: (1164, 3)
Transformation: multifracs+{'j1': 1, 'j2': 12}, Classifier: SVM, Mean Accuracy: 0.868, Std Dev: 0.067
Transformation: multifracs+{'j1': 1, 'j2': 12}, Classifier: DecisionTree, Mean Accuracy: 0.816, Std Dev: 0.071
Transformation: multifracs+{'j1': 1, 'j2': 12}, Classifier: RandomForest, Mean Accuracy: 0.852, Std Dev: 0.073

Transformation: ['wavelet_leaders', 'shannon_encoding'], Shape: (1164, 10)
Transformation: ['wavelet_leaders', 'shannon_encoding'], Classifier: SVM, Mean Accuracy: 0.973, Std Dev: 0.051
Transformation: ['wavelet_leaders', 'shannon_encoding'], Classifier: DecisionTree, Mean Accuracy: 0.995, Std Dev: 0.010
Transformation: ['wavelet_leaders', 'shannon_encoding'], Classifier: RandomForest, Mean Accuracy: 0.974, Std Dev: 0.052

Transformation: ['wavelet_leaders', 'multifracs'], Shape: (1164, 5)
Transformation: ['wavelet_leaders', 'multifracs'], Classifier: SVM, Mean Accuracy: 0.826, Std Dev: 0.096
Transforma

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Shape: (1164, 43)
Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Classifier: SVM, Mean Accuracy: 0.802, Std Dev: 0.060
Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Classifier: DecisionTree, Mean Accuracy: 0.720, Std Dev: 0.061
Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Classifier: RandomForest, Mean Accuracy: 0.820, Std Dev: 0.056

Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Shape: (1164, 43)
Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Classifier: SVM, Mean Accuracy: 0.802, Std Dev: 0.060
Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Classifier: DecisionTree, Mean Accuracy: 0.717, Std Dev: 0.059
Transformation: ['fourier', 'multifracs']+{'new_dimension': 40}, Classifier: RandomForest, Mean Accuracy: 0.820, Std Dev: 0.056

Transformation: ['fourier', 'multifracs', 'shannon_encoding']+{'new_dimension': 40

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: ['low_fourier', 'multifracs', 'autoreg']+{'k': 3}, Shape: (1164, 1098)
Transformation: ['low_fourier', 'multifracs', 'autoreg']+{'k': 3}, Classifier: SVM, Mean Accuracy: 0.925, Std Dev: 0.066
Transformation: ['low_fourier', 'multifracs', 'autoreg']+{'k': 3}, Classifier: DecisionTree, Mean Accuracy: 0.896, Std Dev: 0.057
Transformation: ['low_fourier', 'multifracs', 'autoreg']+{'k': 3}, Classifier: RandomForest, Mean Accuracy: 0.939, Std Dev: 0.047


Transformation: crosscor, Classifier: SVM, Mean Accuracy: 0.733, Std Dev: 0.058
Transformation: crosscor, Classifier: DecisionTree, Mean Accuracy: 0.656, Std Dev: 0.037
Transformation: crosscor, Classifier: RandomForest, Mean Accuracy: 0.747, Std Dev: 0.041

Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: SVM, Mean Accuracy: 0.472, Std Dev: 0.009
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: DecisionTree, Mean Accuracy: 0.339, Std Dev: 0.035
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: RandomF