# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [1]:
import numpy as np 
import matplotlib.pyplot as plt

# import pywt

# import pymultifracs.mfa as mfa
# from pymultifracs.utils import build_q_log
# from statsmodels.tsa.ar_model import AutoReg, ar_select_order


In [2]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


In [3]:
import import_ipynb
from transformations import DataTransform,TransformationRegistry, IdentityTransform, FourierTransform, LowFourierTransform, LowPsdTransform, WaveDecTransform, DwtTransform, AutoRegTransform, ShannonEncodingTransform, WaveletLeadersTransform, CrossCorTransform, AutoCorTransform, MultiFracsTransform

importing Jupyter notebook from transformations.ipynb


In [4]:
# Initialize the registry
registry = TransformationRegistry()

# Register transformations
registry.register('identity', IdentityTransform)
registry.register('fourier', FourierTransform)
registry.register('low_fourier', LowFourierTransform)
registry.register('low_psd', LowPsdTransform)
registry.register('wavedec', WaveDecTransform)
registry.register('dwt', DwtTransform)
registry.register('autoreg', AutoRegTransform)
registry.register('shannon_encoding', ShannonEncodingTransform)
registry.register('wavelet_leaders', WaveletLeadersTransform)
registry.register('multifracs', MultiFracsTransform)
registry.register('crosscor', CrossCorTransform)
registry.register('autocor', AutoCorTransform)


## Data Load

In [5]:
ecgs_labels = np.load('ecgs_labels.npy')

X, y = ecgs_labels[:,:-1], ecgs_labels[:,-1]

In [6]:
np.isnan(X).sum()

0

In [8]:
# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = [
    # # ['identity'],
    ['crosscor'],
    ['autocor', {'m':5000,'k':4}],
    ['fourier', {'new_dimension':40}],
    ['low_fourier',{'n':100}],
    ['low_psd',{'n':100}],
    ['wavedec'],
    ['autoreg', {'k': 3}],
    ['shannon_encoding'],
    ['wavelet_leaders'],
    ['multifracs'],
    [['wavelet_leaders','shannon_encoding']]
]

# Initialize the data transformer
data_transformer = DataTransform(registry)

# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Loop over each transformation and each classifier
results = {}

for trans_names in transformations:
    # print()
    trans_names_str = [str(name) for name in trans_names]
    trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
    kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
    trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
    
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_names, **kwargs)
    print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}" )
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = (mean_accuracy, std_accuracy)
        print(f"Transformation: {trans_name_str}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

    print()
# Print the results
for trans_name, clf_results in results.items():
    for clf_name, (mean_accuracy, std_accuracy) in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")
        print()

Transformation: crosscor, Shape: (1165, 10)
Transformation: crosscor, Classifier: SVM, Mean Accuracy: 0.735, Std Dev: 0.057
Transformation: crosscor, Classifier: DecisionTree, Mean Accuracy: 0.643, Std Dev: 0.056
Transformation: crosscor, Classifier: RandomForest, Mean Accuracy: 0.758, Std Dev: 0.044

Transformation: autocor+{'m': 5000, 'k': 4}, Shape: (1165, 52)
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: SVM, Mean Accuracy: 0.474, Std Dev: 0.013
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: DecisionTree, Mean Accuracy: 0.355, Std Dev: 0.018
Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: RandomForest, Mean Accuracy: 0.467, Std Dev: 0.019

Transformation: fourier+{'new_dimension': 40}, Shape: (1165, 40)
Transformation: fourier+{'new_dimension': 40}, Classifier: SVM, Mean Accuracy: 0.708, Std Dev: 0.026
Transformation: fourier+{'new_dimension': 40}, Classifier: DecisionTree, Mean Accuracy: 0.633, Std Dev: 0.030
Transformation: fourier+{'new_dimension': 

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: wavelet_leaders, Shape: (1165, 2)
Transformation: wavelet_leaders, Classifier: SVM, Mean Accuracy: 0.730, Std Dev: 0.090
Transformation: wavelet_leaders, Classifier: DecisionTree, Mean Accuracy: 0.694, Std Dev: 0.052
Transformation: wavelet_leaders, Classifier: RandomForest, Mean Accuracy: 0.713, Std Dev: 0.089



  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: multifracs, Shape: (1165, 3)
Transformation: multifracs, Classifier: SVM, Mean Accuracy: 0.780, Std Dev: 0.068
Transformation: multifracs, Classifier: DecisionTree, Mean Accuracy: 0.710, Std Dev: 0.064
Transformation: multifracs, Classifier: RandomForest, Mean Accuracy: 0.755, Std Dev: 0.074



  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformation: ['wavelet_leaders', 'shannon_encoding'], Shape: (1165, 10)
Transformation: ['wavelet_leaders', 'shannon_encoding'], Classifier: SVM, Mean Accuracy: 0.973, Std Dev: 0.051
Transformation: ['wavelet_leaders', 'shannon_encoding'], Classifier: DecisionTree, Mean Accuracy: 0.996, Std Dev: 0.009
Transformation: ['wavelet_leaders', 'shannon_encoding'], Classifier: RandomForest, Mean Accuracy: 0.974, Std Dev: 0.052

Transformation: crosscor, Classifier: SVM, Mean Accuracy: 0.735, Std Dev: 0.057

Transformation: crosscor, Classifier: DecisionTree, Mean Accuracy: 0.643, Std Dev: 0.056

Transformation: crosscor, Classifier: RandomForest, Mean Accuracy: 0.758, Std Dev: 0.044

Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: SVM, Mean Accuracy: 0.474, Std Dev: 0.013

Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: DecisionTree, Mean Accuracy: 0.355, Std Dev: 0.018

Transformation: autocor+{'m': 5000, 'k': 4}, Classifier: RandomForest, Mean Accuracy: 0.467, Std Dev: 0