# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [1]:
import numpy as np 
import matplotlib.pyplot as plt

# import pywt

# import pymultifracs.mfa as mfa
# from pymultifracs.utils import build_q_log
# from statsmodels.tsa.ar_model import AutoReg, ar_select_order


In [2]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


In [3]:
# %pip install import_ipynb
# %pip install  --user git+https://github.com/neurospin/pymultifracs

In [4]:
import import_ipynb
from transformations import DataTransform,TransformationRegistry, IdentityTransform, FourierTransform, LowFourierTransform, LowPsdTransform, WaveDecTransform, DwtTransform, CwtTransform, AutoRegTransform, ShannonEncodingTransform, WaveletLeadersTransform, CrossCorTransform, AutoCorTransform, MultiFracsTransform

importing Jupyter notebook from transformations.ipynb


In [5]:
# Initialize the registry
registry = TransformationRegistry()

# Register transformations
registry.register('identity', IdentityTransform)
registry.register('fourier', FourierTransform)
registry.register('low_fourier', LowFourierTransform)
registry.register('low_psd', LowPsdTransform)
registry.register('wavedec', WaveDecTransform)
registry.register('dwt', DwtTransform)
registry.register('cwt', CwtTransform)
registry.register('autoreg', AutoRegTransform)
registry.register('shannon_encoding', ShannonEncodingTransform)
registry.register('wavelet_leaders', WaveletLeadersTransform)
registry.register('multifracs', MultiFracsTransform)
registry.register('crosscor', CrossCorTransform)
registry.register('autocor', AutoCorTransform)


## Data Load

Link here [ecgs_labels.npy](https://drive.google.com/file/d/1cbUKH9qGOeIZD6Mf73plMkyXpq56mwIu/view?usp=sharing)

In [6]:
ecgs_labels = np.load('ecgs_labels.npy')

X, y = ecgs_labels[1:,:-1], ecgs_labels[1:,-1]

In [7]:
np.isnan(X).sum()

0

In [8]:
n,p = X.shape

print(f'X.shape : {n,p}')

X.shape : (1164, 65000)


In [9]:
data_transformer = DataTransform(registry,save_data=False)
for trans_names in registry.transformations.keys():
        trans_names_str = [str(name) for name in trans_names]
        trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
        kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
        trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
        
        # Apply transformation
        transformed_X = data_transformer.apply_transformation(np.random.randn((10*p)).reshape((10,p)), trans_names, **kwargs)
        
        print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}")

Transformation: identity, Shape: (10, 65000)
Transformation: fourier, Shape: (10, 65000)
Transformation: low_fourier, Shape: (10, 1092)
Transformation: low_psd, Shape: (10, 1092)
Transformation: wavedec, Shape: (10, 4063)
Transformation: dwt, Shape: (10, 32500)
Transformation: cwt, Shape: (10, 10)
Transformation: autoreg, Shape: (10, 3)
Transformation: shannon_encoding, Shape: (10, 8)
Transformation: wavelet_leaders, Shape: (10, 2)
Computating  multifracs{} ...

KeyboardInterrupt: 

In [None]:
# Initialize the data transformer
data_transformer = DataTransform(registry)

In [11]:
%%script false --no-raise-error

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = [
    # ['identity'],
    ['crosscor'],
    ['autocor', {'m':5000,'k':4}],
    ['fourier', {'new_dimension':40}],
    ['low_fourier'],
    ['low_psd'],
    ['cwt',{'pca_components' : 10}],
    ['wavedec'],
    ['autoreg', {'k': 3}],
    ['shannon_encoding'],
    ['wavelet_leaders'],
    ['multifracs'],
    ['multifracs', {'j1':1,'j2':12}],
    [['wavelet_leaders','shannon_encoding']],
    [['wavelet_leaders','multifracs']],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs','shannon_encoding'], {'new_dimension':40}],
    [['low_fourier','multifracs','autoreg'], {'k':3}],
    
]




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Loop over each transformation and each classifier
results = {}

for trans_names in transformations:
    # print()
    trans_names_str = [str(name) for name in trans_names]
    trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
    kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
    trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
    
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_names, **kwargs)
    print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}" )
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = (mean_accuracy, std_accuracy)
        print(f"Transformation: {trans_name_str}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

    print()
# Print the results
for trans_name, clf_results in results.items():
    print()
    for clf_name, (mean_accuracy, std_accuracy) in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")
        

Couldn't find program: 'false'


In [18]:
import random
import itertools
import json

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Initialize the data transformer
data_transformer = DataTransform(registry)

# Define the transformations to be tested
transformation_names = ['crosscor','low_psd','low_fourier',['autoreg',{'k':3}],['autoreg',{'k':5}],'multifracs',
    ['multifracs', {'j1':1,'j2':12}],'shannon_encoding'] #list(registry.transformations.keys())




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Function to randomly combine transformations
def random_combination_transformations(transformation_list, n_combinations=10):
    # all_combinations = []
    # for r in range(1, len(transformation_names) + 1):
    #     combinations = list(itertools.combinations(transformation_names, r))
    #     all_combinations.extend(combinations)
    
    # return random.sample(all_combinations, min(n_combinations, len(all_combinations)))
    nb_transformation = len(transformation_list)
    combined_transformations = []
    for _ in range(n_combinations):
        nb_trans = np.random.randint(1,5)

        already_drawn = []
        trans_names = []
        kwargs = {}
        
        for _ in range(nb_trans):
            while True:
                random_ind = np.random.randint(nb_transformation)
                if random_ind not in already_drawn:
                    already_drawn.append(random_ind)
                    break
            trans = transformation_list[random_ind]
            # print(trans)
            # print(type(trans))
            temp_kwargs = trans[1] if isinstance(trans, list) and len(trans) > 1 else {}
            temp_trans_name = trans[0] if isinstance(trans, list) else trans
            # print(temp_trans_name)
            trans_names.append(temp_trans_name)
            kwargs = kwargs | temp_kwargs
        combined_transformations.append([trans_names,kwargs])
    return combined_transformations
# Generate random combinations of transformations
random_transformations = random_combination_transformations(transformation_names, n_combinations=10)
print(random_transformations)
# # Example input data
# X = np.random.randn(100, 10)  # Example input data
# y = np.random.randint(0, 2, 100)  # Example labels

# Dictionary to store results
results = {}

# Loop over each random combination of transformations and each classifier
for trans_names in random_transformations:
    # print()
    trans_names_str = [str(name) for name in trans_names]
    trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
    kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
    trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
    
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_names, **kwargs)

    # trans_name_str = '+'.join(trans_comb)
    # transformed_X = data_transformer.apply_transformation(X, trans_comb)
    
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = {'mean_accuracy': mean_accuracy, 'std_accuracy': std_accuracy}
        print(f"Transformations: {trans_name_str}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

# Save results to a JSON file
with open('transformation_results.json', 'w') as f:
    json.dump(results, f, indent=4)


[[['shannon_encoding', 'crosscor'], {}], [['autoreg', 'autoreg'], {'k': 5}], [['multifracs', 'autoreg', 'shannon_encoding'], {'k': 3}], [['low_psd', 'multifracs'], {}], [['autoreg', 'low_psd', 'autoreg', 'low_fourier'], {'k': 3}], [['crosscor', 'autoreg', 'multifracs', 'multifracs'], {'k': 5, 'j1': 1, 'j2': 12}], [['multifracs', 'autoreg', 'autoreg'], {'k': 5}], [['multifracs', 'low_fourier'], {}], [['multifracs'], {}], [['autoreg', 'multifracs', 'multifracs', 'autoreg'], {'k': 5, 'j1': 1, 'j2': 12}]]
Transformations: ['shannon_encoding', 'crosscor']+{}, Classifier: SVM, Mean Accuracy: 0.9681256474766908, Std Dev: 0.051053885431750476
Transformations: ['shannon_encoding', 'crosscor']+{}, Classifier: DecisionTree, Mean Accuracy: 0.9905357407133344, Std Dev: 0.011674622353346107
Transformations: ['shannon_encoding', 'crosscor']+{}, Classifier: RandomForest, Mean Accuracy: 0.9741379310344828, Std Dev: 0.051724137931034475
Transformations: ['autoreg', 'autoreg']+{'k': 5}, Classifier: SVM, 

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformations: ['multifracs', 'autoreg', 'shannon_encoding']+{'k': 3}, Classifier: SVM, Mean Accuracy: 0.9732795619357704, Std Dev: 0.051321878697974777
Transformations: ['multifracs', 'autoreg', 'shannon_encoding']+{'k': 3}, Classifier: DecisionTree, Mean Accuracy: 0.9836687879236349, Std Dev: 0.013961938480565001
Transformations: ['multifracs', 'autoreg', 'shannon_encoding']+{'k': 3}, Classifier: RandomForest, Mean Accuracy: 0.9689840165754033, Std Dev: 0.05197120550548132
Computating  multifracs{} ...

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformations: ['low_psd', 'multifracs']+{}, Classifier: SVM, Mean Accuracy: 0.8452974692911056, Std Dev: 0.0546872400014875
Transformations: ['low_psd', 'multifracs']+{}, Classifier: DecisionTree, Mean Accuracy: 0.8280893887820039, Std Dev: 0.052889129962685515
Transformations: ['low_psd', 'multifracs']+{}, Classifier: RandomForest, Mean Accuracy: 0.9053981056681959, Std Dev: 0.06200018654901476
Transformations: ['autoreg', 'low_psd', 'autoreg', 'low_fourier']+{'k': 3}, Classifier: SVM, Mean Accuracy: 0.9105705194612994, Std Dev: 0.057870146916511774
Transformations: ['autoreg', 'low_psd', 'autoreg', 'low_fourier']+{'k': 3}, Classifier: DecisionTree, Mean Accuracy: 0.9201420748853042, Std Dev: 0.04310987000406322
Transformations: ['autoreg', 'low_psd', 'autoreg', 'low_fourier']+{'k': 3}, Classifier: RandomForest, Mean Accuracy: 0.9354965221252034, Std Dev: 0.04588808396371548
Computating  multifracs{'k': 5, 'j1': 1, 'j2': 12} ...

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformations: ['crosscor', 'autoreg', 'multifracs', 'multifracs']+{'k': 5, 'j1': 1, 'j2': 12}, Classifier: SVM, Mean Accuracy: 0.9543695426964629, Std Dev: 0.059705757955474184
Transformations: ['crosscor', 'autoreg', 'multifracs', 'multifracs']+{'k': 5, 'j1': 1, 'j2': 12}, Classifier: DecisionTree, Mean Accuracy: 0.9896773716146219, Std Dev: 0.012642634898833972
Transformations: ['crosscor', 'autoreg', 'multifracs', 'multifracs']+{'k': 5, 'j1': 1, 'j2': 12}, Classifier: RandomForest, Mean Accuracy: 0.993991416309013, Std Dev: 0.010010217845227971
Computating  multifracs{'k': 5} ...

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformations: ['multifracs', 'autoreg', 'autoreg']+{'k': 5}, Classifier: SVM, Mean Accuracy: 0.9612290957525529, Std Dev: 0.06543083676688517
Transformations: ['multifracs', 'autoreg', 'autoreg']+{'k': 5}, Classifier: DecisionTree, Mean Accuracy: 0.9586465887228061, Std Dev: 0.06835559304485055
Transformations: ['multifracs', 'autoreg', 'autoreg']+{'k': 5}, Classifier: RandomForest, Mean Accuracy: 0.9948460855409206, Std Dev: 0.008321818849150918
Transformations: ['multifracs', 'low_fourier']+{}, Classifier: SVM, Mean Accuracy: 0.8985089536776677, Std Dev: 0.07104612948526684
Transformations: ['multifracs', 'low_fourier']+{}, Classifier: DecisionTree, Mean Accuracy: 0.8169009915643037, Std Dev: 0.06647583296427664
Transformations: ['multifracs', 'low_fourier']+{}, Classifier: RandomForest, Mean Accuracy: 0.9088204824626314, Std Dev: 0.07017258171116492
Transformations: ['multifracs']+{}, Classifier: SVM, Mean Accuracy: 0.7799652212520349, Std Dev: 0.06670542839059332
Transformations