# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [1]:
import numpy as np 
import matplotlib.pyplot as plt

# import pywt

# import pymultifracs.mfa as mfa
# from pymultifracs.utils import build_q_log
# from statsmodels.tsa.ar_model import AutoReg, ar_select_order


In [2]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler


In [3]:
# %pip install import_ipynb
# %pip install  --user git+https://github.com/neurospin/pymultifracs

In [4]:
import import_ipynb
from transformations import DataTransform,TransformationRegistry, IdentityTransform, FourierTransform, LowFourierTransform, LowPsdTransform, WaveDecTransform, DwtTransform, CwtTransform, AutoRegTransform, ShannonEncodingTransform, WaveletLeadersTransform, CrossCorTransform, AutoCorTransform, MultiFracsTransform, AutoEncoderTransform  

importing Jupyter notebook from transformations.ipynb


In [5]:
# Initialize the registry
registry = TransformationRegistry()

# Register transformations
registry.register('identity', IdentityTransform)
registry.register('fourier', FourierTransform)
registry.register('low_fourier', LowFourierTransform)
registry.register('low_psd', LowPsdTransform)
registry.register('wavedec', WaveDecTransform)
registry.register('dwt', DwtTransform)
registry.register('cwt', CwtTransform)
registry.register('autoreg', AutoRegTransform)
registry.register('shannon_encoding', ShannonEncodingTransform)
registry.register('wavelet_leaders', WaveletLeadersTransform)
registry.register('multifracs', MultiFracsTransform)
registry.register('crosscor', CrossCorTransform)
registry.register('autocor', AutoCorTransform)
registry.register('autoencoder', AutoEncoderTransform)

## Data Load

Link here [ecgs_labels.npy](https://drive.google.com/file/d/1cbUKH9qGOeIZD6Mf73plMkyXpq56mwIu/view?usp=sharing)

In [6]:
ecgs_labels = np.load('ecgs_labels.npy')

X, y = ecgs_labels[1:,:-1], ecgs_labels[1:,-1]

In [7]:
# X, y = np.load('hrv_signals.npy'), np.load('hrv_labels.npy')

In [8]:
hrvs_labels = np.load('hrv_data.npy')

X, y = hrvs_labels[:,:-1], hrvs_labels[:,-1]

In [9]:
np.isnan(X).sum()

0

In [10]:
n,p = X.shape

print(f'X.shape : {n,p}')

X.shape : (68, 71933)


In [11]:
j2max = min(12,int(np.log2(p) - 3))
j2max

12

In [12]:
p_ = 65000

In [13]:
data_transformer = DataTransform(registry,save_data=False)
for trans_names in registry.transformations.keys():
        trans_names_str = [str(name) for name in trans_names]
        trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
        kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
        trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
        # Apply transformation
        transformed_X = data_transformer.apply_transformation(np.random.randn((10*p_)).reshape((10,p_)), trans_names, **kwargs)
        
        print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}")

Transformation: identity, Shape: (10, 65000)
Computing  fourier ...

Transformation: fourier, Shape: (10, 65000)
Transformation: low_fourier, Shape: (10, 1092)
Transformation: low_psd, Shape: (10, 1092)
Transformation: wavedec, Shape: (10, 4063)
Transformation: dwt, Shape: (10, 32500)
Transformation: cwt, Shape: (10, 10)
Transformation: autoreg, Shape: (10, 3)
Transformation: shannon_encoding, Shape: (10, 8)
Transformation: wavelet_leaders, Shape: (10, 2)
Transformation: multifracs, Shape: (10, 3)
Transformation: crosscor, Shape: (10, 10)
Transformation: autocor, Shape: (10, 26000)
Transformation: autoencoder, Shape: (10, 16)


In [14]:
# Initialize the data transformer
data_transformer = DataTransform(registry)

In [15]:
%%script false --no-raise-error

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = [
    # ['identity'],
    ['crosscor'],
    ['autocor', {'m':5000,'k':4}],
    ['fourier', {'new_dimension':40}],
    ['low_fourier'],
    ['low_psd'],
    ['cwt',{'pca_components' : 10}],
    ['wavedec'],
    ['autoreg', {'k': 3}],
    ['shannon_encoding'],
    ['wavelet_leaders'],
    ['multifracs'],
    ['multifracs', {'j1':1,'j2':12}],
    [['wavelet_leaders','shannon_encoding']],
    [['wavelet_leaders','multifracs']],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs','shannon_encoding'], {'new_dimension':40}],
    [['low_fourier','multifracs','autoreg'], {'k':3}],
    
]




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Loop over each transformation and each classifier
results = {}

for trans_names in transformations:
    # print()
    trans_names_str = [str(name) for name in trans_names]
    trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
    kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
    trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
    
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_names, **kwargs)
    print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}" )
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = (mean_accuracy, std_accuracy)
        print(f"Transformation: {trans_name_str}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

    print()
# Print the results
for trans_name, clf_results in results.items():
    print()
    for clf_name, (mean_accuracy, std_accuracy) in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")
        

Couldn't find program: 'false'


In [16]:
import random
import itertools
import json

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Initialize the data transformer
data_transformer = DataTransform(registry)

# Define the transformations to be tested
transformation_names = ['crosscor','low_psd','low_fourier',['autoreg',{'k':3}],['autoreg',{'k':5}], #'multifracs',
                        ['multifracs', {'j1':1,'j2':j2max}],
                        'shannon_encoding',['autoencoder',{'fourier_transform':True}]] #list(registry.transformations.keys())




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Function to randomly combine transformations
def random_combination_transformations(transformation_list, n_combinations=10):
    # all_combinations = []
    # for r in range(1, len(transformation_names) + 1):
    #     combinations = list(itertools.combinations(transformation_names, r))
    #     all_combinations.extend(combinations)
    
    # return random.sample(all_combinations, min(n_combinations, len(all_combinations)))
    nb_transformation = len(transformation_list)
    combined_transformations = []
    for _ in range(n_combinations):
        nb_trans = np.random.randint(1,5)

        already_drawn = []
        transs = []
        for _ in range(nb_trans):
            while True:
                random_ind = np.random.randint(nb_transformation)
                if random_ind not in already_drawn:
                    already_drawn.append(random_ind)
                    break
            trans = transformation_list[random_ind]
            transs.append(trans)
            
        combined_transformations.append(transs)
    return combined_transformations
# Generate random combinations of transformations
random_transformations = random_combination_transformations(transformation_names, n_combinations=20)
print(random_transformations)
# # Example input data
# X = np.random.randn(100, 10)  # Example input data
# y = np.random.randint(0, 2, 100)  # Example labels

# Dictionary to store results
results = {}

i = 0
# Loop over each random combination of transformations and each classifier
for trans in random_transformations:
    print()
    trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
    
        
    transformed_X = data_transformer.apply_transformation(X, trans)

    # trans_name_str = '+'.join(trans_comb)
    # transformed_X = data_transformer.apply_transformation(X, trans_comb)
    
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    i += 1
    print(f"Transformations n°{i}: {trans_name_str} {transformed_X.shape}")
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = {'mean_accuracy': mean_accuracy, 'std_accuracy': std_accuracy}
        print(f"Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

# Save results to a JSON file
with open('transformation_results.json', 'w') as f:
    json.dump(results, f, indent=4)


[[['autoreg', {'k': 5}]], [['autoreg', {'k': 3}], ['multifracs', {'j1': 1, 'j2': 12}]], ['low_fourier'], ['crosscor', ['autoreg', {'k': 5}], ['autoreg', {'k': 3}]], [['multifracs', {'j1': 1, 'j2': 12}]], ['low_fourier'], [['multifracs', {'j1': 1, 'j2': 12}], ['autoencoder', {'fourier_transform': True}]], ['shannon_encoding'], ['shannon_encoding', 'low_psd', ['autoreg', {'k': 5}], ['multifracs', {'j1': 1, 'j2': 12}]], ['low_fourier', 'low_psd', 'crosscor'], [['autoreg', {'k': 5}], ['autoreg', {'k': 3}], 'low_psd', 'crosscor'], [['autoreg', {'k': 3}], 'crosscor', ['autoreg', {'k': 5}], ['autoencoder', {'fourier_transform': True}]], [['multifracs', {'j1': 1, 'j2': 12}], ['autoreg', {'k': 3}], 'shannon_encoding', ['autoencoder', {'fourier_transform': True}]], [['multifracs', {'j1': 1, 'j2': 12}], ['autoreg', {'k': 3}], 'crosscor'], [['multifracs', {'j1': 1, 'j2': 12}], ['autoreg', {'k': 5}], ['autoencoder', {'fourier_transform': True}], 'crosscor'], [['autoencoder', {'fourier_transform': T

  return np.power(array, exponent)
  return array ** exponent
  log_T_X_j = np.log(T_X_j)
  R_j = temp / Z
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  V[:, ind_j, :] = fixednansum(R_j * np.log2(mrq_values_j), axis=1)
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),
  U[:, ind_j, :] = np.log2(nj) + fixednansum((R_j * np.log2(R_j)),


Transformations n°2: autoreg_k=3_multifracs_j1=1_j2=12 (68, 6)
Classifier: SVM, Mean Accuracy: 0.843, Std Dev: 0.131
Classifier: DecisionTree, Mean Accuracy: 0.813, Std Dev: 0.132
Classifier: RandomForest, Mean Accuracy: 0.812, Std Dev: 0.115

Transformations n°3: low_fourier (68, 1208)
Classifier: SVM, Mean Accuracy: 0.736, Std Dev: 0.027
Classifier: DecisionTree, Mean Accuracy: 0.503, Std Dev: 0.113
Classifier: RandomForest, Mean Accuracy: 0.707, Std Dev: 0.080

Transformations n°4: crosscor_autoreg_k=5_autoreg_k=3 (68, 18)
Classifier: SVM, Mean Accuracy: 0.841, Std Dev: 0.081
Classifier: DecisionTree, Mean Accuracy: 0.692, Std Dev: 0.136
Classifier: RandomForest, Mean Accuracy: 0.785, Std Dev: 0.162

Transformations n°5: multifracs_j1=1_j2=12 (68, 3)
Classifier: SVM, Mean Accuracy: 0.736, Std Dev: 0.027
Classifier: DecisionTree, Mean Accuracy: 0.634, Std Dev: 0.073
Classifier: RandomForest, Mean Accuracy: 0.707, Std Dev: 0.041

Transformations n°6: low_fourier (68, 1208)
Classifier:

In [17]:
# Save results to a JSON file
with open('transformation_results_hrv_chall2002.json', 'w') as f:
    json.dump(results, f, indent=4)

In [18]:
import json
##
# Load the results from the JSON file
with open('transformation_results_hrv_chall2002.json', 'r') as f:
    results = json.load(f)

# Find the best transformation and classifier
best_score = 0
best_transformation = None
best_classifier = None

for trans_comb, clf_results in results.items():
    for clf_name, scores in clf_results.items():
        if scores['mean_accuracy'] > best_score:
            best_score = scores['mean_accuracy']
            best_transformation = trans_comb
            best_classifier = clf_name

print(f"Best Transformation: {best_transformation}")
print(f"Best Classifier: {best_classifier}")
print(f"Best Mean Accuracy: {best_score}")


Best Transformation: autoreg_k=3_multifracs_j1=1_j2=12
Best Classifier: SVM
Best Mean Accuracy: 0.8428571428571429
