# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"


In [2]:
import numpy as np

import torch
import pywt

import inspect
import pymultifracs.mfa as mfa
from pymultifracs.utils import build_q_log

%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
# import pandas_datareader as pdr
# import seaborn as sns
# from statsmodels.tsa.api import acf, graphics, pacf
from statsmodels.tsa.ar_model import AutoReg, ar_select_order


## Data Load

In [3]:
ecgs_labels = np.load('ecgs_labels.npy')

X, y = ecgs_labels[:,:-1], ecgs_labels[:,-1]

## Data Representation

In [4]:
%%script false --no-raise-error
class DataTransform:
    def __init__(self) -> None:
        self.transformed_X = None

    @property
    def X(self):
        return self.transformed_X
    
    @property
    def shape(self):
        if self.transformed_X is not None:
            return self.transformed_X.shape
        else:
            return None

    @staticmethod
    def identity(X, **kwargs):
        return X
    
    @staticmethod
    def fourier(X, new_dimension=None, **kwargs):
        fourier_transform = np.fft.fft(X, n=new_dimension)
        modulus = np.abs(fourier_transform)
        return modulus
    
    @staticmethod
    def wavedec(X, level=4, wavelet='db1', mode='symmetric', **kwargs):
        array = np.array(X)
        coeffs = pywt.wavedec(array, wavelet, mode=mode, level=level)
        coeffs_torch = [torch.tensor(c) for c in coeffs[:1]]
        return torch.cat(coeffs_torch, dim=-1)
    
    @staticmethod
    def dwt(X, wavelet='db1', mode='symmetric', **kwargs):
        array = np.arrat(X)
        coeffs = pywt.dwt(array, wavelet, mode=mode)
        coeffs_torch = [torch.tensor(c) for c in coeffs]
        return torch.cat(coeffs_torch, dim=-1)
    
    @staticmethod
    def get_ar_coefficients(X, k, **kwargs):
        n, p = X.shape
        X = np.array(X)
        ar_coefficients = np.zeros((n, k))
        for i in range(n):
            model = AutoReg(X[i], lags=k).fit()
            ar_coefficients[i] = model.params[1:k+1] 
        return ar_coefficients
    
    @staticmethod
    def autoreg(X, k, **kwargs):
        return DataTransform.get_ar_coefficients(X, k)
    
    @staticmethod
    def shannon_encoding(X, level=4, wavelet='db1', mode='symmetric', **kwargs):
        def compute_shannon_entropy(signal):
            return -np.nansum(signal**2 * np.log(signal**2))
        
        n_examples = X.shape[0]
        wp = pywt.WaveletPacket(X[0, :], wavelet="sym8", maxlevel=3)
        packet_names = [node.path for node in wp.get_level(3, "natural")]
        
        feature_matrix_wav_packet_entropy = np.full((n_examples, 8), np.nan)
        for i in range(len(X)):
            wp = pywt.WaveletPacket(X[i, :], wavelet="sym8", maxlevel=3)
            for j in range(len(packet_names)):
                new_wp = pywt.WaveletPacket(data=None, wavelet="sym8", maxlevel=3)
                new_wp[packet_names[j]] = wp[packet_names[j]].data
                reconstructed_signal = new_wp.reconstruct(update=False)
                feature_matrix_wav_packet_entropy[i, j] = compute_shannon_entropy(reconstructed_signal)
        return feature_matrix_wav_packet_entropy
    
    @staticmethod
    def wavelet_leaders(X, j1=2, j2=6, **kwargs):
        n = X.shape[0] if X.ndim > 1 else 1
        transformed_X = -np.ones((n, 2))
        for i in range(X.shape[0]):
            dwt, lwt = mfa.mf_analysis_full(
                X[i],
                scaling_ranges=[(j1, j2)],
                q=mfa.build_q_log(1, 10, 20),
                n_cumul=2,
                p_exp=np.inf,
                gamint=0.0
            )
            sf, cumul, mfs, hmin = lwt
            transformed_X[i, :] = sf.H.item(), cumul.log_cumulants[1].item()
        return transformed_X

    def apply_transformation(self, X, transformation_name, **kwargs):
        transformation_methods = {
            'identity': self.identity,
            'fourier': self.fourier,
            'wavedec': self.wavedec,
            'dwt': self.dwt,
            'autoreg': self.autoreg,
            'shannon_encoding': self.shannon_encoding,
            'wavelet_leaders': self.wavelet_leaders,
        }
        
        if transformation_name in transformation_methods.keys():
            method = transformation_methods[transformation_name]
            return method(X, **kwargs)
        else:
            raise ValueError(f"Transformation {transformation_name} not recognized.")


Couldn't find program: 'false'


In [5]:
class DataTransform:
    def __init__(self) -> None:
        self.transformed_X = None

    @property
    def X(self):
        return self.transformed_X
    
    @property
    def shape(self):
        if self.transformed_X is not None:
            return self.transformed_X.shape
        else:
            return None

    @staticmethod
    def identity(X):
        return X
    
    @staticmethod
    def fourier(X, new_dimension=None):
        fourier_transform = np.fft.fft(X, n=new_dimension)
        modulus = np.abs(fourier_transform)
        return modulus
    
    @staticmethod
    def wavedec(X, level=4, wavelet='db1', mode='symmetric'):
        array = np.array(X)
        coeffs = pywt.wavedec(array, wavelet, mode=mode, level=level)
        coeffs_torch = [torch.tensor(c) for c in coeffs[:1]]
        return np.array(torch.cat(coeffs_torch, dim=-1))
    
    @staticmethod
    def dwt(X, wavelet='db1', mode='symmetric'):
        array = np.array(X)
        coeffs = pywt.dwt(array, wavelet, mode=mode)
        coeffs_torch = [torch.tensor(c) for c in coeffs[:1]]
        return np.array(torch.cat(coeffs_torch, dim=-1))
    
    @staticmethod
    def get_ar_coefficients(X, k):
        n, p = X.shape
        ar_coefficients = np.zeros((n, k))
        for i in range(n):
            model = AutoReg(X[i], lags=k).fit()
            ar_coefficients[i] = model.params[1:k+1] 
        return ar_coefficients
    
    @staticmethod
    def autoreg(X, k):
        return DataTransform.get_ar_coefficients(X, k)
    
    @staticmethod
    def shannon_encoding(X, level=4, wavelet='db1', mode='symmetric'):
        def compute_shannon_entropy(signal):
            return -np.nansum(signal**2 * np.log(signal**2))
        
        n_examples = X.shape[0]
        wp = pywt.WaveletPacket(X[0, :], wavelet="sym8", maxlevel=3)
        packet_names = [node.path for node in wp.get_level(3, "natural")]
        
        feature_matrix_wav_packet_entropy = np.full((n_examples, 8), np.nan)
        for i in range(len(X)):
            wp = pywt.WaveletPacket(X[i, :], wavelet="sym8", maxlevel=3)
            for j in range(len(packet_names)):
                new_wp = pywt.WaveletPacket(data=None, wavelet="sym8", maxlevel=3)
                new_wp[packet_names[j]] = wp[packet_names[j]].data
                reconstructed_signal = new_wp.reconstruct(update=False)
                feature_matrix_wav_packet_entropy[i, j] = compute_shannon_entropy(reconstructed_signal)
        return feature_matrix_wav_packet_entropy
    
    @staticmethod
    def wavelet_leaders(X, j1=2, j2=6):
        n = X.shape[0] if X.ndim > 1 else 1
        transformed_X = -np.ones((n, 2))
        for i in range(X.shape[0]):
            dwt, lwt = mfa.mf_analysis_full(
                X[i],
                scaling_ranges=[(j1, j2)],
                q=mfa.build_q_log(1, 10, 20),
                n_cumul=2,
                p_exp=np.inf,
                gamint=0.0
            )
            sf, cumul, mfs, hmin = lwt
            transformed_X[i, :] = sf.H.item(), cumul.log_cumulants[1].item()
        return transformed_X

    def apply_transformation(self, X, transformation_name, **kwargs):
        transformation_methods = {
            'identity': self.identity,
            'fourier': self.fourier,
            'wavedec': self.wavedec,
            'dwt': self.dwt,
            'autoreg': self.autoreg,
            'shannon_encoding': self.shannon_encoding,
            'wavelet_leaders': self.wavelet_leaders,
        }
        
        if transformation_name in transformation_methods.keys():
            method = transformation_methods[transformation_name]
            # Get the method's argument names
            method_args = inspect.signature(method).parameters
            # Filter kwargs to only include the ones that are relevant for the method
            filtered_kwargs = {k: v for k, v in kwargs.items() if k in method_args}
            return method(X, **filtered_kwargs)
        else:
            raise ValueError(f"Transformation {transformation_name} not recognized.")


## Data prep

In [6]:
data_transformer = DataTransform()
# X = np.randn(100, 10)  # Example input data

# # Apply different transformations
# transformed_X_identity = data_transformer.apply_transformation(X, 'identity')
# transformed_X_fourier = data_transformer.apply_transformation(X, 'fourier',new_dimension = 8)
# transformed_X_wavedec = data_transformer.apply_transformation(X, 'wavedec', level=4)
# transformed_X_autoreg = data_transformer.apply_transformation(X, 'autoreg', k=3)

# Iterate over multiple transformations
transformations = [ 'fourier', 'wavedec', 'autoreg']
for trans in transformations:
    transformed_X = data_transformer.apply_transformation(X, trans, level=2 if trans == 'wavedec' else 3,  new_dimension = 64,k = 5)
    print(f"Transformation: {trans}, Shape: {transformed_X.shape}")


Transformation: fourier, Shape: (1170, 64)
Transformation: wavedec, Shape: (1170, 16250)
Transformation: autoreg, Shape: (1170, 5)


In [7]:
import torch
import numpy as np
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming DataTransform class is already defined as provided

# Initialize the data transformer
data_transformer = DataTransform()

# # Example input data
# X = torch.randn(100, 10)  # Example input data
# y = np.random.randint(0, 2, 100)  # Example labels

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = {
    #'identity': {},
    'fourier': {},
    'wavedec': {'level': 3},
    'autoreg': {'k': 3}
}

# Function to evaluate a classifier on transformed data
def evaluate_classifier(classifier, X_train, X_test, y_train, y_test):
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    return accuracy_score(y_test, y_pred)

# Loop over each transformation and each classifier
results = {}
for trans_name, trans_kwargs in transformations.items():
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(np.array(X), trans_name, **trans_kwargs)
    # Ensure the transformed data is in the right shape
    if isinstance(transformed_X, torch.Tensor):
        transformed_X = transformed_X.numpy()
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(transformed_X, y, test_size=0.2, random_state=42)
    
    results[trans_name] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier
        accuracy = evaluate_classifier(clf, X_train, X_test, y_train, y_test)
        results[trans_name][clf_name] = accuracy
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Accuracy: {accuracy:.3f}")

# Print the results
for trans_name, clf_results in results.items():
    for clf_name, accuracy in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Accuracy: {accuracy:.3f}")


Transformation: fourier, Classifier: SVM, Accuracy: 0.987
Transformation: fourier, Classifier: DecisionTree, Accuracy: 0.979
Transformation: fourier, Classifier: RandomForest, Accuracy: 0.983
Transformation: wavedec, Classifier: SVM, Accuracy: 0.880
Transformation: wavedec, Classifier: DecisionTree, Accuracy: 0.607
Transformation: wavedec, Classifier: RandomForest, Accuracy: 0.816
Transformation: autoreg, Classifier: SVM, Accuracy: 0.846
Transformation: autoreg, Classifier: DecisionTree, Accuracy: 0.932
Transformation: autoreg, Classifier: RandomForest, Accuracy: 0.970
Transformation: fourier, Classifier: SVM, Accuracy: 0.987
Transformation: fourier, Classifier: DecisionTree, Accuracy: 0.979
Transformation: fourier, Classifier: RandomForest, Accuracy: 0.983
Transformation: wavedec, Classifier: SVM, Accuracy: 0.880
Transformation: wavedec, Classifier: DecisionTree, Accuracy: 0.607
Transformation: wavedec, Classifier: RandomForest, Accuracy: 0.816
Transformation: autoreg, Classifier: SVM

In [8]:
import torch
import numpy as np
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler

# Assuming DataTransform class is already defined as provided

# Initialize the data transformer
data_transformer = DataTransform()

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = {
    #'identity': {},
    'fourier': {},
    'wavedec': {'level': 4},
    'autoreg': {'k': 3}
}

# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Loop over each transformation and each classifier
results = {}
for trans_name, trans_kwargs in transformations.items():
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_name, **trans_kwargs)
    # Ensure the transformed data is in the right shape
    if isinstance(transformed_X, torch.Tensor):
        transformed_X = transformed_X.numpy()
    
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name][clf_name] = (mean_accuracy, std_accuracy)
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

# Print the results
for trans_name, clf_results in results.items():
    for clf_name, (mean_accuracy, std_accuracy) in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")


Transformation: fourier, Classifier: SVM, Mean Accuracy: 0.9854700854700855, Std Dev: 0.02485989670469912
Transformation: fourier, Classifier: DecisionTree, Mean Accuracy: 0.982905982905983, Std Dev: 0.006620484352491331
Transformation: fourier, Classifier: RandomForest, Mean Accuracy: 0.9743589743589742, Std Dev: 0.011143935735389146
Transformation: wavedec, Classifier: SVM, Mean Accuracy: 0.9179487179487179, Std Dev: 0.03074547991214521
Transformation: wavedec, Classifier: DecisionTree, Mean Accuracy: 0.6863247863247863, Std Dev: 0.14203472189976743
Transformation: wavedec, Classifier: RandomForest, Mean Accuracy: 0.8692307692307694, Std Dev: 0.04130955888408401
Transformation: autoreg, Classifier: SVM, Mean Accuracy: 0.8931623931623932, Std Dev: 0.01709401709401708
Transformation: autoreg, Classifier: DecisionTree, Mean Accuracy: 0.8923076923076921, Std Dev: 0.03892409414268617
Transformation: autoreg, Classifier: RandomForest, Mean Accuracy: 0.9128205128205128, Std Dev: 0.040505967