# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [1]:
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

In [2]:
import numpy as np

import torch
import pywt

import pymultifracs.mfa as mfa
from pymultifracs.utils import build_q_log

%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
# import pandas_datareader as pdr
# import seaborn as sns
# from statsmodels.tsa.api import acf, graphics, pacf
from statsmodels.tsa.ar_model import AutoReg, ar_select_order


## Data Load

In [3]:
ecgs_labels = np.load('ecgs_labels.npy')

X, y = ecgs_labels[:,:-1], ecgs_labels[:,-1]

## Data Representation

In [4]:
class DataTransform:
    def __init__(self) -> None:
        self.transformed_X = None

    @property
    def X(self):
        return self.transformed_X
    
    @property
    def shape(self):
        if self.transformed_X is not None:
            return self.transformed_X.shape
        else:
            return None

    @staticmethod
    def identity(X, **kwargs):
        return X
    
    @staticmethod
    def fourier(X, new_dimension=None, **kwargs):
        fourier_transform = np.fft.fft(X, n=new_dimension)
        modulus = np.abs(fourier_transform)
        return modulus
    
    @staticmethod
    def low_fourier(X,fs = 250, cutoff_ratio = 3, **kwargs):
        fourier_transform = np.fft.fft(X)#, n=new_dimension)
        frequencies = np.fft.fftreq(X.shape[1], d= 1 /fs)
        mask = frequencies < cutoff_ratio
        fourier_transform = fourier_transform[:,mask]
        modulus = np.abs(fourier_transform)
        return modulus
    
    def low_psd(X,fs = 250, cutoff_ratio = 3, **kwargs):
        fourier_transform = np.fft.fft(X)#, n=new_dimension)
        frequencies = np.fft.fftreq(X.shape[1], d= 1 /fs)
        mask = frequencies < cutoff_ratio
        fourier_transform = fourier_transform[:,mask]
        psd = fourier_transform * np.conj(fourier_transform)
        return psd
    
    @staticmethod
    def wavedec(X, level=4, wavelet='db1', mode='symmetric', **kwargs):
        array = np.array(X)
        coeffs = pywt.wavedec(array, wavelet, mode=mode, level=level)
        coeffs_torch = [torch.tensor(c) for c in coeffs[:1]]
        return torch.cat(coeffs_torch, dim=-1)
    
    @staticmethod
    def dwt(X, wavelet='db1', mode='symmetric', **kwargs):
        array = np.arrat(X)
        coeffs = pywt.dwt(array, wavelet, mode=mode)
        coeffs_torch = [torch.tensor(c) for c in coeffs]
        return torch.cat(coeffs_torch, dim=-1)
    
    @staticmethod
    def get_ar_coefficients(X, k, **kwargs):
        n, p = X.shape
        X = np.array(X)
        ar_coefficients = np.zeros((n, k))
        for i in range(n):
            model = AutoReg(X[i], lags=k).fit()
            ar_coefficients[i] = model.params[1:k+1] 
        return ar_coefficients
    
    @staticmethod
    def autoreg(X, k, **kwargs):
        return DataTransform.get_ar_coefficients(X, k)
    
    @staticmethod
    def shannon_encoding(X, level=4, wavelet='db1', mode='symmetric', **kwargs):
        def compute_shannon_entropy(signal):
            return -np.nansum(signal**2 * np.log(signal**2))
        
        n_examples = X.shape[0]
        wp = pywt.WaveletPacket(X[0, :], wavelet="sym8", maxlevel=3)
        packet_names = [node.path for node in wp.get_level(3, "natural")]
        
        feature_matrix_wav_packet_entropy = np.full((n_examples, 8), np.nan)
        for i in range(len(X)):
            wp = pywt.WaveletPacket(X[i, :], wavelet="sym8", maxlevel=3)
            for j in range(len(packet_names)):
                new_wp = pywt.WaveletPacket(data=None, wavelet="sym8", maxlevel=3)
                new_wp[packet_names[j]] = wp[packet_names[j]].data
                reconstructed_signal = new_wp.reconstruct(update=False)
                feature_matrix_wav_packet_entropy[i, j] = compute_shannon_entropy(reconstructed_signal)
        return feature_matrix_wav_packet_entropy
    
    @staticmethod
    def wavelet_leaders(X, j1=2, j2=6, **kwargs):
        n = X.shape[0] if X.ndim > 1 else 1
        transformed_X = -np.ones((n, 2))
        for i in range(X.shape[0]):
            dwt, lwt = mfa.mf_analysis_full(
                X[i],
                scaling_ranges=[(j1, j2)],
                q=mfa.build_q_log(1, 10, 20),
                n_cumul=2,
                p_exp=np.inf,
                gamint=0.0
            )
            sf, cumul, mfs, hmin = lwt
            transformed_X[i, :] = sf.H.item(), cumul.log_cumulants[1].item()
        return transformed_X

    def apply_transformation(self, X, transformation_name, **kwargs):
        transformation_methods = {
            'identity': self.identity,
            'fourier': self.fourier,
            'wavedec': self.wavedec,
            'dwt': self.dwt,
            'autoreg': self.autoreg,
            'shannon_encoding': self.shannon_encoding,
            'wavelet_leaders': self.wavelet_leaders,
        }
        
        if transformation_name in transformation_methods.keys():
            method = transformation_methods[transformation_name]
            return method(X, **kwargs)
        else:
            raise ValueError(f"Transformation {transformation_name} not recognized.")


In [5]:
a= 3
data_transformer = DataTransform()

## Data prep

In [6]:
data_transformer = DataTransform()
# X = np.randn(100, 10)  # Example input data

# # Apply different transformations
# transformed_X_identity = data_transformer.apply_transformation(X, 'identity')
# transformed_X_fourier = data_transformer.apply_transformation(X, 'fourier',new_dimension = 8)
# transformed_X_wavedec = data_transformer.apply_transformation(X, 'wavedec', level=4)
# transformed_X_autoreg = data_transformer.apply_transformation(X, 'autoreg', k=3)

# Iterate over multiple transformations
transformations = ['identity', 'fourier', 'wavedec', 'autoreg']
for trans in transformations:
    transformed_X = data_transformer.apply_transformation(X, trans, level=4 if trans == 'wavedec' else 3,  new_dimension = 64,k = 5)
    print(f"Transformation: {trans}, Shape: {transformed_X.shape}")


Transformation: identity, Shape: (1165, 65000)
Transformation: fourier, Shape: (1165, 64)
Transformation: wavedec, Shape: torch.Size([1165, 4063])
Transformation: autoreg, Shape: (1165, 5)


In [7]:
import torch
import numpy as np
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Assuming DataTransform class is already defined as provided

# Initialize the data transformer
data_transformer = DataTransform()

# # Example input data
# X = torch.randn(100, 10)  # Example input data
# y = np.random.randint(0, 2, 100)  # Example labels

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = {
    'identity': {},
    'fourier': {},
    'wavedec': {'level': 4},
    'autoreg': {'k': 3}
}

# Function to evaluate a classifier on transformed data
def evaluate_classifier(classifier, X_train, X_test, y_train, y_test):
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    return accuracy_score(y_test, y_pred)

# Loop over each transformation and each classifier
results = {}
for trans_name, trans_kwargs in transformations.items():
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(np.array(X), trans_name, **trans_kwargs)
    # Ensure the transformed data is in the right shape
    if isinstance(transformed_X, torch.Tensor):
        transformed_X = transformed_X.numpy()
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(transformed_X, y, test_size=0.2, random_state=42)
    
    results[trans_name] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier
        accuracy = evaluate_classifier(clf, X_train, X_test, y_train, y_test)
        results[trans_name][clf_name] = accuracy
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Accuracy: {accuracy}")

# Print the results
for trans_name, clf_results in results.items():
    for clf_name, accuracy in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Accuracy: {accuracy}")


Transformation: identity, Classifier: SVM, Accuracy: 0.9141630901287554
Transformation: identity, Classifier: DecisionTree, Accuracy: 0.630901287553648
Transformation: identity, Classifier: RandomForest, Accuracy: 0.8669527896995708
Transformation: fourier, Classifier: SVM, Accuracy: 1.0
Transformation: fourier, Classifier: DecisionTree, Accuracy: 0.9828326180257511
Transformation: fourier, Classifier: RandomForest, Accuracy: 0.9871244635193133
Transformation: wavedec, Classifier: SVM, Accuracy: 0.9399141630901288
Transformation: wavedec, Classifier: DecisionTree, Accuracy: 0.575107296137339
Transformation: wavedec, Classifier: RandomForest, Accuracy: 0.8626609442060086
Transformation: autoreg, Classifier: SVM, Accuracy: 0.8626609442060086
Transformation: autoreg, Classifier: DecisionTree, Accuracy: 0.9613733905579399
Transformation: autoreg, Classifier: RandomForest, Accuracy: 0.9656652360515021
Transformation: identity, Classifier: SVM, Accuracy: 0.9141630901287554
Transformation: id

In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from torch.utils.data import TensorDataset, DataLoader

# Load and preprocess the dataset
# iris = datasets.load_iris()
# X, y = iris.data, iris.target

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
# X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# y_train_tensor = torch.tensor(y_train, dtype=torch.long)
# y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
# test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
# test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [9]:
iris = datasets.load_iris()
print(iris.target)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [10]:
# Define the classifiers
class FCNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(FCNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 50)
        self.fc2 = nn.Linear(50, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class SimpleCNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv1d(1, 16, kernel_size=2, stride=1)
        self.fc1 = nn.Linear(16 * (input_dim - 1), output_dim)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = torch.relu(self.conv1(x))
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        return x

class SimpleRNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(SimpleRNN, self).__init__()
        self.rnn = nn.RNN(input_dim, 50, batch_first=True)
        self.fc1 = nn.Linear(50, output_dim)

    def forward(self, x):
        x, _ = self.rnn(x)
        x = self.fc1(x[:, -1, :])
        return x

# Function to train and evaluate the classifiers
def train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, num_epochs=20):
    model.train()
    for epoch in range(num_epochs):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
    
    model.eval()
    y_pred = []
    with torch.no_grad():
        for X_batch, _ in test_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            y_pred.extend(predicted.numpy())
    
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Initialize models, criteria, and optimizers
input_dim = X.shape[1]
output_dim = len(set(y))

models = {
    'FCNN': FCNN(input_dim, output_dim),
    'CNN': SimpleCNN(input_dim, output_dim),
    # 'RNN': SimpleRNN(input_dim, output_dim)
}

criteria = {
    'FCNN': nn.CrossEntropyLoss(),
    'CNN': nn.CrossEntropyLoss(),
    # 'RNN': nn.CrossEntropyLoss()
}

optimizers = {
    'FCNN': optim.Adam(models['FCNN'].parameters(), lr=0.01),
    'CNN': optim.Adam(models['CNN'].parameters(), lr=0.01),
    # 'RNN': optim.Adam(models['RNN'].parameters(), lr=0.01)
}

# Train and evaluate each model
for name, model in models.items():
    criterion = criteria[name]
    optimizer = optimizers[name]
    accuracy = train_and_evaluate(model, train_loader, test_loader, criterion, optimizer)
    print(f"{name} Accuracy: {accuracy:.4f}")


NameError: name 'train_loader' is not defined

## Tests

In [None]:
# wt

In [None]:
# wtmra

In [None]:
# X = 
# y = 

In [None]:
# Load dataset
# iris = datasets.load_iris()
# X, y = iris.data, iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X[281:], y[281:] - 1, test_size=0.3, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:

from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score


# Initialize classifiers
classifiers = {
    
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
}

# Train and evaluate each classifier
results = {}
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    cross_val = cross_val_score(clf, X, y, cv=5)
    results[name] = {"Accuracy": accuracy, "Cross-Validation Score": cross_val.mean()}

# Print results
for name, metrics in results.items():
    print(f"{name}:")
    print(f"  Accuracy: {metrics['Accuracy']:.4f}")
    print(f"  Cross-Validation Score: {metrics['Cross-Validation Score']:.4f}")
    print()


Decision Tree:
  Accuracy: 1.0000
  Cross-Validation Score: 0.9976

Random Forest:
  Accuracy: 1.0000
  Cross-Validation Score: 1.0000

SVM:
  Accuracy: 1.0000
  Cross-Validation Score: 1.0000



In [None]:
stop

NameError: name 'stop' is not defined

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score

clf = SVC()
results = {}
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
cross_val = cross_val_score(clf, X, y, cv=5)
results[name] = {"Accuracy": accuracy, "Cross-Validation Score": cross_val.mean()}

# Print results
for name, metrics in results.items():
    print(f"{name}:")
    print(f"  Accuracy: {metrics['Accuracy']:.4f}")
    print(f"  Cross-Validation Score: {metrics['Cross-Validation Score']:.4f}")
    print()

CNN:
  Accuracy: 0.8452
  Cross-Validation Score: 0.8262



In [None]:
stop

NameError: name 'stop' is not defined

In [None]:

from sklearn import datasets
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score


# Initialize classifiers
classifiers = {
    # "Logistic Regression": LogisticRegression(max_iter=1),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "k-NN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Gradient Boosting": GradientBoostingClassifier()
}

# Train and evaluate each classifier
results = {}
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    cross_val = cross_val_score(clf, X, y, cv=5)
    results[name] = {"Accuracy": accuracy, "Cross-Validation Score": cross_val.mean()}

# Print results
for name, metrics in results.items():
    print(f"{name}:")
    print(f"  Accuracy: {metrics['Accuracy']:.4f}")
    print(f"  Cross-Validation Score: {metrics['Cross-Validation Score']:.4f}")
    print()


KeyboardInterrupt: 