# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [55]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import random
import itertools
from apyori import apriori

# import pywt

# import pymultifracs.mfa as mfa
# from pymultifracs.utils import build_q_log
# from statsmodels.tsa.ar_model import AutoReg, ar_select_order


In [56]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score,KFold
from sklearn.preprocessing import StandardScaler


In [57]:

# from sklearn.model_selection import  KFold

In [58]:
# %pip install import_ipynb
# %pip install  --user git+https://github.com/neurospin/pymultifracs

In [59]:
import import_ipynb
#from transformations import DataTransform,TransformationRegistry, IdentityTransform, FourierTransform, LowFourierTransform, LowPsdTransform, WaveDecTransform, DwtTransform, CwtTransform, AutoRegTransform, ShannonEncodingTransform, WaveletLeadersTransform, CrossCorTransform, AutoCorTransform, MultiFracsTransform, AutoEncoderTransform  
from transformations import *

In [60]:
# Initialize the registry
registry = initialize_registry()#TransformationRegistry()

# # Register transformations
# registry.register('identity', IdentityTransform)
# registry.register('mean', Mean)
# registry.register('std', StandardDeviation)
# registry.register('fourier', FourierTransform)
# registry.register('low_fourier', LowFourierTransform)
# registry.register('low_psd', LowPsdTransform)
# registry.register('wavedec', WaveDecTransform)
# registry.register('dwt', DwtTransform)
# registry.register('cwt', CwtTransform)
# registry.register('autoreg', AutoRegTransform)
# registry.register('shannon_encoding', ShannonEncodingTransform)
# registry.register('wavelet_leaders', WaveletLeadersTransform)
# registry.register('multifracs', MultiFracsTransform)
# registry.register('crosscor', CrossCorTransform)
# registry.register('autocor', AutoCorTransform)
# registry.register('autoencoder', AutoEncoderTransform)
# registry.register('waveletscattering', WaveletScattering)

## Data Load

Link here [ecgs_labels.npy](https://drive.google.com/file/d/1cbUKH9qGOeIZD6Mf73plMkyXpq56mwIu/view?usp=sharing)

In [61]:
DATASET = "ECG"
# DATASET = "HRV"
# DATASET = "HRV_chall2002"
DATASET = "SYNTH_DATA"
# DATASET = "SYNTH_DATA2"
# DATASET = "SYNTH_DATA3"
#DATASET = "SYNTH_DATA4"
# DATASET = "SYNTH_DATA_SAME"

In [62]:
if DATASET == "ECG":
    ecgs_labels = np.load('ecgs_labels.npy')

    X, y = ecgs_labels[1:,:-1], ecgs_labels[1:,-1]

In [63]:
if DATASET ==  "HRV":
    X, y = np.load('hrv_signals.npy'), np.load('hrv_labels.npy')

In [64]:
if DATASET ==  "HRV_chall2002":
    hrvs_labels = np.load('hrv_data.npy')

    X, y = hrvs_labels[:,:-1], hrvs_labels[:,-1]

In [65]:
if DATASET ==  "SYNTH_DATA":
    data = np.load('synth_data.npy')

    X, y = data[:,:-1], data[:,-1]

In [66]:
if DATASET ==  "SYNTH_DATA2":
    data = np.load('synth_data2.npy')

    X, y = data[:,:-1], data[:,-1]

In [67]:
if DATASET ==  "SYNTH_DATA3":
    data = np.load('synth_data3.npy')

    X, y = data[:,:-1], data[:,-1]

In [68]:
if DATASET == "SYNTH_DATA4":
    data = np.load('synth_data_0.8_0.75.npy')

    X, y = data[:,:-1], data[:,-1]

In [69]:
if DATASET == "SYNTH_DATA_SAME":
    data = np.load('synth_data_same.npy')

    X, y = data[:,:-1], data[:,-1]

In [70]:
np.isnan(X).sum()

0

In [71]:
n,p = X.shape

print(f'X.shape : {n,p}')

X.shape : (200, 65536)


In [72]:
j2max = min(12,int(np.log2(p) - 3))
j2max

12

In [73]:
p_ = 65000

In [74]:
data_transformer = DataTransform(registry,save_data=False)
for trans_names in registry.transformations.keys():
        trans_names_str = [str(name) for name in trans_names]
        trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
        kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
        trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
        # Apply transformation
        transformed_X = data_transformer.apply_transformation(np.random.randn((10*p_)).reshape((10,p_)), trans_names, **kwargs)
        
        print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}")

Transformation: identity, Shape: (10, 65000)
Transformation: mean, Shape: (10, 1)
Transformation: std, Shape: (10, 1)
Transformation: fourier, Shape: (10, 65000)
Transformation: low_fourier, Shape: (10, 1092)
Computing  low_psd ...

Transformation: low_psd, Shape: (10, 1092)
Transformation: wavedec, Shape: (10, 4063)
Transformation: dwt, Shape: (10, 32500)
Transformation: cwt, Shape: (10, 10)
Transformation: autoreg, Shape: (10, 3)
Transformation: shannon_encoding, Shape: (10, 8)
Transformation: wavelet_leaders, Shape: (10, 2)
Transformation: multifracs, Shape: (10, 3)
Transformation: crosscor, Shape: (10, 10)
Transformation: autocor, Shape: (10, 26000)
Transformation: autoencoder, Shape: (10, 16)
Transformation: waveletscattering, Shape: (10, 203150)


In [75]:
# Initialize the data transformer
data_transformer = DataTransform(registry)

In [76]:
%%script false --no-raise-error

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = [
    # ['identity'],
    ['crosscor'],
    ['autocor', {'m':5000,'k':4}],
    ['fourier', {'new_dimension':40}],
    ['low_fourier'],
    ['low_psd'],
    ['cwt',{'pca_components' : 10}],
    ['wavedec'],
    ['autoreg', {'k': 3}],
    ['shannon_encoding'],
    ['wavelet_leaders'],
    ['multifracs'],
    ['multifracs', {'j1':1,'j2':12}],
    [['wavelet_leaders','shannon_encoding']],
    [['wavelet_leaders','multifracs']],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs','shannon_encoding'], {'new_dimension':40}],
    [['low_fourier','multifracs','autoreg'], {'k':3}],
    
]




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Loop over each transformation and each classifier
results = {}

for trans_names in transformations:
    # print()
    trans_names_str = [str(name) for name in trans_names]
    trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
    kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
    trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
    
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_names, **kwargs)
    print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}" )
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = (mean_accuracy, std_accuracy)
        print(f"Transformation: {trans_name_str}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

    print()
# Print the results
for trans_name, clf_results in results.items():
    print()
    for clf_name, (mean_accuracy, std_accuracy) in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")
        

Couldn't find program: 'false'


In [77]:
%%script false --no-raise-error
import random
import itertools
import json

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Initialize the data transformer
data_transformer = DataTransform(registry)

# Define the transformations to be tested
transformation_names = ['crosscor','low_psd','low_fourier',['autoreg',{'k':3}],['autoreg',{'k':5}], #'multifracs',
                        ['multifracs', {'j1':1,'j2':j2max}],
                        'shannon_encoding',['autoencoder',{'fourier_transform':True}]] #list(registry.transformations.keys())




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Function to randomly combine transformations
def random_combination_transformations(transformation_list, n_combinations=10):
    # all_combinations = []
    # for r in range(1, len(transformation_names) + 1):
    #     combinations = list(itertools.combinations(transformation_names, r))
    #     all_combinations.extend(combinations)
    
    # return random.sample(all_combinations, min(n_combinations, len(all_combinations)))
    nb_transformation = len(transformation_list)
    combined_transformations = []
    for _ in range(n_combinations):
        nb_trans = np.random.randint(1,5)

        already_drawn = []
        transs = []
        for _ in range(nb_trans):
            while True:
                random_ind = np.random.randint(nb_transformation)
                if random_ind not in already_drawn:
                    already_drawn.append(random_ind)
                    break
            trans = transformation_list[random_ind]
            transs.append(trans)
            
        combined_transformations.append(transs)
    return combined_transformations
# Generate random combinations of transformations
random_transformations = random_combination_transformations(transformation_names, n_combinations=5)
print(random_transformations)
# # Example input data
# X = np.random.randn(100, 10)  # Example input data
# y = np.random.randint(0, 2, 100)  # Example labels

# Dictionary to store results
results = {}

i = 0
# Loop over each random combination of transformations and each classifier
for trans in random_transformations:
    print()
    trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
    
        
    transformed_X = data_transformer.apply_transformation(X, trans)

    # trans_name_str = '+'.join(trans_comb)
    # transformed_X = data_transformer.apply_transformation(X, trans_comb)
    
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    i += 1
    print(f"Transformations n°{i}: {trans_name_str} {transformed_X.shape}")
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = {'mean_accuracy': mean_accuracy, 'std_accuracy': std_accuracy}
        print(f"Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

# Save results to a JSON file
with open('transformation_results0.json', 'w') as f:
    json.dump(results, f, indent=4)


Couldn't find program: 'false'


In [78]:
# # Save results to a JSON file
# with open('transformation_results_hrv_chall2002.json', 'w') as f:
#     json.dump(results, f, indent=4)

In [79]:
def evaluate_transformations(X, y, transformations, classifiers):
    dt = DataTransform(registry)
    results = []

    for trans in transformations:
        trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
        trans_X = dt.apply_transformation(X, trans)

        result = {'Transformation (shape)': trans_name_str}
        result["shape"] = trans_X.shape[-1]
        result['nb_trans'] = len(trans)

        scaler = StandardScaler()
        trans_X = scaler.fit_transform(trans_X) 
        print(f"Transformation: {trans_name_str}, Shape: {trans_X.shape[-1]} ")
        for clf_name, clf in classifiers.items():
            kf = KFold(n_splits=5, shuffle=True, random_state=42)
            scores = cross_val_score(clf, trans_X, y, cv=kf, scoring='accuracy')
            result[f'{clf_name} accuracy'] = np.mean(scores)
            result[f'{clf_name} std'] = np.std(scores)
            print(f"Classifier: {clf_name}, "
                  f"Mean Accuracy: {np.mean(scores):.3f}, Std Accuracy: {np.std(scores):.3f}")
        
        results.append(result)

    return pd.DataFrame(results)

In [80]:
# Define the classifiers
classifiers = {
    'SVM': SVC(kernel='linear'),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}
transformations = [['waveletscattering',{'Q':1}],['waveletscattering',{'J':3,'Q':1}],['waveletscattering',{'J':7,'Q':1}],['waveletscattering',{'pca_components':200,'Q':1}]]
results_df = evaluate_transformations(X, y, transformations, classifiers)
print(results_df)

# Save results to a CSV file
# results_df.to_csv(f'results/test_wst_transformation_results2.csv', index=False)

Transformation: waveletscattering_Q=1, Shape: 53248 
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.990, Std Accuracy: 0.012
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=3_Q=1, Shape: 65536 
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.990, Std Accuracy: 0.012
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=7_Q=1, Shape: 17408 
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_pca_components=200_Q=1, Shape: 200 
Classifier: SVM, Mean Accuracy: 0.470, Std Accuracy: 0.024
Classifier: DecisionTree, Mean Accuracy: 0.935, Std Accuracy: 0.034
Classifier: RandomForest, Mean Accuracy: 0.995, Std

In [81]:
# Define the classifiers
classifiers = {
    'SVM': SVC(kernel='linear'),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}
Q = 3
transformations = [ ['waveletscattering',{'J':J,'Q':Q}] for J in range(12) ]
# transformations = [['waveletscattering',{'Q':1}],['waveletscattering',{'J':3,'Q':1}],['waveletscattering',{'J':7,'Q':1}],['waveletscattering',{'pca_components':200,'Q':1}]]
results_df = evaluate_transformations(X, y, transformations, classifiers)
# print(results_df)

Transformation: waveletscattering_J=0_Q=3, Shape: 196608 
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.940, Std Accuracy: 0.037
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=1_Q=3, Shape: 98304 
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.970, Std Accuracy: 0.019
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=2_Q=3, Shape: 98304 
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.985, Std Accuracy: 0.012
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=3_Q=3, Shape: 106496 
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.980, Std Accuracy: 0.010
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accura

In [82]:
# Define the classifiers
classifiers = {
    'SVM': SVC(kernel='linear'),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}
Q = 3
transformations = [ ['waveletscattering',{'J':J,'Q':Q,'pca_components':200}] for J in range(12) ]
# transformations = [['waveletscattering',{'Q':1}],['waveletscattering',{'J':3,'Q':1}],['waveletscattering',{'J':7,'Q':1}],['waveletscattering',{'pca_components':200,'Q':1}]]
results_df = evaluate_transformations(X, y, transformations, classifiers)

Transformation: waveletscattering_J=0_Q=3_pca_components=200, Shape: 200 
Classifier: SVM, Mean Accuracy: 0.455, Std Accuracy: 0.089
Classifier: DecisionTree, Mean Accuracy: 0.960, Std Accuracy: 0.025
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=1_Q=3_pca_components=200, Shape: 200 
Classifier: SVM, Mean Accuracy: 0.485, Std Accuracy: 0.030
Classifier: DecisionTree, Mean Accuracy: 0.905, Std Accuracy: 0.053
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=2_Q=3_pca_components=200, Shape: 200 
Classifier: SVM, Mean Accuracy: 0.500, Std Accuracy: 0.055
Classifier: DecisionTree, Mean Accuracy: 0.920, Std Accuracy: 0.019
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: waveletscattering_J=3_Q=3_pca_components=200, Shape: 200 
Classifier: SVM, Mean Accuracy: 0.505, Std Accuracy: 0.064
Classifier: DecisionTree, Mean Accuracy: 0.955, Std Accuracy

In [83]:

print("Transformation: waveletscattering, Shape: 65536\n\
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010\n\
Classifier: DecisionTree, Mean Accuracy: 0.810, Std Accuracy: 0.085\n\
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010")

Transformation: waveletscattering, Shape: 65536
Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.810, Std Accuracy: 0.085
Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010


In [84]:
def random_combination_transformations(transformation_names, k = 2):
    all_combinations = []
    for r in range(1, k + 1):
        combinations = list(itertools.combinations(transformation_names, r))
        all_combinations.extend(combinations)
    
    return [list(x) for x in all_combinations] 


In [85]:
%%script false --no-raise-error


# Define the classifiers
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# # Define the data transformations
# transformations = [
#     'identity',
#     ['fourier', {'new_dimension': 100}],
#     ['wavedec', {'level': 4, 'wavelet': 'db1'}],
#     ['autoreg', {'k': 1}]
# ]
# Define the transformations to be tested
transformations = ['crosscor','mean','std',
                    #'low_psd','low_fourier',
                    #['autoreg',{'k':1}], ['autoreg',{'k':2}],['autoreg',{'k':3}],
                    ['autoreg',{'k':5}], #'multifracs',
                    ['multifracs', {'j1':1,'j2':j2max}],
                    'shannon_encoding',['autoencoder',{'fourier_transform':True}]]

if DATASET == "ECG":
    transformations.extend(["low_psd","low_fourier"])

t = random_combination_transformations(transformations,k=4)
transformations = t
print(transformations)
def evaluate_transformations(X, y, transformations, classifiers):
    dt = DataTransform(registry)
    results = []

    for trans in transformations:
        trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
        trans_X = dt.apply_transformation(X, trans)

        result = {'Transformation (shape)': trans_name_str}
        result["shape"] = trans_X.shape[-1]
        result['nb_trans'] = len(trans)
        for clf_name, clf in classifiers.items():
            kf = KFold(n_splits=5, shuffle=True, random_state=42)
            scores = cross_val_score(clf, trans_X, y, cv=kf, scoring='accuracy')
            result[f'{clf_name} accuracy'] = np.mean(scores)
            result[f'{clf_name} std'] = np.std(scores)
            print(f"Transformation: {trans_name_str}, Classifier: {clf_name}, "
                  f"Mean Accuracy: {np.mean(scores):.3f}, Std Accuracy: {np.std(scores):.3f}")
        
        results.append(result)

    return pd.DataFrame(results)


results_df = evaluate_transformations(X, y, transformations, classifiers)
print(results_df)

# Save results to a CSV file
results_df.to_csv(f'results/{DATASET}_transformation_results_.csv', index=False)


Couldn't find program: 'false'


In [86]:
# results_df

In [87]:
def get_transformation_names(transformations):
    # san_transformations = DataTransform.sanitize_transformations(transformations)
    transformation_names = list()
    for transformation in transformations:
        # print(transformation)
        # input = transformation
        # print(len(input) == 2 and isinstance(input[0], str) and isinstance(input[1], dict))
        s_transformation = DataTransform.sanitize_transformations(transformation)
        names = list()
        for trans in transformation:
            trans_name, kwargs = DataTransform.handle_trans_kwargs(trans)
            names.append(DataTransform.get_trans_kwargs_str(trans_name, kwargs)) 
        transformation_names.append(names)
    return transformation_names

In [88]:
# DataTransform.sanitize_transformations(transformations)

In [89]:
transformations = ['crosscor','mean','std',
                    #'low_psd','low_fourier',
                    #['autoreg',{'k':1}], ['autoreg',{'k':2}],['autoreg',{'k':3}],
                    ['autoreg',{'k':5}], #'multifracs',
                    ['multifracs', {'j1':1,'j2':j2max}],
                    'shannon_encoding',['autoencoder',{'fourier_transform':True}]]

if DATASET == "ECG":
    transformations.extend(["low_psd","low_fourier"])

t = random_combination_transformations(transformations,k=3)
transformations = t

In [90]:
transformations

[['crosscor'],
 ['mean'],
 ['std'],
 [['autoreg', {'k': 5}]],
 [['multifracs', {'j1': 1, 'j2': 12}]],
 ['shannon_encoding'],
 [['autoencoder', {'fourier_transform': True}]],
 ['crosscor', 'mean'],
 ['crosscor', 'std'],
 ['crosscor', ['autoreg', {'k': 5}]],
 ['crosscor', ['multifracs', {'j1': 1, 'j2': 12}]],
 ['crosscor', 'shannon_encoding'],
 ['crosscor', ['autoencoder', {'fourier_transform': True}]],
 ['mean', 'std'],
 ['mean', ['autoreg', {'k': 5}]],
 ['mean', ['multifracs', {'j1': 1, 'j2': 12}]],
 ['mean', 'shannon_encoding'],
 ['mean', ['autoencoder', {'fourier_transform': True}]],
 ['std', ['autoreg', {'k': 5}]],
 ['std', ['multifracs', {'j1': 1, 'j2': 12}]],
 ['std', 'shannon_encoding'],
 ['std', ['autoencoder', {'fourier_transform': True}]],
 [['autoreg', {'k': 5}], ['multifracs', {'j1': 1, 'j2': 12}]],
 [['autoreg', {'k': 5}], 'shannon_encoding'],
 [['autoreg', {'k': 5}], ['autoencoder', {'fourier_transform': True}]],
 [['multifracs', {'j1': 1, 'j2': 12}], 'shannon_encoding'],


In [91]:
get_transformation_names(transformations)

[['crosscor'],
 ['mean'],
 ['std'],
 ['autoreg_k=5'],
 ['multifracs_j1=1_j2=12'],
 ['shannon_encoding'],
 ['autoencoder_fourier_transform=True'],
 ['crosscor', 'mean'],
 ['crosscor', 'std'],
 ['crosscor', 'autoreg_k=5'],
 ['crosscor', 'multifracs_j1=1_j2=12'],
 ['crosscor', 'shannon_encoding'],
 ['crosscor', 'autoencoder_fourier_transform=True'],
 ['mean', 'std'],
 ['mean', 'autoreg_k=5'],
 ['mean', 'multifracs_j1=1_j2=12'],
 ['mean', 'shannon_encoding'],
 ['mean', 'autoencoder_fourier_transform=True'],
 ['std', 'autoreg_k=5'],
 ['std', 'multifracs_j1=1_j2=12'],
 ['std', 'shannon_encoding'],
 ['std', 'autoencoder_fourier_transform=True'],
 ['autoreg_k=5', 'multifracs_j1=1_j2=12'],
 ['autoreg_k=5', 'shannon_encoding'],
 ['autoreg_k=5', 'autoencoder_fourier_transform=True'],
 ['multifracs_j1=1_j2=12', 'shannon_encoding'],
 ['multifracs_j1=1_j2=12', 'autoencoder_fourier_transform=True'],
 ['shannon_encoding', 'autoencoder_fourier_transform=True'],
 ['crosscor', 'mean', 'std'],
 ['crosscor

In [92]:
SVC().__class__

sklearn.svm._classes.SVC

In [93]:
def evaluate_transformations_and_get_aprior(X, y, transformations, classifier1, classifier2, seed = 42):
    dt = DataTransform(registry)
    results = []
    transformation_names = get_transformation_names(transformations)

    clf1better = list()
    clf2better = list()
    apr_list = list()
    kf = KFold(n_splits=5, shuffle=True, random_state=seed)
    for trans, trans_names in zip(transformations,transformation_names):
        trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
        trans_X = dt.apply_transformation(X, trans)

        result = {'Transformation (shape)': trans_name_str}
        result["shape"] = trans_X.shape[-1]
        result['nb_trans'] = len(trans)

        scaler = StandardScaler()
        trans_X = scaler.fit_transform(trans_X) 
        print(f"Transformation: {trans_name_str}, Shape: {trans_X.shape[-1]} ")

        scores = cross_val_score(classifier1, trans_X, y, cv=kf, scoring='accuracy')
        clf1_mean = np.mean(scores)
        clf1_std = np.std(scores)
        result[f'{classifier1.__class__} accuracy'] = clf1_mean
        result[f'{classifier1.__class__} std'] = clf1_std
        print(f"Classifier: {classifier1.__class__}, "
                f"Mean Accuracy: {np.mean(scores):.3f}, Std Accuracy: {np.std(scores):.3f}")
        scores = cross_val_score(classifier2, trans_X, y, cv=kf, scoring='accuracy')
        clf2_mean = np.mean(scores)
        clf2_std = np.std(scores)
        result[f'{classifier2.__class__} accuracy'] = clf2_mean
        result[f'{classifier2.__class__} std'] = clf2_std
        print(f"Classifier: {classifier2.__class__}, "
                f"Mean Accuracy: {np.mean(scores):.3f}, Std Accuracy: {np.std(scores):.3f}")
        
        if clf1_mean - clf1_std > clf2_mean + clf2_std: # + clf2_std:
            clf1better.extend(trans_names)
            apr_list.append(trans_names + ['clf1'])
        elif clf2_mean - clf2_std > clf1_mean + clf1_std: # + clf1_std:
            clf2better.extend(trans_names)
            apr_list.append(trans_names + ['clf2'])
        results.append(result)

    return pd.DataFrame(results), pd.Series(clf1better), pd.Series(clf2better), apr_list

In [94]:
res, apr1, apr2,apr_list = evaluate_transformations_and_get_aprior(X,y,transformations,SVC(),RandomForestClassifier())

Transformation: crosscor, Shape: 10 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.705, Std Accuracy: 0.068
Classifier: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, Mean Accuracy: 0.690, Std Accuracy: 0.072
Transformation: mean, Shape: 1 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.460, Std Accuracy: 0.080
Classifier: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, Mean Accuracy: 0.540, Std Accuracy: 0.051
Transformation: std, Shape: 1 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.685, Std Accuracy: 0.049
Classifier: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, Mean Accuracy: 0.585, Std Accuracy: 0.096
Transformation: autoreg_k=5, Shape: 5 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: <class 'sklearn.ensemble._forest.RandomForestClassifier'>, Mean Accuracy: 0.995, Std Accuracy: 0.010
Transformation: multifracs_j1=1_j2=12, Shape: 3 


In [95]:
apr1.value_counts()

Series([], Name: count, dtype: int64)

In [96]:
apr2.value_counts()

autoencoder_fourier_transform=True    6
mean                                  3
crosscor                              2
std                                   2
Name: count, dtype: int64

In [97]:
res, apr1, apr2 = evaluate_transformations_and_get_aprior(X,y,transformations,SVC(),DecisionTreeClassifier())

Transformation: crosscor, Shape: 10 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.705, Std Accuracy: 0.068
Classifier: <class 'sklearn.tree._classes.DecisionTreeClassifier'>, Mean Accuracy: 0.605, Std Accuracy: 0.058
Transformation: mean, Shape: 1 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.460, Std Accuracy: 0.080
Classifier: <class 'sklearn.tree._classes.DecisionTreeClassifier'>, Mean Accuracy: 0.540, Std Accuracy: 0.051
Transformation: std, Shape: 1 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.685, Std Accuracy: 0.049
Classifier: <class 'sklearn.tree._classes.DecisionTreeClassifier'>, Mean Accuracy: 0.585, Std Accuracy: 0.096
Transformation: autoreg_k=5, Shape: 5 
Classifier: <class 'sklearn.svm._classes.SVC'>, Mean Accuracy: 0.995, Std Accuracy: 0.010
Classifier: <class 'sklearn.tree._classes.DecisionTreeClassifier'>, Mean Accuracy: 0.990, Std Accuracy: 0.012
Transformation: multifracs_j1=1_j2=12, Shape: 3 
Classifier: 

ValueError: too many values to unpack (expected 3)

In [None]:
apr1.value_counts()

crosscor    1
Name: count, dtype: int64

In [None]:
apr2.value_counts()

autoencoder_fourier_transform=True    5
mean                                  3
std                                   2
crosscor                              1
Name: count, dtype: int64

In [None]:
result_list = list(apriori(apr_list))

In [None]:
for result in result_list:
    items = [x for x in result.items]
    print(f"Rule: {items}")
    print(f"Support: {result.support}")
    for ordered_stat in result.ordered_statistics:
        print(f"{list(ordered_stat.items_base)} ==> {list(ordered_stat.items_add)}")
        print(f"Confidence: {ordered_stat.confidence}")
        print(f"Lift: {ordered_stat.lift}")
    print("-----")