# Benchmark for signal representation 

- Multifractal analysis (1ere approche)

- Discrete Fourier Transform (DFT) $\checkmark$
- Spectrogram
- Autoregression $\checkmark$
- Shannon encoding $\checkmark$
- Wavelets (en cours)

- Local symbolic features
- SAX representation
- Approximate entropy

ML

- Autoencoder

- RNN
- LSTM



## Imports

In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import random
import itertools
from apyori import apriori

# import pywt

# import pymultifracs.mfa as mfa
# from pymultifracs.utils import build_q_log
# from statsmodels.tsa.ar_model import AutoReg, ar_select_order


In [2]:
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

In [3]:

# from sklearn.model_selection import  KFold

In [4]:
# %pip install import_ipynb
# %pip install  --user git+https://github.com/neurospin/pymultifracs

In [5]:
import import_ipynb
#from transformations import DataTransform,TransformationRegistry, IdentityTransform, FourierTransform, LowFourierTransform, LowPsdTransform, WaveDecTransform, DwtTransform, CwtTransform, AutoRegTransform, ShannonEncodingTransform, WaveletLeadersTransform, CrossCorTransform, AutoCorTransform, MultiFracsTransform, AutoEncoderTransform  
from transformations import *
from prep_synth_data import create_synth_data

importing Jupyter notebook from transformations.ipynb
importing Jupyter notebook from prep_synth_data.ipynb


In [6]:
# Initialize the registry
registry = initialize_registry()#TransformationRegistry()

# # Register transformations
# registry.register('identity', IdentityTransform)
# registry.register('mean', Mean)
# registry.register('std', StandardDeviation)
# registry.register('fourier', FourierTransform)
# registry.register('low_fourier', LowFourierTransform)
# registry.register('low_psd', LowPsdTransform)
# registry.register('wavedec', WaveDecTransform)
# registry.register('dwt', DwtTransform)
# registry.register('cwt', CwtTransform)
# registry.register('autoreg', AutoRegTransform)
# registry.register('shannon_encoding', ShannonEncodingTransform)
# registry.register('wavelet_leaders', WaveletLeadersTransform)
# registry.register('multifracs', MultiFracsTransform)
# registry.register('crosscor', CrossCorTransform)
# registry.register('autocor', AutoCorTransform)
# registry.register('autoencoder', AutoEncoderTransform)
# registry.register('waveletscattering', WaveletScattering)

## Data Load

Link here [ecgs_labels.npy](https://drive.google.com/file/d/1cbUKH9qGOeIZD6Mf73plMkyXpq56mwIu/view?usp=sharing)

In [7]:
DATASET = "ECG"
# DATASET = "HRV"
# DATASET = "HRV_chall2002"
DATASET = "SYNTH_DATA"
# DATASET = "SYNTH_DATA2"
# DATASET = "SYNTH_DATA3"
#DATASET = "SYNTH_DATA4"
# DATASET = "SYNTH_DATA_SAME"
# DATASET = "HRV_DATA_GOOD"

In [8]:
if DATASET == "ECG":
    ecgs_labels = np.load('ecgs_labels.npy')

    X, y = ecgs_labels[1:,:-1], ecgs_labels[1:,-1]

In [9]:
if DATASET ==  "HRV":
    X, y = np.load('hrv_signals.npy'), np.load('hrv_labels.npy')

In [10]:
if DATASET ==  "HRV_chall2002":
    hrvs_labels = np.load('hrv_data.npy')

    X, y = hrvs_labels[:,:-1], hrvs_labels[:,-1]


In [11]:
if DATASET ==  "HRV_DATA_GOOD":
    hrvs_labels = np.load('hrv_data_good.npy')

    X, y = hrvs_labels[:,:-1], hrvs_labels[:,-1]

In [12]:
if DATASET ==  "SYNTH_DATA":
    Hs = [.01,.05,.1,.3,.03,.5,.6,.7,.75,.8,.9]
    lams = np.maximum(.17 + .03 * np.random.randn(len(Hs)),0.03)
    X, y = create_synth_data(Hs=Hs,
                             lams=lams,
                             n_per_class=100,
                             length = 2048,
                             concatenate_result=False
                             )
    # data = np.load('synth_data.npy')

    # X, y = data[:,:-1], data[:,-1]

In [13]:
if DATASET ==  "SYNTH_DATA2":
    data = np.load('synth_data2.npy')

    X, y = data[:,:-1], data[:,-1]

In [14]:
if DATASET ==  "SYNTH_DATA3":
    data = np.load('synth_data3.npy')

    X, y = data[:,:-1], data[:,-1]

In [15]:
if DATASET == "SYNTH_DATA4":
    data = np.load('synth_data_0.8_0.75.npy')

    X, y = data[:,:-1], data[:,-1]

In [16]:
if DATASET == "SYNTH_DATA_SAME":
    data = np.load('synth_data_same.npy')

    X, y = data[:,:-1], data[:,-1]

In [17]:
n,p = X.shape

print(f'X.shape : {n,p}')

X.shape : (1100, 2048)


In [18]:
j2max = min(12,int(np.log2(p) - 3.3))
j2max

7

In [19]:
p_ = 65_000

In [20]:
data_transformer = DataTransform(registry,save_data=False)
for trans_names in registry.transformations.keys():
        trans_names_str = [str(name) for name in trans_names]
        trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
        kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
        trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
        # Apply transformation
        transformed_X = data_transformer.apply_transformation(np.random.randn((10*p_)).reshape((10,p_)), trans_names, **kwargs)
        
        print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}")

Transformation: identity, Shape: (10, 65000)
Transformation: mean, Shape: (10, 1)
Transformation: std, Shape: (10, 1)
Transformation: fourier, Shape: (10, 65000)
Transformation: low_fourier, Shape: (10, 1092)
Transformation: low_psd, Shape: (10, 1092)
Transformation: wavedec, Shape: (10, 4063)
Transformation: dwt, Shape: (10, 32500)
Transformation: cwt, Shape: (10, 10)
Transformation: autoreg, Shape: (10, 3)
Transformation: shannon_encoding, Shape: (10, 8)
Transformation: wavelet_leaders, Shape: (10, 2)
Transformation: multifracs, Shape: (10, 3)
Transformation: newmultifracs, Shape: (10, 5)
Transformation: crosscor, Shape: (10, 10)
Transformation: autocor, Shape: (10, 26000)
Transformation: autoencoder, Shape: (10, 16)
Transformation: waveletscattering, Shape: (10, 203150)


In [21]:
# Initialize the data transformer
data_transformer = DataTransform(registry)

In [22]:
%%script false --no-raise-error

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Define the transformations to be tested
transformations = [
    # ['identity'],
    ['crosscor'],
    ['autocor', {'m':5000,'k':4}],
    ['fourier', {'new_dimension':40}],
    ['low_fourier'],
    ['low_psd'],
    ['cwt',{'pca_components' : 10}],
    ['wavedec'],
    ['autoreg', {'k': 3}],
    ['shannon_encoding'],
    ['wavelet_leaders'],
    ['multifracs'],
    ['multifracs', {'j1':1,'j2':12}],
    [['wavelet_leaders','shannon_encoding']],
    [['wavelet_leaders','multifracs']],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs',], {'new_dimension':40}],
    [['fourier','multifracs','shannon_encoding'], {'new_dimension':40}],
    [['low_fourier','multifracs','autoreg'], {'k':3}],
    
]




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Loop over each transformation and each classifier
results = {}

for trans_names in transformations:
    # print()
    trans_names_str = [str(name) for name in trans_names]
    trans_name_str = '+'.join(trans_names_str) if isinstance(trans_names, list) else trans_names
    kwargs = trans_names[1] if isinstance(trans_names, list) and len(trans_names) > 1 else {}
    trans_names = trans_names[0] if isinstance(trans_names, list) else trans_names
    
    # Apply transformation
    transformed_X = data_transformer.apply_transformation(X, trans_names, **kwargs)
    print(f"Transformation: {trans_name_str}, Shape: {transformed_X.shape}" )
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = (mean_accuracy, std_accuracy)
        print(f"Transformation: {trans_name_str}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

    print()
# Print the results
for trans_name, clf_results in results.items():
    print()
    for clf_name, (mean_accuracy, std_accuracy) in clf_results.items():
        print(f"Transformation: {trans_name}, Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")
        

Couldn't find program: 'false'


In [23]:
%%script false --no-raise-error
import random
import itertools
import json

# Define the classifiers to be tested
classifiers = {
    'SVM': SVC(),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}

# Initialize the data transformer
data_transformer = DataTransform(registry)

# Define the transformations to be tested
transformation_names = ['crosscor','low_psd','low_fourier',['autoreg',{'k':3}],['autoreg',{'k':5}], #'multifracs',
                        ['multifracs', {'j1':1,'j2':j2max}],
                        'shannon_encoding',['autoencoder',{'fourier_transform':True}]] #list(registry.transformations.keys())




# Function to evaluate a classifier using cross-validation
def evaluate_classifier_cv(classifier, X, y):
    scores = cross_val_score(classifier, X, y, cv=5)  # 5-fold cross-validation
    return np.mean(scores), np.std(scores)

# Function to randomly combine transformations
def random_combination_transformations(transformation_list, n_combinations=10):
    # all_combinations = []
    # for r in range(1, len(transformation_names) + 1):
    #     combinations = list(itertools.combinations(transformation_names, r))
    #     all_combinations.extend(combinations)
    
    # return random.sample(all_combinations, min(n_combinations, len(all_combinations)))
    nb_transformation = len(transformation_list)
    combined_transformations = []
    for _ in range(n_combinations):
        nb_trans = np.random.randint(1,5)

        already_drawn = []
        transs = []
        for _ in range(nb_trans):
            while True:
                random_ind = np.random.randint(nb_transformation)
                if random_ind not in already_drawn:
                    already_drawn.append(random_ind)
                    break
            trans = transformation_list[random_ind]
            transs.append(trans)
            
        combined_transformations.append(transs)
    return combined_transformations
# Generate random combinations of transformations
random_transformations = random_combination_transformations(transformation_names, n_combinations=5)
print(random_transformations)
# # Example input data
# X = np.random.randn(100, 10)  # Example input data
# y = np.random.randint(0, 2, 100)  # Example labels

# Dictionary to store results
results = {}

i = 0
# Loop over each random combination of transformations and each classifier
for trans in random_transformations:
    print()
    trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
    
        
    transformed_X = data_transformer.apply_transformation(X, trans)

    # trans_name_str = '+'.join(trans_comb)
    # transformed_X = data_transformer.apply_transformation(X, trans_comb)
    
    # Standardize the data (important for some classifiers like SVM)
    scaler = StandardScaler()
    transformed_X = scaler.fit_transform(transformed_X)
    
    results[trans_name_str] = {}
    i += 1
    print(f"Transformations n°{i}: {trans_name_str} {transformed_X.shape}")
    for clf_name, clf in classifiers.items():
        # Evaluate the classifier with cross-validation
        mean_accuracy, std_accuracy = evaluate_classifier_cv(clf, transformed_X, y)
        results[trans_name_str][clf_name] = {'mean_accuracy': mean_accuracy, 'std_accuracy': std_accuracy}
        print(f"Classifier: {clf_name}, Mean Accuracy: {mean_accuracy:.3f}, Std Dev: {std_accuracy:.3f}")

# Save results to a JSON file
with open('transformation_results0.json', 'w') as f:
    json.dump(results, f, indent=4)


Couldn't find program: 'false'


In [24]:
# # Save results to a JSON file
# with open('transformation_results_hrv_chall2002.json', 'w') as f:
#     json.dump(results, f, indent=4)

In [25]:
def evaluate_transformations(X, y, transformations, classifiers):
    dt = DataTransform(registry)
    results = []

    for trans in transformations:
        trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
        trans_X = dt.apply_transformation(X, trans)

        result = {'Transformation (shape)': trans_name_str}
        result["shape"] = trans_X.shape[-1]
        result['nb_trans'] = len(trans)

        scaler = StandardScaler()
        trans_X = scaler.fit_transform(trans_X) 
        print(f"Transformation: {trans_name_str}, Shape: {trans_X.shape[-1]} ")
        for clf_name, clf in classifiers.items():
            kf = KFold(n_splits=5, shuffle=True, random_state=42)
            scores = cross_val_score(clf, trans_X, y, cv=kf, scoring='accuracy')
            result[f'{clf_name} accuracy'] = np.mean(scores)
            result[f'{clf_name} std'] = np.std(scores)
            print(f"Classifier: {clf_name}, "
                  f"Mean Accuracy: {np.mean(scores):.3f}, Std Accuracy: {np.std(scores):.3f}")
        
        results.append(result)

    return pd.DataFrame(results)

In [26]:
X.shape

(1100, 2048)

In [27]:
# Define the classifiers
classifiers = {
    'SVM': SVC(kernel='linear'),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier()
}
transformations = [['waveletscattering',{'Q':1}],['waveletscattering']]
results_df = evaluate_transformations(X, y, transformations, classifiers)
# print(results_df)

# Save results to a CSV file
# results_df.to_csv(f'results/test_wst_transformation_results2.csv', index=False)

Transformation: waveletscattering_Q=1, Shape: 1664 
Classifier: SVM, Mean Accuracy: 0.968, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.780, Std Accuracy: 0.040
Classifier: RandomForest, Mean Accuracy: 0.932, Std Accuracy: 0.024
Transformation: waveletscattering, Shape: 6400 
Classifier: SVM, Mean Accuracy: 0.985, Std Accuracy: 0.007
Classifier: DecisionTree, Mean Accuracy: 0.710, Std Accuracy: 0.034
Classifier: RandomForest, Mean Accuracy: 0.936, Std Accuracy: 0.014


In [28]:
# # Define the classifiers
# classifiers = {
#     'SVM': SVC(kernel='linear'),
#     'DecisionTree': DecisionTreeClassifier(),
#     'RandomForest': RandomForestClassifier()
# }
# Q = 3
# transformations = [ ['waveletscattering',{'J':J,'Q':Q}] for J in range(0,12,4) ]
# # transformations = [['waveletscattering',{'Q':1}],['waveletscattering',{'J':3,'Q':1}],['waveletscattering',{'J':7,'Q':1}],['waveletscattering',{'pca_components':200,'Q':1}]]
# results_df = evaluate_transformations(X, y, transformations, classifiers)
# # print(results_df)

In [29]:
# # Define the classifiers
# classifiers = {
#     'SVM': SVC(kernel='linear'),
#     'DecisionTree': DecisionTreeClassifier(),
#     'RandomForest': RandomForestClassifier()
# }
# Q = 3
# transformations = [ ['waveletscattering',{'J':J,'Q':Q,'pca_components':200}] for J in range(10) ]
# # transformations = [['waveletscattering',{'Q':1}],['waveletscattering',{'J':3,'Q':1}],['waveletscattering',{'J':7,'Q':1}],['waveletscattering',{'pca_components':200,'Q':1}]]
# results_df = evaluate_transformations(X, y, transformations, classifiers)

In [30]:

# print("Transformation: waveletscattering, Shape: 65536\n\
# Classifier: SVM, Mean Accuracy: 0.995, Std Accuracy: 0.010\n\
# Classifier: DecisionTree, Mean Accuracy: 0.810, Std Accuracy: 0.085\n\
# Classifier: RandomForest, Mean Accuracy: 0.995, Std Accuracy: 0.010")

In [31]:
def random_combination_transformations(transformation_names, k = 2):
    all_combinations = []
    for r in range(1, k + 1):
        combinations = list(itertools.combinations(transformation_names, r))
        all_combinations.extend(combinations)
    
    return [list(x) for x in all_combinations] 


In [32]:
# results_df.to_csv(f'results/new__results_{DATASET}.csv', index=False)

In [33]:
# %%script false --no-raise-error


# Define the classifiers
classifiers = {
    'SVM': SVC(kernel='linear'),
    'DecisionTree': DecisionTreeClassifier(),
    'RandomForest': RandomForestClassifier(),
    # 'MLPClassifier': MLPClassifier(),
}

# # Define the data transformations
# transformations = [
#     'identity',
#     ['fourier', {'new_dimension': 100}],
#     ['wavedec', {'level': 4, 'wavelet': 'db1'}],
#     ['autoreg', {'k': 1}]
# ]
# Define the transformations to be tested
transformations = ['crosscor','std',
                   'identity',
                   "waveletscattering",
                    #'low_psd','low_fourier',
                    #['autoreg',{'k':1}], ['autoreg',{'k':2}],['autoreg',{'k':3}],
                    ['autoreg',{'k':5}],
                    ['wavelet_leaders', {'j1':3,'j2':j2max}],
                    ['multifracs', {'j1':4,'j2':j2max}], #'multifracs',
                    ['multifracs', {'j1':1,'j2':j2max}],
                    ['newmultifracs', {'j1':2,'j2':j2max}],
                    'shannon_encoding',['autoencoder',{'fourier_transform':True}]]

if DATASET == "ECG":
    transformations.extend(["low_psd","low_fourier"])

t = random_combination_transformations(transformations,k=4)
transformations = t



results_df = evaluate_transformations(X, y, transformations, classifiers)
# print(results_df)

# Save results to a CSV file
results_df.to_csv(f'results/new__results_{DATASET}.csv', index=False)


Transformation: crosscor, Shape: 10 
Classifier: SVM, Mean Accuracy: 0.088, Std Accuracy: 0.010
Classifier: DecisionTree, Mean Accuracy: 0.125, Std Accuracy: 0.016
Classifier: RandomForest, Mean Accuracy: 0.138, Std Accuracy: 0.025
Transformation: std, Shape: 1 
Classifier: SVM, Mean Accuracy: 0.242, Std Accuracy: 0.013
Classifier: DecisionTree, Mean Accuracy: 0.261, Std Accuracy: 0.010
Classifier: RandomForest, Mean Accuracy: 0.261, Std Accuracy: 0.010
Transformation: identity, Shape: 2048 
Classifier: SVM, Mean Accuracy: 0.144, Std Accuracy: 0.020
Classifier: DecisionTree, Mean Accuracy: 0.319, Std Accuracy: 0.037
Classifier: RandomForest, Mean Accuracy: 0.350, Std Accuracy: 0.036
Transformation: waveletscattering, Shape: 6400 
Classifier: SVM, Mean Accuracy: 0.985, Std Accuracy: 0.007
Classifier: DecisionTree, Mean Accuracy: 0.742, Std Accuracy: 0.026
Classifier: RandomForest, Mean Accuracy: 0.926, Std Accuracy: 0.022
Transformation: autoreg_k=5, Shape: 5 
Classifier: SVM, Mean Accu

In [34]:
DATASET

'SYNTH_DATA'

In [35]:
# results_df

In [36]:
def get_transformation_names(transformations):
    # san_transformations = DataTransform.sanitize_transformations(transformations)
    transformation_names = list()
    for transformation in transformations:
        # print(transformation)
        # input = transformation
        # print(len(input) == 2 and isinstance(input[0], str) and isinstance(input[1], dict))
        s_transformation = DataTransform.sanitize_transformations(transformation)
        names = list()
        for trans in transformation:
            trans_name, kwargs = DataTransform.handle_trans_kwargs(trans)
            names.append(DataTransform.get_trans_kwargs_str(trans_name, kwargs)) 
        transformation_names.append(names)
    return transformation_names

In [37]:
# DataTransform.sanitize_transformations(transformations)

In [38]:
transformations = ['crosscor','std',
                    #'low_psd','low_fourier',
                    #['autoreg',{'k':1}], ['autoreg',{'k':2}],['autoreg',{'k':3}],
                    ['autoreg',{'k':5}], #'multifracs',
                    ['multifracs', {'j1':1,'j2':j2max}],
                    'shannon_encoding',['autoencoder',{'fourier_transform':True}]]

if DATASET == "ECG":
    transformations.extend(["low_psd","low_fourier"])

transformations = random_combination_transformations(transformations,k=3)

In [39]:
def evaluate_transformations_and_get_aprior(X, y, transformations, classifier1, classifier2, seed = 42):
    dt = DataTransform(registry)
    results = []
    transformation_names = get_transformation_names(transformations)

    clf1better = list()
    clf2better = list()
    apr_list = list()
    kf = KFold(n_splits=5, shuffle=True, random_state=seed)
    for trans, trans_names in zip(transformations,transformation_names):
        trans_name_str = DataTransform.get_full_trans_kwargs_str(trans)
        trans_X = dt.apply_transformation(X, trans)

        result = {'Transformation (shape)': trans_name_str}
        result["shape"] = trans_X.shape[-1]
        result['nb_trans'] = len(trans)

        scaler = StandardScaler()
        trans_X = scaler.fit_transform(trans_X) 
        print(f"Transformation: {trans_name_str}, Shape: {trans_X.shape[-1]} ")

        scores = cross_val_score(classifier1, trans_X, y, cv=kf, scoring='accuracy')
        clf1_mean = np.mean(scores)
        clf1_std = np.std(scores)
        result[f'{classifier1.__class__.__name__} accuracy'] = clf1_mean
        result[f'{classifier1.__class__.__name__} std'] = clf1_std
        print(f"Classifier: {classifier1.__class__.__name__}, "
                f"Mean Accuracy: {np.mean(scores):.3f}, Std Accuracy: {np.std(scores):.3f}")
        scores = cross_val_score(classifier2, trans_X, y, cv=kf, scoring='accuracy')
        clf2_mean = np.mean(scores)
        clf2_std = np.std(scores)
        result[f'{classifier2.__class__.__name__} accuracy'] = clf2_mean
        result[f'{classifier2.__class__.__name__} std'] = clf2_std
        print(f"Classifier: {classifier2.__class__.__name__}, "
                f"Mean Accuracy: {np.mean(scores):.3f}, Std Accuracy: {np.std(scores):.3f}")
        
        if clf1_mean - 0.02  > clf2_mean: # clf1_mean > clf2_mean: #clf1_mean - clf1_std > clf2_mean + clf2_std: # + clf2_std:
            clf1better.extend(trans_names)
            apr_list.append(trans_names + ['clf1'])
        elif clf2_mean - 0.02 > clf1_mean: # clf2_mean > clf1_mean:#clf2_mean - clf2_std > clf1_mean + clf1_std: # + clf1_std:
            clf2better.extend(trans_names)
            apr_list.append(trans_names + ['clf2'])
        results.append(result)

    return pd.DataFrame(results), pd.Series(clf1better), pd.Series(clf2better), apr_list

In [40]:
res, apr1, apr2, apr_list = evaluate_transformations_and_get_aprior(X,y,transformations,SVC(kernel='linear'),RandomForestClassifier())

Transformation: crosscor, Shape: 10 
Classifier: SVC, Mean Accuracy: 0.088, Std Accuracy: 0.010
Classifier: RandomForestClassifier, Mean Accuracy: 0.121, Std Accuracy: 0.027
Transformation: std, Shape: 1 
Classifier: SVC, Mean Accuracy: 0.242, Std Accuracy: 0.013
Classifier: RandomForestClassifier, Mean Accuracy: 0.261, Std Accuracy: 0.010
Transformation: autoreg_k=5, Shape: 5 
Classifier: SVC, Mean Accuracy: 0.854, Std Accuracy: 0.013
Classifier: RandomForestClassifier, Mean Accuracy: 0.847, Std Accuracy: 0.020
Transformation: multifracs_j1=1_j2=7, Shape: 3 
Classifier: SVC, Mean Accuracy: 0.633, Std Accuracy: 0.032
Classifier: RandomForestClassifier, Mean Accuracy: 0.586, Std Accuracy: 0.018
Transformation: shannon_encoding, Shape: 8 
Classifier: SVC, Mean Accuracy: 0.506, Std Accuracy: 0.029
Classifier: RandomForestClassifier, Mean Accuracy: 0.994, Std Accuracy: 0.002
Transformation: autoencoder_fourier_transform=True, Shape: 16 
Classifier: SVC, Mean Accuracy: 0.142, Std Accuracy: 

In [41]:
apr1.value_counts()

multifracs_j1=1_j2=7                  7
autoencoder_fourier_transform=True    4
crosscor                              3
std                                   3
autoreg_k=5                           1
Name: count, dtype: int64

In [42]:
apr2.value_counts()

shannon_encoding                      16
crosscor                              11
autoencoder_fourier_transform=True     9
std                                    9
autoreg_k=5                            7
multifracs_j1=1_j2=7                   6
Name: count, dtype: int64

In [43]:
res, apr1, apr2, apr_list = evaluate_transformations_and_get_aprior(X,y,transformations,SVC(kernel='linear'),DecisionTreeClassifier())

Transformation: crosscor, Shape: 10 
Classifier: SVC, Mean Accuracy: 0.088, Std Accuracy: 0.010
Classifier: DecisionTreeClassifier, Mean Accuracy: 0.129, Std Accuracy: 0.018
Transformation: std, Shape: 1 
Classifier: SVC, Mean Accuracy: 0.242, Std Accuracy: 0.013
Classifier: DecisionTreeClassifier, Mean Accuracy: 0.261, Std Accuracy: 0.010
Transformation: autoreg_k=5, Shape: 5 
Classifier: SVC, Mean Accuracy: 0.854, Std Accuracy: 0.013
Classifier: DecisionTreeClassifier, Mean Accuracy: 0.815, Std Accuracy: 0.026
Transformation: multifracs_j1=1_j2=7, Shape: 3 
Classifier: SVC, Mean Accuracy: 0.633, Std Accuracy: 0.032
Classifier: DecisionTreeClassifier, Mean Accuracy: 0.558, Std Accuracy: 0.014
Transformation: shannon_encoding, Shape: 8 
Classifier: SVC, Mean Accuracy: 0.506, Std Accuracy: 0.029
Classifier: DecisionTreeClassifier, Mean Accuracy: 0.979, Std Accuracy: 0.006
Transformation: autoencoder_fourier_transform=True, Shape: 16 
Classifier: SVC, Mean Accuracy: 0.154, Std Accuracy: 

In [44]:
apr1.value_counts()

multifracs_j1=1_j2=7                  10
autoreg_k=5                            8
autoencoder_fourier_transform=True     7
crosscor                               5
std                                    5
Name: count, dtype: int64

In [45]:
apr2.value_counts()

shannon_encoding                      12
crosscor                               9
autoencoder_fourier_transform=True     8
std                                    7
multifracs_j1=1_j2=7                   4
autoreg_k=5                            1
Name: count, dtype: int64

In [46]:
result_list = list(apriori(apr_list))

In [47]:
for result in result_list:
    items = [x for x in result.items]
    
    for ordered_stat in result.ordered_statistics:
        if  (list(ordered_stat.items_add) == ['clf1'] or list(ordered_stat.items_add) == ['clf2']) and list(ordered_stat.items_base): 
            print(f"Rule: {items}")
            print(f"Support: {result.support} || {result.support * len(apr_list):.0f} examples")
            print(f"{list(ordered_stat.items_base)} ==> {list(ordered_stat.items_add)}")
            print(f"Confidence: {ordered_stat.confidence}")
            print(f"Lift: {ordered_stat.lift}")
            print("-----")

Rule: ['autoencoder_fourier_transform=True', 'clf1']
Support: 0.21212121212121213 || 7 examples
['autoencoder_fourier_transform=True'] ==> ['clf1']
Confidence: 0.4666666666666667
Lift: 1.0266666666666666
-----
Rule: ['autoencoder_fourier_transform=True', 'clf2']
Support: 0.24242424242424243 || 8 examples
['autoencoder_fourier_transform=True'] ==> ['clf2']
Confidence: 0.5333333333333333
Lift: 0.9777777777777779
-----
Rule: ['autoreg_k=5', 'clf1']
Support: 0.24242424242424243 || 8 examples
['autoreg_k=5'] ==> ['clf1']
Confidence: 0.888888888888889
Lift: 1.9555555555555557
-----
Rule: ['crosscor', 'clf1']
Support: 0.15151515151515152 || 5 examples
['crosscor'] ==> ['clf1']
Confidence: 0.35714285714285715
Lift: 0.7857142857142858
-----
Rule: ['multifracs_j1=1_j2=7', 'clf1']
Support: 0.30303030303030304 || 10 examples
['multifracs_j1=1_j2=7'] ==> ['clf1']
Confidence: 0.7142857142857143
Lift: 1.5714285714285716
-----
Rule: ['std', 'clf1']
Support: 0.15151515151515152 || 5 examples
['std'] ==

In [48]:
SVC().__class__.__name__

'SVC'