In [None]:
import sys
from pathlib import Path

# Add the root project folder to the python path in order to use the packages.
# NOTE: this is a machine-specific absolute path; adapt it when running on another host.
path_root = Path( '/project_ghent/HADSP/hadsp/')
# Append only once so that re-running this cell does not pile up duplicate entries.
if str(path_root) not in sys.path:
    sys.path.append(str(path_root))

In [None]:
import numpy as np
from scipy import sparse
from tqdm import tqdm
from importlib import reload

# SEED: notebook-wide random seed (passed as `random_state` to train_test_split further down).
SEED = 49387

from matplotlib import pyplot as plt
from seaborn import heatmap, color_palette

# Datasets loading

Many different datasets are available: https://towardsdatascience.com/a-data-lakes-worth-of-audio-datasets-b45b88cd4ad

Classification: 
https://arxiv.org/abs/1803.07870

https://github.com/FilippoMB/Time-series-classification-and-clustering-with-Reservoir-Computing

Multivariate:
https://www.timeseriesclassification.com/dataset.php

## Torchaudio

https://pytorch.org/audio/stable/datasets.html


## Prediction ahead

Datasets available :

* MackeyGlass
* Lorenz

In [None]:
from datasets.load_datasets import load_dataset_prediction
# Task flag: False selects the prediction code paths in the cells below
# (True would select the instance-classification paths).
is_instances_classification = False
dataset_name = "MackeyGlass"
# Forwarded to the loader; presumably the number of steps ahead to predict — confirm in load_dataset_prediction.
step_ahead=5

# The loader returns whether the data is multivariate, its sampling rate and the train/test splits.
is_multivariate, sampling_rate, X_train, X_test, Y_train, Y_test = load_dataset_prediction(dataset_name, step_ahead, visualize=True)


## Classification

Datasets available :

* FSDD
* HAART
* JapaneseVowels

## Formatting

### Multivariate Naming

In [None]:
# Datasets that are already multivariate are kept under the `*_band` names used by the
# later cells for multivariate inputs; the original names are removed afterwards.
if is_multivariate:
    X_train_band, X_test_band = X_train, X_test
    del X_train, X_test

### Check data formatting

In [None]:
# Axis index used to read `common_size` from the data:
# 1 for instance classification, 0 for prediction.
common_index = 1 if is_instances_classification else 0

if is_multivariate:
    if is_instances_classification:
        print("Common index for multivariate classification should be 1")
        # Print the shapes of the first two instances (the original indexed [2], i.e. the
        # third instance, while labelling it "second array").
        print("\nCheck it ! \nFirst array ", X_train_band[0].shape, " and second array", X_train_band[1].shape)
        common_size = X_train_band[0].shape[common_index]
    else:
        print("Common index for multivariate prediction should be 0")
        common_size = X_train_band.shape[common_index]


# Reservoir parameters

## Activation function

In [None]:
from reservoir.activation_functions import tanh, heaviside, sigmoid

# Activation function used for the rest of the experiment.
# Alternative kept for reference: sigmoid(2 * (x - 0.5)).
def activation_function(x):
    """Return the reservoir activation of `x` (currently tanh)."""
    return tanh(x)


# Quick visual check of the activation on [0, 1.1].
activation_grid = np.linspace(0, 1.1, 100)
plt.plot(activation_grid, activation_function(activation_grid))
plt.grid()

## Reservoir size

In [None]:
# Minimum number of neurons wanted in the reservoir (the final size is derived from it
# together with the number of input dimensions in a later cell).
RESERVOIR_SIZE = 500

# Cross validation

In [None]:
from sklearn.model_selection import train_test_split, GroupShuffleSplit

# Split: hold out a `val_size` fraction of the training data for validation.
# With shuffle=False the ordering is preserved, so the validation part is the tail of the data.
val_size = 0.2
split_options = dict(test_size=val_size, shuffle=False, random_state=SEED)
if is_multivariate:
    X_train_band, X_val_band, Y_train, Y_val = train_test_split(X_train_band, Y_train, **split_options)
else:
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, **split_options)


# Preprocessing

Spectrograms_vs_Cochleagrams : https://www.researchgate.net/publication/340510607_Speech_recognition_using_very_deep_neural_networks_Spectrograms_vs_Cochleagrams

Warning: for multivariate data, the shape should be (nb_of_timeseries, nb_of_timesteps).

## Multivariate generation (if not multivariate) and train_validation split

In [None]:
from datasets.multivariate_generation import generate_multivariate_dataset, extract_peak_frequencies

# Peak frequencies are extracted from the training split only.
if is_multivariate:
    freq_train_data = X_train_band
else:
    freq_train_data = X_train

# For instance classification the instances are concatenated along axis 0 first.
if is_instances_classification:
    flat_train_data = np.concatenate(freq_train_data, axis=0)
else:
    flat_train_data = freq_train_data

filtered_peak_freqs = extract_peak_frequencies(
    flat_train_data, sampling_rate, threshold=1e-5, nperseg=1024, visualize=False
)

if not is_multivariate:
    # One generated band per retained peak frequency.
    common_size = len(filtered_peak_freqs)
    # Same generation applied, in order, to the train, validation and test splits.
    X_train_band, X_val_band, X_test_band = (
        generate_multivariate_dataset(
            filtered_peak_freqs, split_data, sampling_rate, is_instances_classification, nb_jobs=-1
        )
        for split_data in (X_train, X_val, X_test)
    )


## Standardizing the amplitudes

In [None]:
from sklearn.preprocessing import MinMaxScaler
from datasets.preprocessing import scale_data

# Scale the multivariate (band) data to [0, 1] with its own scaler.
scaler_multi = MinMaxScaler(feature_range=(0, 1))
X_train_band, X_val_band, X_test_band = scale_data(X_train_band, X_val_band, X_test_band, scaler_multi, is_instances_classification)

if not is_multivariate:
    # The univariate series gets a dedicated scaler. The original passed `scaler_multi`
    # here, leaving `scaler_x_uni` unused and reusing the scaler of the band data.
    scaler_x_uni = MinMaxScaler(feature_range=(0, 1))
    X_train, X_val, X_test = scale_data(X_train, X_val, X_test, scaler_x_uni, is_instances_classification)


## Formatting

### noise level

In [None]:
# Define noise parameter: value passed to `add_noise` when building the noisy copies of the data
# (presumably the standard deviation of the added noise — confirm in add_noise).
noise_std = 0.001

### common size

**common_size** : the number of different dimensions in the input data

**K** : the number of neurons that will receive a particular time series as input

In [None]:
import math 

# Each input dimension is copied K times; K is rounded up so that the resulting
# reservoir size n = K * common_size is at least RESERVOIR_SIZE.
K = math.ceil(RESERVOIR_SIZE / common_size)
n = K * common_size
print(f"Dimension of our reservoir : {n}")
print(f"Copy of each time serie : {K}")

## Noising and duplication

In [None]:
from datasets.preprocessing import add_noise, duplicate_data

# PRETRAIN NOISE
# UNI
if not is_multivariate:
    X_pretrain_uni = X_train.flatten()
    # The original called add_noise(instance, ...) here, but `instance` is not defined at
    # this point (NameError on a fresh kernel). The noise is applied to the training series.
    X_pretrain_noisy = add_noise(X_train, noise_std).flatten()

# MULTI
# Pretraining uses the training bands; the noisy variant is built element by element.
X_pretrain_band = X_train_band
X_pretrain_band_noisy = [add_noise(instance, noise_std) for instance in X_pretrain_band]


#Train/Val/Test
if is_instances_classification:
    # NOISE
    # UNI
    if not is_multivariate:
        X_train_noisy = [add_noise(instance, noise_std) for instance in tqdm(X_train, desc="TRAIN")]
        X_val_noisy = [add_noise(instance, noise_std) for instance in tqdm(X_val, desc="VAL")]
        X_test_noisy = [add_noise(instance, noise_std) for instance in tqdm(X_test, desc="TEST")]

    # MULTI
    X_train_band_noisy = [add_noise(instance, noise_std) for instance in tqdm(X_train_band, desc="TRAIN")]
    X_val_band_noisy = [add_noise(instance, noise_std) for instance in tqdm(X_val_band, desc="VAL")]
    X_test_band_noisy = [add_noise(instance, noise_std) for instance in tqdm(X_test_band, desc="TEST")]

    # DUPLICATION
    # Instances are duplicated through the project helper, with K copies requested.
    X_pretrain_multi = duplicate_data(X_train_band, K)
    X_pretrain_multi_noisy = duplicate_data(X_pretrain_band_noisy, K)

    X_train_band_duplicated = duplicate_data(X_train_band, K)
    X_train_band_noisy_duplicated = duplicate_data(X_train_band_noisy, K)
    X_val_band_duplicated = duplicate_data(X_val_band, K)
    X_val_band_noisy_duplicated = duplicate_data(X_val_band_noisy, K)
    X_test_band_duplicated = duplicate_data(X_test_band, K)
    X_test_band_noisy_duplicated = duplicate_data(X_test_band_noisy, K)


else:  #if prediction
    # NOISE
    # UNI
    if not is_multivariate:
        X_train_noisy = add_noise(X_train, noise_std)
        X_test_noisy = add_noise(X_test, noise_std)
        X_val_noisy = add_noise(X_val, noise_std)

    # MULTI
    X_train_band_noisy = add_noise(X_train_band, noise_std)
    X_val_band_noisy = add_noise(X_val_band, noise_std)
    X_test_band_noisy = add_noise(X_test_band, noise_std)

    # DUPLICATION
    # Every column is repeated K times along axis 1.
    X_pretrain_multi = np.repeat(X_pretrain_band, K, axis=1)
    X_pretrain_multi_noisy = np.repeat(X_pretrain_band_noisy, K, axis=1)

    X_train_band_duplicated = np.repeat(np.array(X_train_band), K, axis=1)
    X_val_band_duplicated = np.repeat(np.array(X_val_band), K, axis=1)
    X_test_band_duplicated = np.repeat(np.array(X_test_band), K, axis=1)
    X_train_band_noisy_duplicated = np.repeat(np.array(X_train_band_noisy), K, axis=1)
    X_val_band_noisy_duplicated = np.repeat(np.array(X_val_band_noisy), K, axis=1)
    X_test_band_noisy_duplicated = np.repeat(np.array(X_test_band_noisy), K, axis=1)

# Drop the non-duplicated band data; only the duplicated versions are used from here on.
# NOTE: this makes the cell non re-runnable without re-executing the cells above.
del X_train_band, X_val_band, X_test_band, X_train_band_noisy, X_val_band_noisy, X_test_band_noisy

# Hyperparameter search

## Generated matrix

In [None]:
# Min window size to get all the dynamics ?
# Window bounds are the sampling rate divided by the highest / lowest extracted peak frequency.
all_peak_freqs = np.hstack(filtered_peak_freqs)
min_window_size = sampling_rate / all_peak_freqs.max()
max_window_size = sampling_rate / all_peak_freqs.min()

print(min_window_size)
print(max_window_size)

In [None]:
from performances.esn_model_evaluation import init_and_train_model_for_classification, predict_model_for_classification, compute_score
from joblib import Parallel, delayed
from reservoir.reservoir import init_matrices
from connexion_generation.bounded_hadsp import run_hadsp_algorithm
from performances.esn_model_evaluation import init_and_train_model_for_prediction
from connexion_generation.desp import run_desp_algorithm
import connexion_generation.desp
reload(connexion_generation.desp)

# Number of parallel jobs forwarded to the classification train/predict helpers.
N_JOBS = -1

# TO CALCULATE SCORE FOR PREDICTION
START_STEP = 0
END_STEP = 500
slice_range = slice(START_STEP, END_STEP)

# Connection-generation algorithm and validation data variant used by `objective`.
function_name = "hadsp" # "desp" or "hadsp"
data_type = "noisy" # "normal" or "noisy"

def objective(trial):
    """Optuna objective for reservoirs whose connections are generated by HADSP or DESP.

    Samples the hyperparameters from `trial`, builds and pretrains a reservoir with the
    algorithm selected by the module-level `function_name`, trains a readout on the
    training data and scores it on the validation data. The procedure is repeated
    three times and the mean score is returned.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        The validation score averaged over the repetitions.

    Raises:
        ValueError: if `function_name` is neither "hadsp" nor "desp".
    """
    # Fail fast on an unknown algorithm name. The original raised from inside the training
    # helper using an undefined variable (`function`), after silently sampling DESP parameters.
    if function_name not in ("hadsp", "desp"):
        raise ValueError(f"Invalid function: {function_name}")

    # Suggest values for the parameters you want to optimize
    # COMMON
    input_scaling = trial.suggest_float('input_scaling', 0.01, 0.2, step=0.01)
    bias_scaling = trial.suggest_float('bias_scaling', 0, 0.2, step=0.01)
    leaky_rate = trial.suggest_float('leaky_rate', 1, 1)
    connectivity = trial.suggest_float('connectivity', 0, 0)
    input_connectivity = trial.suggest_float('input_connectivity', 1, 1)
    # Only recorded on the trial; the reservoir itself is built with `n` neurons.
    trial.suggest_float('network_size', RESERVOIR_SIZE, RESERVOIR_SIZE)
    weight_increment = trial.suggest_float('weight_increment', 0.01, 0.5, step=0.01)

    # The ridge coefficient is searched on a log10 scale.
    ridge = trial.suggest_int('ridge', -10, 1)
    RIDGE_COEF = 10**ridge

    # HADSP
    if function_name == "hadsp":
        target_rate = trial.suggest_float('target_rate', 0.5, 1, step=0.01)
        rate_spread = trial.suggest_float('rate_spread', 0.01, 0.4, step=0.01)
        TIME_INCREMENT = int(min_window_size+1) # int(min_window_size+1) or int(max_window_size)
        MAX_TIME_INCREMENT = int(max_window_size) #int(max_window_size) or None or TIME_INCREMENT
    # DESP
    else:
        min_variance = trial.suggest_float('min_variance', 0.001, 0.01, step=0.001)
        variance_window = trial.suggest_float('variance_window', 0.01, 0.02, step=0.005)
        max_variance = min_variance + variance_window
        TIME_INCREMENT = 100 # int(min_window_size+1) or int(max_window_size)
        MAX_TIME_INCREMENT = TIME_INCREMENT #int(max_window_size) or None or TIME_INCREMENT

    # PREPROCESSING (identical for every repetition, so selected once)
    pretrain_data = X_pretrain_multi
    train_data = X_train_band_duplicated # X_train_band_noisy_duplicated or X_train_band_duplicated
    val_data = X_val_band_noisy_duplicated if data_type == "noisy" else X_val_band_duplicated

    # UNSUPERVISED PRETRAINING
    def initialise_and_train(input_scaling, n, input_connectivity, connectivity, bias_scaling, training_set):
        """Create fresh matrices, scale them and run the selected connection-generation algorithm."""
        Win, W, bias = init_matrices(n, input_connectivity, connectivity)
        bias *= bias_scaling
        Win *= input_scaling

        if function_name == "hadsp":
            W, _ = run_hadsp_algorithm(W, Win, bias, leaky_rate, activation_function, training_set, TIME_INCREMENT, weight_increment,
                                    target_rate, rate_spread, max_increment=MAX_TIME_INCREMENT, mi_based=False, instances=is_instances_classification, common_index=common_index, visualize=False)
        else:  # "desp" (validated at the top of objective)
            W, _, _ = run_desp_algorithm(W, Win, bias, leaky_rate, activation_function, training_set, TIME_INCREMENT, weight_increment,
                        min_variance, max_variance, max_increment=MAX_TIME_INCREMENT, mi_based=True, instances=is_instances_classification, common_index=common_index, n_jobs = 1, visualize=False)

        return Win, W, bias

    total_score = 0
    average_nb = 3
    for _ in range(average_nb):  # Repeat the process three times
        Win, W, bias = initialise_and_train(input_scaling, n, input_connectivity, connectivity, bias_scaling, pretrain_data)

        # EVALUATION
        if is_instances_classification:
            reservoir, readout = init_and_train_model_for_classification(W, np.diag(Win.A.T[0]), bias, leaky_rate, activation_function, train_data, Y_train, N_JOBS, RIDGE_COEF, mode="sequence-to-vector")

            Y_pred = predict_model_for_classification(reservoir, readout, val_data, N_JOBS)
        else:
            esn = init_and_train_model_for_prediction(W, np.diag(Win.A.T[0]), bias, leaky_rate, activation_function, train_data, Y_train, RIDGE_COEF)

            Y_pred = esn.run(val_data, reset=False)

        total_score += compute_score(Y_pred, Y_val, is_instances_classification)

    average_score = total_score / average_nb  # Average the score

    return average_score


In [None]:
import optuna
from optuna.samplers import TPESampler
import re

def camel_to_snake(name):
    """Convert a CamelCase name to snake_case, e.g. "MackeyGlass" -> "mackey_glass"."""
    converted = name
    # First pass breaks before a capitalised word, second pass separates a lower-case
    # letter or digit from the capital that follows it.
    for pattern in ('(.)([A-Z][a-z]+)', '([a-z0-9])([A-Z])'):
        converted = re.sub(pattern, r'\1_\2', converted)
    return converted.lower()

# Trials are persisted in a per-dataset SQLite file. The extension was a garbled string
# ("_db.â‰ˆ"); it now matches the "_db.sqlite3" file used by the random-matrix search cell,
# so both searches write to the same database.
storage = optuna.storages.RDBStorage(
    url="sqlite:///optuna_" + camel_to_snake(dataset_name) + "_db.sqlite3",
    engine_kwargs={"pool_size": 20, "connect_args": {"timeout": 10}},
)
# One study per (algorithm, dataset, data variant) combination.
study_name = function_name + "_" + dataset_name + "_" + data_type
# The direction depends on the task: maximize for instance classification, minimize for prediction.
direction = "maximize" if is_instances_classification else "minimize"
sampler = TPESampler()

def optimize_study(n_trials):
    """Open (or create) the shared study and run `n_trials` trials of `objective` on it."""
    study = optuna.create_study(
        study_name=study_name,
        storage=storage,
        sampler=sampler,
        direction=direction,
        load_if_exists=True,
    )
    study.optimize(objective, n_trials=n_trials)

# Total trial budget, divided evenly (integer division) between the workers.
N_TRIALS = 400
n_jobs = 10
trials_per_process = N_TRIALS // n_jobs

# Use joblib to parallelize the optimization: every worker calls optimize_study with its
# share of the trials, all against the same study name and storage.
Parallel(n_jobs=n_jobs)(
    delayed(optimize_study)(trials_per_process) for _ in range(n_jobs)
)


In [None]:
# Test inputs matching the selected `data_type`: the noisy duplicated test set when
# data_type is "noisy", the clean duplicated test set otherwise.
test_data_multi = X_test_band_noisy_duplicated if data_type == "noisy" else X_test_band_duplicated

## Random matrix

In [None]:
from performances.esn_model_evaluation import init_and_train_model_for_classification, predict_model_for_classification, compute_score
from joblib import Parallel, delayed
from reservoir.reservoir import init_matrices
from connexion_generation.bounded_hadsp import run_hadsp_algorithm
from connexion_generation.utility import TwoDimArrayWrapper
from performances.esn_model_evaluation import init_and_train_model_for_prediction

# Number of parallel jobs forwarded to the classification train/predict helpers.
N_JOBS = -1

def objective(trial):
    """Optuna objective for the random-reservoir baseline.

    Samples the hyperparameters from `trial`, draws a random reservoir, trains a readout
    on the training data and scores it on the validation data. The procedure is repeated
    three times and the mean score is returned.

    The score is computed on the validation split (not the test split) so that the
    hyperparameter search never sees test data, in line with the HADSP/DESP objective
    defined earlier in this notebook.

    Args:
        trial: the Optuna trial used to sample hyperparameters.

    Returns:
        The validation score averaged over the repetitions.
    """
    # Suggest values for the parameters you want to optimize
    input_scaling = trial.suggest_float('input_scaling', 0.01, 1.0, step=0.01)
    bias_scaling = trial.suggest_float('bias_scaling', 0, 1, step=0.05)
    leaky_rate = trial.suggest_float('leaky_rate', 1, 1)
    connectivity = trial.suggest_float('connectivity', 0, 1)
    input_connectivity = trial.suggest_float('input_connectivity', 1, 1)
    # Only recorded on the trial; the reservoir itself is built with `n` neurons.
    trial.suggest_float('network_size', RESERVOIR_SIZE, RESERVOIR_SIZE)
    sr = trial.suggest_float('spectral_radius', 0.4, 1.6, step=0.01)
    # The ridge coefficient is searched on a log10 scale.
    ridge = trial.suggest_int('ridge', -10, 1)
    RIDGE_COEF = 10**ridge

    train_data_multi = X_train_band_duplicated # X_train_band_noisy_duplicated or X_train_band_duplicated
    val_data_multi = X_val_band_noisy_duplicated if data_type == "noisy" else X_val_band_duplicated

    total_score = 0
    average_nb = 3
    for _ in range(average_nb):  # Repeat the process three times

        # random + multi: a new random reservoir is drawn for every repetition
        Win_random_multi, W_random_multi, bias_random_multi = init_matrices(n, input_connectivity, connectivity, sr)
        bias_random_multi = bias_random_multi*bias_scaling
        Win_random_multi = Win_random_multi*input_scaling

        if is_instances_classification:
            reservoir_random_multi, readout_random_multi = init_and_train_model_for_classification(W_random_multi, np.diag(Win_random_multi.A.T[0]), bias_random_multi, leaky_rate, activation_function, train_data_multi, Y_train, N_JOBS, RIDGE_COEF, mode="sequence-to-vector")
            Y_pred = predict_model_for_classification(reservoir_random_multi, readout_random_multi, val_data_multi, N_JOBS)
        else:
            esn_random_multi = init_and_train_model_for_prediction(W_random_multi, np.diag(Win_random_multi.A.T[0]), bias_random_multi, leaky_rate, activation_function, train_data_multi, Y_train, RIDGE_COEF)
            Y_pred = esn_random_multi.run(val_data_multi, reset=False)

        total_score += compute_score(Y_pred, Y_val, is_instances_classification)

    average_score = total_score / average_nb  # Average the score

    return average_score


In [None]:
import optuna
from optuna.samplers import TPESampler
import re

def camel_to_snake(name):
    """Convert a CamelCase name to snake_case, e.g. "MackeyGlass" -> "mackey_glass"."""
    converted = name
    # First pass breaks before a capitalised word, second pass separates a lower-case
    # letter or digit from the capital that follows it.
    for pattern in ('(.)([A-Z][a-z]+)', '([a-z0-9])([A-Z])'):
        converted = re.sub(pattern, r'\1_\2', converted)
    return converted.lower()

# Trials are persisted in a per-dataset SQLite file.
db_url = "sqlite:///optuna_" + camel_to_snake(dataset_name) + "_db.sqlite3"
storage = optuna.storages.RDBStorage(
    url=db_url,
    engine_kwargs={"pool_size": 20, "connect_args": {"timeout": 10}},
)
# One study per (dataset, data variant) combination for the random baseline.
study_name = f"random_{dataset_name}_{data_type}"
# The direction depends on the task: maximize for instance classification, minimize for prediction.
if is_instances_classification:
    direction = "maximize"
else:
    direction = "minimize"
sampler = TPESampler()


def optimize_study(n_trials):
    """Open (or create) the shared study and run `n_trials` trials of `objective` on it.

    `storage` and `sampler` are passed by keyword: `optuna.create_study` takes
    keyword-only arguments in Optuna 3.x, so the original positional call raises TypeError.
    """
    study = optuna.create_study(storage=storage, sampler=sampler, study_name=study_name, direction=direction, load_if_exists=True)
    study.optimize(objective, n_trials=n_trials)


# Total trial budget, divided evenly (integer division) between the workers.
N_TRIALS = 400
n_jobs = 10
trials_per_process = N_TRIALS // n_jobs

# Use joblib to parallelize the optimization: every worker calls optimize_study with its
# share of the trials, all against the same study name and storage.
Parallel(n_jobs=n_jobs)(
    delayed(optimize_study)(trials_per_process) for _ in range(n_jobs)
)


# Results



## Mackey Glass
### Ridge parameter
| Dataset     | Algorithm | test   | Curve shape | Best value |
|-------------|-----------|--------|-------------|------------|
| MackeyGlass | DESP      | normal | no extremum | 10         |
| MackeyGlass | HADSP     | normal | no extremum | 9,10       |
| MackeyGlass | random    | normal | no extremum | 9,10       |
| MackeyGlass | DESP      | noisy  | bell        | 7          |
| MackeyGlass | HADSP     | noisy  | bell        | 6          |
| MackeyGlass | random    | noisy  | no extremum | 10         |

### Results, no validation/test

|           |  HADSP |  DESP  | random |
|-----------|--------|--------|--------|
|  normal   | 0.0399 | 0.0379 | 0.0446 |
|  noisy    | 0.119  | 0.109  | 0.104  | 


## Japanese vowels

### Ridge parameter

| Dataset        | Algorithm | test   | Curve shape | Best value |
|----------------|-----------|--------|-------------|------------|
| JapaneseVowels | DESP      | normal | bell        | 2          |
| JapaneseVowels | HADSP     | normal | bell        | 4          |
| JapaneseVowels | random    | normal | bell        | 4          |
| JapaneseVowels | DESP      | noisy  | bell        | 2          |
| JapaneseVowels | HADSP     | noisy  | bell        | 5          |
| JapaneseVowels | random    | noisy  | bell        | 5          |

### Results, no validation/test

|           |  HADSP |  DESP  | random |
|-----------|--------|--------|--------|
|  normal   | 0.641  | 0.649  | 0.612  |
|  noisy    | 0.621  | 0.623  | 0.610  | 