# 0. Optuna

In [None]:
import optuna
import numpy as np
import scipy.io as sio
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing   import StandardScaler
from sklearn.linear_model    import LogisticRegression

# ------------------------------------------------------------------
# Load the data & prepare it
# ------------------------------------------------------------------
# Read the simulated recordings used for the hyperparameter search and
# flatten the label array to one dimension.
readdata     = sio.loadmat('./dat/simulated_neural_data_for_Optuna.mat')
data, labels = readdata['data'], readdata['label'].ravel()
for message in ("Data Loaded:",
                f"Data shape: {data.shape}",
                f"Labels shape: {labels.shape}"):
    print(message)

# ------------------------------------------------------------------
# Import ReservoirNetwork
# ------------------------------------------------------------------
# The module is reloaded before its names are imported so that edits made to
# ReservoirNetwork.py during the session are picked up without a kernel restart.
import importlib
import ReservoirNetwork
importlib.reload(ReservoirNetwork)
from ReservoirNetwork import ReservoirNetwork, extract_reservoir_features

#-------------------------------------------------------------------
# Seed NumPy's global RNG (the reservoir is additionally seeded through
# its own random_state argument where it is constructed).
#-------------------------------------------------------------------
np.random.seed(42)

# ------------------------------------------------------------------
# Define the objective function for Optuna
# ------------------------------------------------------------------
def objective(trial):
    """Optuna objective: mean 5-fold CV accuracy of reservoir features + logistic regression.

    Samples the reservoir hyperparameters from ``trial``, builds a
    ``ReservoirNetwork`` with them, extracts features for the module-level
    ``data`` array and scores a scaler + logistic-regression pipeline against
    the module-level ``labels`` with stratified 5-fold cross-validation.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample hyperparameters and report the score.

    Returns
    -------
    float
        Mean cross-validated accuracy (the study maximizes this value).

    Raises
    ------
    optuna.TrialPruned
        If the study's pruner asks for this trial to be stopped.
    """
    # Imported locally so this cell does not depend on imports made elsewhere.
    from sklearn.pipeline import make_pipeline

    # 1) Sample hyperparameters (the sampling rate is fixed, not tuned)
    Fs                   = 1000
    fstep                = trial.suggest_int(  'fstep', 1, 10)
    sigma                = trial.suggest_float('sigma', 1e-4, 1e-1, log=True)
    sparsity             = trial.suggest_float('sparsity', 0.0, 1.0)
    spectral_radius      = trial.suggest_float('spectral_radius', 0.1, 1.5)
    base_geometric_ratio = trial.suggest_float('base_geometric_ratio', 0.5, 0.99)

    # 2) Build reservoir & extract _all_ features
    res_net = ReservoirNetwork(Fs=Fs, fstep=fstep, sigma=sigma,
                               sparsity=sparsity, spectral_radius=spectral_radius,
                               base_geometric_ratio=base_geometric_ratio,
                               random_state=42)  # make the reservoir itself reproducible
    X_feat  = extract_reservoir_features(res_net, data)

    # 3) Cross-validated logistic regression.
    # The scaler lives inside the pipeline so it is re-fitted on the training
    # part of every fold; fitting it once on all samples would leak the
    # held-out fold's mean/variance into training and bias the score upward.
    model  = make_pipeline(
        StandardScaler(),
        LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=10_000),
    )
    skf    = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(model, X_feat, labels, cv=skf, scoring='accuracy', n_jobs=1)  # ← only one job for CV

    mean_accuracy = scores.mean()

    # Single intermediate report so a pruner attached to the study can act on it.
    trial.report(mean_accuracy, step=0)
    if trial.should_prune():
        raise optuna.TrialPruned()
    return mean_accuracy

# ------------------------------------------------------------------
# Run optimization 
# ------------------------------------------------------------------

# Date-stamp (YYYY-MM-DD) the SQLite file that stores the study
from datetime import datetime
date_today = f"{datetime.today():%Y-%m-%d}"
save_name  = "sqlite:///simulated_optuna_results_" + date_today + ".db"

# Create (or resume, if it already exists in today's file) and optimize the study.
# NOTE(review): objective() reports only step 0, so with n_warmup_steps=10 the
# median pruner presumably never triggers — confirm whether pruning is intended.
median_pruner = optuna.pruners.MedianPruner(n_warmup_steps=10)
study = optuna.create_study(
    study_name="sim_optuna",
    storage=save_name,
    load_if_exists=True,
    direction="maximize",
    pruner=median_pruner,
)
study.optimize(objective, n_trials=1000)

# Report the best trial
print("Best hyperparameters:", study.best_params)
print("Best accuracy:", study.best_value)

---
# 1. Simulated neural rhythms

## Run: reservoir or simple power values. 

In [None]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import importlib
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing   import StandardScaler
from sklearn.linear_model    import LogisticRegression
from sklearn.metrics         import accuracy_score, confusion_matrix
from ReservoirNetwork        import ReservoirNetwork, extract_reservoir_features
from getFFTfeatures          import extract_fft_power_features

feature_choice = 'reservoir'  # alternative: feature_choice = 'fft_power'


def _extract_features(res_net, X, feature_choice, Fs):
    """Return the feature matrix for ``X`` using the selected feature type.

    Parameters
    ----------
    res_net : ReservoirNetwork
        Used directly for 'reservoir' features; for 'fft_power' only the first
        and last entries of ``res_net.frange`` are read, as the band limits.
    X : array-like
        Signals to featurize.
    feature_choice : str
        Either 'reservoir' or 'fft_power'.
    Fs : int or float
        Sampling rate, forwarded as ``fs`` to ``extract_fft_power_features``.

    Raises
    ------
    ValueError
        If ``feature_choice`` is neither 'reservoir' nor 'fft_power'.
    """
    if feature_choice == 'reservoir':
        return extract_reservoir_features(res_net, X)
    if feature_choice == 'fft_power':
        power = extract_fft_power_features(X, fs=Fs, fmin=res_net.frange[0], fmax=res_net.frange[-1])
        return np.vstack(power)
    raise ValueError("Invalid feature_choice! Must be 'reservoir' or 'fft_power'.")


# ------------------------------------------------------------------
# 1) Load Data
# ------------------------------------------------------------------
readdata = sio.loadmat('dat/simulated_neural_data.mat')
data     = readdata['data']
labels   = readdata['label'].ravel()
print("Data Loaded:")
print(f"Data shape: {data.shape}")
print(f"Labels shape: {labels.shape}")

# The class list is taken from the full label vector (instead of assuming four
# classes) so the per-class counters and every confusion matrix keep one fixed
# layout, even if some split happened to miss a class.
classes   = np.unique(labels)
n_classes = classes.size

K  = 100    # number of repeated random train/test splits
Fs = 1000   # sampling rate handed to the reservoir / FFT feature extractors
accuracy         = np.zeros(K)
correct_counts   = np.zeros([K, n_classes])
confuse_matrix   = []

for k in range(K):
    split_seed = 100 + k

    # ------------------------------------------------------------------
    # 2) Create the ReservoirNetwork using default optimized parameters
    # ------------------------------------------------------------------
    res_net = ReservoirNetwork(Fs=Fs, random_state=split_seed)

    # ------------------------------------------------------------------
    # 3) Split the data into training and testing sets
    # ------------------------------------------------------------------
    X_train, X_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.2, stratify=labels, random_state=split_seed
    )
    print("Training data label distribution:")
    unique_labels_train, counts_train = np.unique(y_train, return_counts=True)
    for ul, ct in zip(unique_labels_train, counts_train):
        print(f"Label {ul}: {ct} samples")

    # ------------------------------------------------------------------
    # 4) Extract Features for Training
    # ------------------------------------------------------------------
    X_train_features = _extract_features(res_net, X_train, feature_choice, Fs)
    print(f"Training feature extraction complete ({feature_choice}).")
    print(f"Training feature shape: {X_train_features.shape}")
    scaler                  = StandardScaler()
    X_train_features_scaled = scaler.fit_transform(X_train_features)

    # ------------------------------------------------------------------
    # 5) Train the Classifier
    # ------------------------------------------------------------------
    clf = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=10000)
    clf.fit(X_train_features_scaled, y_train)

    # ------------------------------------------------------------------
    # 6) Extract Features for Test Set (scaled with the training statistics)
    # ------------------------------------------------------------------
    X_test_features        = _extract_features(res_net, X_test, feature_choice, Fs)
    X_test_features_scaled = scaler.transform(X_test_features)

    # ------------------------------------------------------------------
    # 7) Predict and Evaluate
    # ------------------------------------------------------------------
    y_pred      = clf.predict(X_test_features_scaled)
    accuracy[k] = accuracy_score(y_test, y_pred)
    print(k, accuracy[k])

    # labels=classes pins the row/column order of every matrix
    cm = confusion_matrix(y_test, y_pred, labels=classes)
    confuse_matrix.append(cm)

    print("Correct classifications per test label:")
    correct_count = np.zeros(n_classes)
    for index, ul in enumerate(classes):
        test_mask = (y_test == ul)
        correct_count[index] = np.sum((y_pred == ul) & test_mask)
        print(f"Label {ul}: Correctly identified {correct_count[index]} times out of {np.sum(test_mask)}")

    correct_counts[k, :] = correct_count

# Convert the list of confusion matrices into a NumPy array
confuse_matrix = np.array(confuse_matrix)

# Get today's date in YYYY-MM-DD format
from datetime import datetime
date_today = datetime.today().strftime('%Y-%m-%d')
out_fname  = f"SIM_accuracy_results_{feature_choice}_{date_today}.pkl"

# Save the results. The per-split objects (scaler, res_net, X_*, y_*) are the
# ones left over from the LAST of the K repetitions.
results = {
    'accuracy': accuracy,
    'correct_counts': correct_counts,
    'confuse_matrix': confuse_matrix,
    'scaler': scaler,
    'res_net': res_net,
    'X_train': X_train,
    'y_train': y_train,
    'X_test': X_test,
    'y_test': y_test,
    'X_train_features': X_train_features,
    'X_test_features': X_test_features,
    'y_pred': y_pred
}

with open(out_fname, "wb") as f:
    pickle.dump(results, f)

## Run: Increase frequency step.

In [None]:
import numpy as np
import scipy.io as sio
import matplotlib.pyplot as plt
import importlib
import pickle
from ReservoirNetwork        import ReservoirNetwork, extract_reservoir_features
from sklearn.model_selection import train_test_split
from sklearn.preprocessing   import StandardScaler
from sklearn.linear_model    import LogisticRegression
from sklearn.metrics         import accuracy_score, confusion_matrix

# ------------------------------------------------------------------
# 1) Load Data
# ------------------------------------------------------------------
from datetime import datetime  # imported once here instead of on every fstep iteration

readdata = sio.loadmat('dat/simulated_neural_data.mat')
data     = readdata['data']
labels   = readdata['label'].ravel()
print("Data Loaded:")
print(f"Data shape: {data.shape}")
print(f"Labels shape: {labels.shape}")

# The class list is taken from the full label vector (instead of assuming four
# classes) so the per-class counters and every confusion matrix keep one fixed
# layout.
classes   = np.unique(labels)
n_classes = classes.size

K      = 100                 # repeated random splits per frequency step
fsteps = np.arange(2,21)     # frequency steps to sweep (2 .. 20)

for fstep in fsteps:

    accuracy         = np.zeros(K)
    correct_counts   = np.zeros([K, n_classes])
    confuse_matrix   = []

    for k in range(K):
        split_seed = 100 + k

        # ------------------------------------------------------------------
        # 2) Create the ReservoirNetwork using default optimized parameters
        # ------------------------------------------------------------------
        # Seeded with the split seed, like the other simulated-data run, so
        # that every repetition can be reproduced exactly.
        res_net = ReservoirNetwork(Fs=1000, fstep=fstep, random_state=split_seed)

        # ------------------------------------------------------------------
        # 3) Split the data into training and testing sets
        # ------------------------------------------------------------------
        X_train, X_test, y_train, y_test = train_test_split(
            data, labels, test_size=0.2, stratify=labels, random_state=split_seed
        )
        print("Training data label distribution:")
        unique_labels_train, counts_train = np.unique(y_train, return_counts=True)
        for ul, ct in zip(unique_labels_train, counts_train):
            print(f"Label {ul}: {ct} samples")

        # ------------------------------------------------------------------
        # 4) Extract Features for Training
        # ------------------------------------------------------------------
        X_train_features        = extract_reservoir_features(res_net, X_train)
        scaler                  = StandardScaler()
        X_train_features_scaled = scaler.fit_transform(X_train_features)

        # ------------------------------------------------------------------
        # 5) Train the Classifier
        # ------------------------------------------------------------------
        clf = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=10000)
        clf.fit(X_train_features_scaled, y_train)

        # ------------------------------------------------------------------
        # 6) Extract Features for Test Set (scaled with the training statistics)
        # ------------------------------------------------------------------
        X_test_features        = extract_reservoir_features(res_net, X_test)
        X_test_features_scaled = scaler.transform(X_test_features)

        # ------------------------------------------------------------------
        # 7) Predict and Evaluate
        # ------------------------------------------------------------------
        y_pred      = clf.predict(X_test_features_scaled)
        accuracy[k] = accuracy_score(y_test, y_pred)
        print(k, accuracy[k])

        # labels=classes pins the row/column order of every matrix
        cm = confusion_matrix(y_test, y_pred, labels=classes)
        confuse_matrix.append(cm)

        print("Correct classifications per test label:")
        correct_count = np.zeros(n_classes)
        for index, ul in enumerate(classes):
            test_mask = (y_test == ul)
            correct_count[index] = np.sum((y_pred == ul) & test_mask)
            print(f"Label {ul}: Correctly identified {correct_count[index]} times out of {np.sum(test_mask)}")

        correct_counts[k, :] = correct_count

    # Convert the list of confusion matrices into a NumPy array
    confuse_matrix = np.array(confuse_matrix)

    # Get today's date in YYYY-MM-DD format
    date_today = datetime.today().strftime('%Y-%m-%d')
    save_name  = f"SIM_accuracy_results_fstep_{fstep}_date_{date_today}"

    # Save one results file per frequency step. The per-split objects (scaler,
    # res_net, X_*, y_*) are the ones left over from the LAST of the K repetitions.
    results = {
        'accuracy': accuracy,
        'correct_counts': correct_counts,
        'confuse_matrix': confuse_matrix,
        'scaler': scaler,
        'res_net': res_net,
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test,
        'X_train_features': X_train_features,
        'X_test_features': X_test_features,
        'y_pred': y_pred
    }

    with open(save_name + '.pkl', 'wb') as f:
        pickle.dump(results, f)

---

# 2. In vivo data

## Run: RRN on in vivo data.

In [None]:
import numpy as np
import scipy.io as sio
import importlib
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model  import LogisticRegression
from sklearn.metrics       import accuracy_score

import ReservoirNetwork
importlib.reload(ReservoirNetwork)
from ReservoirNetwork import ReservoirNetwork, extract_reservoir_features

# ------------------------------------------------------------------
# Helpers
# ------------------------------------------------------------------

def balance_classes(X, y, seed):
    """
    Subsample a 3-class training set into a balanced 2-class one.

    Labels are 0 ('y'), 1 ('N') and 2 ('b'). Every class-0 sample is kept;
    floor(n0/2) class-1 samples and the remaining n0 - floor(n0/2) class-2
    samples are drawn without replacement, and the drawn class-2 samples are
    relabelled as 1, so the result holds n0 samples of each output class.

    Raises ValueError when class 1 or class 2 has too few samples.
    Returns (X_bal, y_bal); the inputs are left untouched.
    """
    rng = np.random.default_rng(seed)

    # positions of each class in y
    pos_idx, neg_idx, both_idx = (np.nonzero(y == cls)[0] for cls in (0, 1, 2))

    n_pos    = len(pos_idx)
    n_from_N = n_pos // 2
    n_from_b = n_pos - n_from_N
    if len(neg_idx) < n_from_N or len(both_idx) < n_from_b:
        raise ValueError("Not enough N/b to balance y")

    # draw N first, then b (the order fixes which random numbers each draw sees)
    picked_N = rng.choice(neg_idx, size=n_from_N, replace=False)
    picked_b = rng.choice(both_idx, size=n_from_b, replace=False)

    keep  = np.concatenate((pos_idx, picked_N, picked_b))
    y_bal = np.array(y[keep])
    # collapse b(2)->N(1)
    y_bal[y_bal == 2] = 1
    return X[keep], y_bal

def train_and_test(res_net, X_train, y_train, X_test, y_test):
    """
    Extract reservoir features, scale, fit LR, predict and compute metrics.

    Parameters
    ----------
    res_net : ReservoirNetwork
        Reservoir passed to ``extract_reservoir_features`` for both sets.
    X_train, y_train : array-like
        Training signals and their labels.
    X_test, y_test : array-like
        Test signals and their labels. Test labels equal to 2 ('b') are
        scored as 1; the caller's array is NOT modified.

    Returns
    -------
    dict
        Keys 'accuracy', 'sensitivity', 'specificity', 'PPV', 'NPV', with
        label 0 ('y') as the positive class. A ratio whose denominator is zero
        is reported as NaN.
    """
    # 1) features
    Xtr_feat = extract_reservoir_features(res_net, X_train)
    Xte_feat = extract_reservoir_features(res_net, X_test)

    # 2) scale & fit (scaler statistics come from the training set only)
    scaler = StandardScaler().fit(Xtr_feat)
    Xtr = scaler.transform(Xtr_feat)
    Xte = scaler.transform(Xte_feat)
    clf = LogisticRegression(
        multi_class='multinomial', solver='lbfgs', max_iter=10_000
    ).fit(Xtr, y_train)

    # 3) predict
    y_pred = clf.predict(Xte)
    # collapse b(2)->N(1) on a private copy, so the caller's labels are not
    # silently rewritten as a side effect of evaluation
    y_true = np.array(y_test)
    y_true[y_true == 2] = 1
    acc = accuracy_score(y_true, y_pred)

    # 4) confusion elements
    pos = 0  # 'y'
    TP = np.sum((y_pred == pos) & (y_true == pos))
    FN = np.sum((y_pred != pos) & (y_true == pos))
    FP = np.sum((y_pred == pos) & (y_true != pos))
    TN = np.sum((y_pred != pos) & (y_true != pos))

    sens = TP / (TP + FN) if TP+FN else np.nan
    spec = TN / (TN + FP) if TN+FP else np.nan
    ppv  = TP / (TP + FP) if TP+FP else np.nan
    npv  = TN / (TN + FN) if TN+FN else np.nan

    return {
        'accuracy':    acc,
        'sensitivity': sens,
        'specificity': spec,
        'PPV':         ppv,
        'NPV':         npv
    }

# ------------------------------------------------------------------
# Main script
# ------------------------------------------------------------------
# 1) load data
readdata = sio.loadmat('data/EEG_classifications_3label.mat')
data, label, subj = (readdata['data'].T,
                     readdata['label'].ravel(),
                     readdata['subj'].ravel())

# translate the class letters into the integer codes used below
mapping = {'y':0,'n':1,'b':2}
labels = np.array([mapping[letter] for letter in label])

selected_subjects = [
    'pBECTS003','pBECTS007','pBECTS011',
    'pBECTS015','pBECTS033','pBECTS043'
]

K = 100
# one (repetition x held-out subject) table per metric
results = {
    metric: np.zeros((K,len(selected_subjects)))
    for metric in ('accuracy', 'sensitivity', 'specificity', 'PPV', 'NPV')
}

# 2) outer loop over random seeds
for k in range(K):
    seed    = 100 + k
    res_net = ReservoirNetwork(Fs=2035, random_state=seed)

    # 3) leave-one-subject-out: each selected subject is the test set once
    for i, subj_id in enumerate(selected_subjects):
        test_idx  = np.where(subj == subj_id)[0]
        train_idx = np.where(subj != subj_id)[0]
        Xte, yte  = data[test_idx],  labels[test_idx]
        Xtr, ytr  = data[train_idx], labels[train_idx]

        # balance the training set only, then train and score on the held-out subject
        Xtr_bal, ytr_bal = balance_classes(Xtr, ytr, seed)
        m = train_and_test(res_net, Xtr_bal, ytr_bal, Xte, yte)

        print(f"{subj_id}: Test size {np.shape(Xte)[0]} , Accuracy {m['accuracy']:.2f} , Sensitivity {m['sensitivity']:.2f} , Specificity {m['specificity']:.2f} , PPV {m['PPV']:.2f} , NPV {m['NPV']:.2f}")

        # store every metric at (repetition, subject)
        for metric, value in m.items():
            results[metric][k,i] = value

# 4) save
from datetime import datetime
fname = "INVIVO_accuracy_results_" + datetime.today().strftime('%Y-%m-%d') + ".pkl"
with open(fname,'wb') as f:
    pickle.dump(results, f)


fmin= 5.0 , fmax= 508.0 , fstep= 1.0 , N nodes= 504


---
# 3. MNIST Analysis

## Run: RRN on MNIST data

In [None]:
from   keras.datasets import mnist
import numpy as np
import pickle
from collections           import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model  import LogisticRegression
from sklearn.metrics       import accuracy_score, confusion_matrix
from sklearn.pipeline      import Pipeline
from ReservoirNetwork      import ReservoirNetwork, extract_reservoir_features

# ------------------------------------------------------------------
# 1) Load the MNIST data
# ------------------------------------------------------------------
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# ------------------------------------------------------------------
# 2) Prepare the data
# ------------------------------------------------------------------
# Flatten each image into one row, e.g. (60000, 28, 28) -> (60000, 784).
# The sizes are read from the arrays rather than hard-coded.
X_train = X_train.reshape(X_train.shape[0], -1).astype(np.float32)
X_test  =  X_test.reshape(X_test.shape[0],  -1).astype(np.float32)
print('After flattening: ', np.shape(X_train))

# Normalize pixel values to range [0, 1]
X_train /= 255.0
X_test  /= 255.0

# Print sample counts
print("\nLabel distribution in training subset:", dict(Counter(y_train)))
print("Label distribution in testing subset:", dict(Counter(y_test)))

# Build pipelines for scaling & classification
clf = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=10_000))
])

K = 100
accuracy         = np.zeros(K)
confuse_matrix   = []
for k in range(K):

    # ------------------------------------------------------------------
    # 3) Instantiate the ReservoirNetwork
    # ------------------------------------------------------------------
    # The MNIST train/test split is fixed, so the reservoir is the only thing
    # that changes between repetitions. Seeding it with the repetition index
    # keeps the repetitions distinct while making each one reproducible.
    res_net = ReservoirNetwork(Fs=1000, random_state=k)

    # ------------------------------------------------------------------
    # 4) Extract Features for Training
    # ------------------------------------------------------------------
    X_train_features = extract_reservoir_features(res_net, X_train)

    # ------------------------------------------------------------------
    # 5) Train the Classifier (the pipeline is refit from scratch each time)
    # ------------------------------------------------------------------
    clf.fit(X_train_features, y_train)

    # ------------------------------------------------------------------
    # 6) Extract Features for Test Set
    # ------------------------------------------------------------------
    X_test_features = extract_reservoir_features(res_net, X_test)

    # ------------------------------------------------------------------
    # 7) Predict and Evaluate
    # ------------------------------------------------------------------
    y_pred = clf.predict(X_test_features)

    accuracy[k] = accuracy_score(y_test, y_pred)
    print(k, accuracy[k])

    # Evaluate confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    confuse_matrix.append(cm)

# Convert the list of confusion matrices into a NumPy array
confuse_matrix = np.array(confuse_matrix)

# Get today's date in YYYY-MM-DD format
from datetime import datetime
date_today = datetime.today().strftime('%Y-%m-%d')
save_name  = f"MNIST_accuracy_results_{date_today}"

# Save the results. res_net, clf, the feature matrices and y_pred are the ones
# left over from the LAST of the K repetitions.
results = {
    'res_net': res_net,
    'clf':     clf,
    'accuracy': accuracy,
    'confuse_matrix': confuse_matrix,
    'X_train': X_train,
    'y_train': y_train,
    'X_test': X_test,
    'y_test': y_test,
    'X_train_features': X_train_features,
    'X_test_features': X_test_features,
    'y_pred': y_pred
}

with open(save_name + '.pkl', 'wb') as f:
    pickle.dump(results, f)

## Run: Increase frequency step.

In [None]:
from   keras.datasets import mnist
import numpy as np
import pickle
from collections           import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model  import LogisticRegression
from sklearn.metrics       import accuracy_score, confusion_matrix
from sklearn.pipeline      import Pipeline
from ReservoirNetwork      import ReservoirNetwork, extract_reservoir_features

# ------------------------------------------------------------------
# 1) Load the MNIST data
# ------------------------------------------------------------------
from datetime import datetime  # imported once here instead of on every fstep iteration

(X_train, y_train), (X_test, y_test) = mnist.load_data()

# ------------------------------------------------------------------
# 2) Prepare the data
# ------------------------------------------------------------------
# Flatten each image into one row, e.g. (60000, 28, 28) -> (60000, 784).
# The sizes are read from the arrays rather than hard-coded.
X_train = X_train.reshape(X_train.shape[0], -1).astype(np.float32)
X_test  =  X_test.reshape(X_test.shape[0],  -1).astype(np.float32)
print('After flattening: ', np.shape(X_train))

# Normalize pixel values to range [0, 1]
X_train /= 255.0
X_test  /= 255.0

# Print sample counts
print("\nLabel distribution in training subset:", dict(Counter(y_train)))
print("Label distribution in testing subset:", dict(Counter(y_test)))

# Build pipelines for scaling & classification
clf = Pipeline([
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=10_000))
])

fsteps = np.arange(2,11)   # frequency steps to sweep (2 .. 10)
K = 100                    # repetitions per frequency step

for fstep in fsteps:

    accuracy         = np.zeros(K)
    confuse_matrix   = []

    for k in range(K):

        # ------------------------------------------------------------------
        # 3) Instantiate the ReservoirNetwork
        # ------------------------------------------------------------------
        # The MNIST split is fixed, so the reservoir is the only thing that
        # changes between repetitions. Seeding it with the repetition index
        # keeps the repetitions distinct while making each one reproducible.
        res_net = ReservoirNetwork(Fs=1000, fstep=fstep, random_state=k)

        # ------------------------------------------------------------------
        # 4) Extract Features for Training
        # ------------------------------------------------------------------
        X_train_features = extract_reservoir_features(res_net, X_train)

        # ------------------------------------------------------------------
        # 5) Train the Classifier (the pipeline is refit from scratch each time)
        # ------------------------------------------------------------------
        clf.fit(X_train_features, y_train)

        # ------------------------------------------------------------------
        # 6) Extract Features for Test Set
        # ------------------------------------------------------------------
        X_test_features  = extract_reservoir_features(res_net, X_test)

        # ------------------------------------------------------------------
        # 7) Predict and Evaluate
        # ------------------------------------------------------------------
        y_pred = clf.predict(X_test_features)

        accuracy[k] = accuracy_score(y_test, y_pred)
        print(k, accuracy[k])

        # Evaluate confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        confuse_matrix.append(cm)

    # Convert the list of confusion matrices into a NumPy array
    confuse_matrix = np.array(confuse_matrix)

    # Get today's date in YYYY-MM-DD format
    date_today = datetime.today().strftime('%Y-%m-%d')
    save_name  = f"MNIST_accuracy_results_fstep_{fstep}_date_{date_today}"

    # Save one results file per frequency step. res_net, clf, the feature
    # matrices and y_pred are the ones left over from the LAST repetition.
    results = {
        'res_net': res_net,
        'clf':     clf,
        'accuracy': accuracy,
        'confuse_matrix': confuse_matrix,
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test,
        'X_train_features': X_train_features,
        'X_test_features': X_test_features,
        'y_pred': y_pred
    }

    with open(save_name + '.pkl', 'wb') as f:
        pickle.dump(results, f)

---

# 4. Speech Commands Dataset (SCD)

## Run: RRN on SCD data.

In [None]:
import os
import glob
import pickle
import random

import numpy as np
import librosa

from ReservoirNetwork import ReservoirNetwork, extract_reservoir_features
from datetime import datetime
from tqdm import trange
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Seed Python's and NumPy's global random number generators. NumPy's global
# generator is the one used by np.random.choice when
# load_speech_commands_dataset subsamples files for a label.
random.seed(0)
np.random.seed(0)

def load_speech_commands_dataset(data_path, sr=None, max_examples_per_label=None, fixed_length=None):
    """Load the spoken-digit clips ("zero" .. "nine") as equal-length signals.

    Each digit is read from the ``*.wav`` files in ``<data_path>/<digit name>``.

    Parameters
    ----------
    data_path : str
        Directory that contains one sub-folder per digit name.
    sr : int or None
        Forwarded unchanged to ``librosa.load`` as its ``sr`` argument.
    max_examples_per_label : int or None
        If set, and a folder holds more files than this, that many files are
        drawn without replacement using NumPy's global RNG.
    fixed_length : int or None
        Number of samples per returned signal. Shorter clips are zero-padded
        at the end and longer ones truncated. ``None`` means the length of the
        longest loaded clip.

    Returns
    -------
    X_audio : numpy.ndarray
        One row per clip, each of length ``fixed_length`` (an empty 1-D array
        when no file was found).
    y : numpy.ndarray
        Integer digit label (0-9) for each row of ``X_audio``.
    """
    labels_map = {
        "zero": 0, "one": 1, "two": 2, "three": 3, "four": 4,
        "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9
    }

    # First pass: load every clip and remember its label
    clips = []
    y = []
    for label_name, label_val in labels_map.items():
        folder_path = os.path.join(data_path, label_name)
        # glob returns files in arbitrary, filesystem-dependent order. Sorting
        # fixes the sample order, so the seeded subsampling below and any
        # seeded train/test split downstream select the same clips everywhere.
        wav_files = sorted(glob.glob(os.path.join(folder_path, "*.wav")))

        if max_examples_per_label is not None and len(wav_files) > max_examples_per_label:
            wav_files = np.random.choice(wav_files, size=max_examples_per_label, replace=False)

        for wav_file in wav_files:
            audio, _ = librosa.load(wav_file, sr=sr)
            clips.append(audio)
            y.append(label_val)

    # Use user-defined fixed length or the maximum length found (0 if no clips)
    if fixed_length is None:
        fixed_length = max((len(audio) for audio in clips), default=0)

    # Second pass: pad or truncate every clip to the common length
    X_audio = []
    for audio in clips:
        if len(audio) < fixed_length:
            # Pad with zeros at the end
            audio = np.pad(audio, (0, fixed_length - len(audio)), mode='constant')
        else:
            # Truncate to fixed length
            audio = audio[:fixed_length]
        X_audio.append(audio)

    return np.array(X_audio), np.array(y)

# ---------------------------
# 1. Load and standardize the audio data
# ---------------------------
data_path  = "data/speech_commands_v0.02"
Fs         = 4000  # sample rate requested from the loader
X_audio, y = load_speech_commands_dataset(data_path, sr=Fs)

# Diagnostics: array size and per-label sample counts.
print(f"Loaded {X_audio.shape[0]} audio files with shape {X_audio.shape}.")
print("Unique labels:", np.unique(y))
unique_labels, counts = np.unique(y, return_counts=True)
for label, count in zip(unique_labels, counts):
    print(f"Label {label}: {count} samples")

# Remove each clip's mean, then divide by its peak magnitude so every signal
# lies in [-1, 1]. An all-constant clip (peak 0) is left as zeros.
X_audio_standardized = []
for signal in X_audio:
    sig0 = signal - np.mean(signal)
    peak = np.max(np.abs(sig0))
    X_audio_standardized.append(sig0 / peak if peak > 0 else sig0)

# Repeat the split / train / evaluate cycle K times
K                = 100
accuracy         = np.zeros(K)
confuse_matrix   = []
for k in trange(K):

    # ---------------------------
    # 2. Stratified 90/10 split, seeded by the repetition index
    # ---------------------------
    split = train_test_split(
        X_audio_standardized, y,
        test_size=0.1, stratify=y, random_state=k,
    )
    X_train, X_test = np.asarray(split[0]), np.asarray(split[1])
    y_train, y_test = split[2], split[3]

    # ---------------------------
    # 3. Reservoir features (same reservoir for both sets), scaled with
    #    statistics learned on the training set only
    # ---------------------------
    res_net          = ReservoirNetwork(Fs=Fs, random_state=k)
    X_train_features = extract_reservoir_features(res_net, X_train)
    X_test_features  = extract_reservoir_features(res_net, X_test)

    scaler = StandardScaler()
    X_train_features_scaled = scaler.fit_transform(X_train_features)
    X_test_features_scaled  = scaler.transform(X_test_features)

    # ---------------------------
    # 4. Fit the logistic-regression read-out
    # ---------------------------
    clf = LogisticRegression(multi_class='multinomial', solver='lbfgs', max_iter=10000)
    clf.fit(X_train_features_scaled, y_train)

    # ---------------------------
    # 5. Score on the held-out clips
    # ---------------------------
    y_pred      = clf.predict(X_test_features_scaled)
    accuracy[k] = accuracy_score(y_test, y_pred)
    print("Test accuracy on SDDS using Reservoir Network features:", k, accuracy[k])

    cm = confusion_matrix(y_test, y_pred)
    confuse_matrix.append(cm)

    # Checkpoint after every repetition (backup in case the run crashes)
    date_today   = datetime.today().strftime('%Y-%m-%d')
    save_name    = f"SDDS_TEMP_accuracy_results_{date_today}"
    temp_results = dict(res_net=res_net, accuracy=accuracy, confuse_matrix=confuse_matrix)
    with open(f"{save_name}.pkl", 'wb') as f:
        pickle.dump(temp_results, f)

# Stack the per-repetition confusion matrices into one array
confuse_matrix = np.array(confuse_matrix)

# Final, date-stamped (YYYY-MM-DD) results file
date_today = datetime.today().strftime('%Y-%m-%d')
save_name  = f"SDDS_accuracy_results_{date_today}"

# Per-split objects below are the ones left over from the last repetition.
results = dict(
    res_net=res_net,
    accuracy=accuracy,
    confuse_matrix=confuse_matrix,
    scaler=scaler,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    X_train_features=X_train_features,
    X_test_features=X_test_features,
    y_pred=y_pred,
)

with open(f"{save_name}.pkl", 'wb') as f:
    pickle.dump(results, f)