<a href="https://colab.research.google.com/github/Maya7991/gsc_classification/blob/main/optuna_relu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install third-party dependencies into the *kernel's* environment (%pip, not !pip).
# torchsummary and scikit-learn are imported by this notebook as well, so install
# them explicitly instead of relying on the hosting image.
%pip install --quiet optuna snntorch torchaudio torchsummary scikit-learn

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/386.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.5/242.5 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m125.6/125.6 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m125.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m71.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import optuna
from optuna.trial import TrialState

import snntorch as snn
from snntorch import utils, spikegen, surrogate, functional as SF

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchaudio
from torchaudio.datasets import SPEECHCOMMANDS
from torch.utils.data import DataLoader, Dataset
import torchaudio.transforms as T

from torchsummary import summary

import os
import sys
import argparse
import logging
from sklearn.preprocessing import LabelEncoder

In [None]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

# === DATASET ===
# The three official Speech Commands subsets, downloaded into the working directory.
train_dataset, val_dataset, test_dataset = (
    SPEECHCOMMANDS("./", download=True, subset=split)
    for split in ("training", "validation", "testing")
)

# === Label Encoding ===
# Collect every distinct label (element 2 of each sample) across all subsets and
# fit an encoder that maps label strings to integer class indices.
# NOTE(review): this indexes every sample just to read its label, which presumably
# decodes each audio clip — confirm, it may dominate start-up time.
all_labels = sorted({sample[2] for sample in train_dataset + val_dataset + test_dataset})
label_encoder = LabelEncoder().fit(all_labels)  # encode labels as indices

cuda


100%|██████████| 2.26G/2.26G [01:46<00:00, 22.8MB/s]


In [None]:
# Mel-spectrogram front end shared by every batch: 400-sample FFT window,
# 160-sample hop, 64 mel bands, configured for 16 kHz audio.
mel_transform = T.MelSpectrogram(
    sample_rate=16000,
    n_fft=400,
    hop_length=160,
    n_mels=64,  # Recommended to avoid warnings
)
# Every waveform is padded or cropped to this many samples before the transform.
target_length = 16000

def collate_fn(batch):
    """Turn a list of Speech Commands samples into a (features, targets) batch.

    Each sample is unpacked as ``(waveform, sample_rate, label, ...)``; only the
    waveform and the label are used.

    Processing per sample:
      1. right-pad with zeros, or crop, along dim 1 to ``target_length`` samples;
      2. apply ``mel_transform``;
      3. standardise the spectrogram with its own mean and std.

    Returns:
        tuple: ``(features, targets)`` where ``features`` is the stack of the
        per-sample spectrograms (``[batch, 1, 64, 101]`` with the settings in
        this notebook) and ``targets`` is an int64 tensor of encoded labels.

    Raises:
        RuntimeError: from ``torch.stack`` when ``batch`` is empty.
    """
    tensors, labels = [], []

    for waveform, sample_rate, label, *_ in batch:
        n_samples = waveform.size(1)
        if n_samples < target_length:
            waveform = F.pad(waveform, (0, target_length - n_samples))
        else:
            waveform = waveform[:, :target_length]

        # Drop the leading dim so mean/std are taken over the [n_mels, time] map.
        # assumes a single-channel waveform — TODO confirm for other datasets
        mel_spec = mel_transform(waveform).squeeze(0)
        mel_spec = (mel_spec - mel_spec.mean()) / (mel_spec.std() + 1e-5)
        tensors.append(mel_spec.unsqueeze(0))  # [1, n_mels, time] == [1, 64, 101]
        labels.append(label)

    # Encode all labels in one call instead of one encoder call per sample.
    targets = torch.as_tensor(label_encoder.transform(labels), dtype=torch.long)
    return torch.stack(tensors), targets

# All three loaders share the batch size and the spectrogram-producing collate
# function; only the training loader reshuffles its samples.
_loader_kwargs = dict(batch_size=64, collate_fn=collate_fn)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

# === Label Info for Model Output ===
# One output class per distinct label seen by the encoder.
num_classes = len(label_encoder.classes_)

In [None]:
class NetSCNN(nn.Module):
    """CNN with ReLU activations whose topology is described by a parameter dict.

    Keys read from ``params``:
        input_dim_v, input_dim_h, input_chn: spatial size and channel count of
            the input feature map.
        level: number of convolutional stages requested.
        "kernel_ff {i}", "stride_ff {i}", "filter_ff {i}": kernel size, stride
            and output channels of stage ``i``.
        dropout: drop probability used after every conv stage and hidden
            linear layer.
        linear_layers: total number of linear layers in the classifier head
            (the last one is the output layer).
        linear_dim: width of the hidden linear layers.
        classes: number of output logits.

    Each accepted stage is Conv2d(padding=1) -> ReLU -> [MaxPool2d(2) if both
    spatial dims are >= 6] -> Dropout2d. A stage whose convolution would leave
    a spatial dimension <= 2 is not added and stops the stack early.

    Attributes:
        features: ``nn.Sequential`` of the accepted conv stages.
        classifier: ``nn.Sequential`` classifier head.
        flat_dim: number of features produced by ``features`` per sample.
    """

    def __init__(self, params):
        super(NetSCNN, self).__init__()
        layers = []
        fc_layers = []

        self.linear_layers = params.get("linear_layers")
        self.linear_dim = params.get("linear_dim")
        self.dropout_rate = params.get("dropout")

        # Size of the feature map produced by the stages accepted so far.
        in_v_size = params["input_dim_v"]
        in_h_size = params["input_dim_h"]
        in_chn = params["input_chn"]

        for i in range(params["level"]):
            print(f"level={i}")
            k = params[f"kernel_ff {i}"]
            s = params[f"stride_ff {i}"]
            # Auto-fix invalid kernel+stride combinations: shrink the larger of
            # the two until they fit the current feature map.
            while k > 1 and s > 1 and (k + s > in_v_size or k + s > in_h_size):
                if k > s:
                    k -= 1
                else:
                    s -= 1

            # Candidate output size of this stage (Conv2d arithmetic, padding=1).
            out_v_size = (in_v_size + 2 - k) // s + 1
            out_h_size = (in_h_size + 2 - k) // s + 1
            out_chn = params[f"filter_ff {i}"]

            if out_v_size <= 2 or out_h_size <= 2:
                # The stage is rejected, so the candidate size must NOT leak
                # into flat_dim; in_* still describes the last accepted stage.
                print(f"Stopping early at layer {i} — output too small")
                break

            layers.append(nn.Conv2d(in_chn, out_chn, kernel_size=k, stride=s, padding=1))
            layers.append(nn.ReLU())

            # Apply pooling only if spatial dimensions allow
            if out_v_size >= 6 and out_h_size >= 6:
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                out_v_size //= 2
                out_h_size //= 2

            layers.append(nn.Dropout2d(p=self.dropout_rate))

            # Commit the stage: its output is the next stage's input.
            in_v_size, in_h_size, in_chn = out_v_size, out_h_size, out_chn

        self.features = nn.Sequential(*layers)
        self.flat_dim = in_v_size * in_h_size * in_chn

        # Build classifier head with variable number of layers
        in_dim = self.flat_dim
        print(f"Flatten Layer : out_size = ({in_chn} x {in_v_size} x {in_h_size})")
        for _ in range(self.linear_layers - 1):
            fc_layers.append(nn.Linear(in_dim, self.linear_dim))
            # A fresh activation per layer; do not depend on a module left over
            # from the conv loop (there may not be one).
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(p=self.dropout_rate))
            in_dim = self.linear_dim  # for next layer

        fc_layers.append(nn.Linear(in_dim, params["classes"]))  # final output layer
        self.classifier = nn.Sequential(*fc_layers)

    def forward(self, x):
        """Return the classifier output for a batch ``x``.

        ``x`` is passed through ``features``, flattened to ``[batch, -1]`` and
        fed to ``classifier``; the result has ``params["classes"]`` columns.
        """
        x = self.features(x)
        x = x.flatten(1)  # Flatten everything except the batch dimension
        return self.classifier(x)

  # def forward(self, x, num_steps=30):
  #     utils.reset(self)

  #     spk_out_rec = []
  #     spk_input = spikegen.rate(x, num_steps=num_steps)

  #     for step in range(num_steps):
  #       x = self.features(spk_input[step])
  #       x = x.view(x.size(0), -1)  # Flatten
  #       spk_out = self.classifier(x)
  #       spk_out_rec.append(spk_out)

  #     return torch.stack(spk_out_rec)


In [None]:
# Optuna: Optimizer Setting
def get_optimizer(model, params):
    """Build the optimizer selected by ``params["opt"]`` for ``model``.

    Args:
        model: module whose ``parameters()`` are optimized.
        params: dict with ``"opt"`` (``"Adam"`` or ``"SGD"``) and ``"lr_init"``;
            for SGD also ``"opt.sgd.moment"`` and ``"opt.sgd.nesterov"``.

    Returns:
        torch.optim.Optimizer: the configured Adam or SGD instance.

    Raises:
        ValueError: if ``params["opt"]`` names an unsupported optimizer
            (previously this silently returned ``None``).
    """
    # We optimize the choice of optimizers as well as their parameters.
    name = params["opt"]
    if name == "Adam":
        return torch.optim.Adam(model.parameters(), lr=params["lr_init"])
    if name == "SGD":
        return torch.optim.SGD(
            model.parameters(),
            lr=params["lr_init"],
            momentum=params["opt.sgd.moment"],
            nesterov=params["opt.sgd.nesterov"],
        )
    raise ValueError(f"Unsupported optimizer {name!r}; expected 'Adam' or 'SGD'")

# Optuna: Learning rate setting
def get_lr_scheduler(optimizer, params, t_max=None):
    """Build the learning-rate scheduler selected by ``params["lr_decay"]``.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        params: dict with ``"lr_decay"`` set to ``"exp_decay"`` or
            ``"cosine_decay"``; may carry ``"lr_t_max"`` for the cosine case.
        t_max: number of ``scheduler.step()`` calls in one cosine half-period.
            Only used for ``"cosine_decay"``. Resolution order: this argument,
            then ``params["lr_t_max"]``, then ``EPOCHS * STEPS_PER_EPOCH`` if
            both module-level names exist.

    Returns:
        An ``ExponentialLR`` (gamma 0.8) or ``CosineAnnealingLR`` instance.

    Raises:
        ValueError: for an unknown ``lr_decay`` value, or when the cosine
            schedule is requested and no period can be determined.
    """
    decay = params["lr_decay"]

    if decay == "exp_decay":
        return torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.8)

    if decay == "cosine_decay":
        if t_max is None:
            t_max = params.get("lr_t_max")
        if t_max is None:
            # Legacy fallback: the original code read these module globals,
            # which are not defined anywhere visible in this notebook.
            scope = globals()
            if "EPOCHS" in scope and "STEPS_PER_EPOCH" in scope:
                t_max = scope["EPOCHS"] * scope["STEPS_PER_EPOCH"]
        if t_max is None:
            raise ValueError(
                "cosine_decay needs a period: pass t_max=... or set params['lr_t_max']"
            )
        return torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=t_max)

    raise ValueError(f"Unsupported lr_decay {decay!r}; expected 'exp_decay' or 'cosine_decay'")

In [None]:
def trace_model_shapes(model, input_shape=(1, 1, 101, 64), num_steps=1):
    """Print the output shape of every layer in ``model.features`` and ``model.classifier``.

    A random tensor of ``input_shape`` is created on the device of the model's
    first parameter, passed through ``spikegen.rate`` and its first time step
    is pushed through the layers one by one. Nothing is returned.
    """
    model_device = next(model.parameters()).device
    probe = torch.randn(*input_shape).to(model_device)
    probe = spikegen.rate(probe, num_steps=num_steps)[0]  # Use 1 timestep only

    def _trace(stages, activations):
        # Apply each stage in order, reporting the shape it produces.
        for index, stage in enumerate(stages):
            activations = stage(activations)
            print(f"Layer {index}: {stage.__class__.__name__} -> {tuple(activations.shape)}")
        return activations

    print("\n=== Features ===")
    probe = _trace(model.features, probe)

    # Same flattening as the model's forward pass: keep the batch dim only.
    probe = probe.view(probe.size(0), -1)

    print("\n=== Classifier ===")
    probe = _trace(model.classifier, probe)

    print("\nFinal Output Shape:", probe.shape)

In [None]:
def forward_pass(net, num_steps, data):
    """Run ``net`` once per time step on a ``spikegen.rate`` encoding of ``data``.

    The network state is reset via ``utils.reset`` first. Returns the per-step
    outputs stacked along a new leading dimension of length ``num_steps``.
    """
    utils.reset(net)  # resets hidden states for all LIF neurons in net
    encoded = spikegen.rate(data, num_steps=num_steps)
    return torch.stack([net(encoded[t]) for t in range(num_steps)])

In [None]:
# Loss / accuracy callables taken from snntorch.functional, plus the time-step
# count that the evaluation loops pass along with the data.
# NOTE(review): NetSCNN.forward returns a single tensor per batch with no time
# axis; these rate-based helpers presumably expect a leading time-step
# dimension — confirm before applying them to its output.
loss_fn = SF.ce_rate_loss()
accuracy_fn = SF.accuracy_rate
num_steps = 30

def define_search_space(trial):
    """Sample one hyper-parameter configuration from ``trial``.

    Returns a dict holding the input geometry, optimizer and scheduler
    settings, dropout, the number of conv levels with per-level
    filter/kernel/stride/pooling choices, and the classifier-head size.
    The per-level ``"use_pooling {i}"`` entries are sampled here; the visible
    ``NetSCNN`` code does not read them.
    """
    categorical = trial.suggest_categorical
    params = {}

    # Fixed or small categorical settings, sampled in this exact order.
    for name, choices in (
        ("input_dim_v", [101]),
        ("input_dim_h", [64]),
        ("input_chn", [1]),
        ("classes", [35]),
        ("opt", ["Adam", "SGD"]),
        ("lr_init", [0.001]),
        ("lr_scheduler", [True]),
    ):
        params[name] = categorical(name, choices)

    # Conditional parameters only exist for the branch that needs them.
    if params["lr_scheduler"]:
        params["lr_decay"] = categorical("lr_decay", ["exp_decay"])
    if params["opt"] == "SGD":
        params["opt.sgd.moment"] = trial.suggest_float("opt.sgd.moment", 0.0, 1.0)
        params["opt.sgd.nesterov"] = categorical("opt.sgd.nesterov", [True, False])

    params["dropout"] = trial.suggest_float("dropout", 0.0, 0.5)
    params["level"] = categorical("level", [2, 3, 4])

    # One group of four choices per convolutional level.
    per_level_choices = (
        ("filter_ff", [8, 16, 32, 48, 64]),
        ("kernel_ff", [3, 5]),
        ("stride_ff", [1, 2]),
        ("use_pooling", [True, False]),
    )
    for level in range(params["level"]):
        for prefix, choices in per_level_choices:
            key = f"{prefix} {level}"
            params[key] = categorical(key, choices)

    params["linear_layers"] = trial.suggest_int("linear_layers", 1, 3)
    params["linear_dim"] = categorical("linear_dim", [128, 256, 512])
    return params


def _validation_accuracy(model, loader):
    """Return the top-1 accuracy (fraction in [0, 1]) of ``model`` over ``loader``.

    The model is switched to eval mode and run without gradients; predictions
    are the arg-max over dim 1 of the model output. The result is weighted by
    sample count, so a short last batch does not skew it. An empty loader
    yields 0.0.
    """
    model.eval()
    correct, seen = 0, 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            predictions = model(data).argmax(dim=1)
            correct += (predictions == target).sum().item()
            seen += target.size(0)
    return correct / seen if seen else 0.0


def objective(trial, args):
    """Optuna objective: train one sampled NetSCNN and return its validation accuracy.

    Args:
        trial: Optuna trial used to sample hyper-parameters and report progress.
        args: namespace providing ``epochs``.

    Returns:
        float: validation accuracy after the last epoch (0.0 if no epoch ran).

    Raises:
        optuna.TrialPruned: when the study's pruner asks to stop this trial.
    """
    params = define_search_space(trial)
    model = NetSCNN(params).to(device)

    # Diagnostics: one batch shape, the module tree and a per-layer summary.
    for data, target in train_loader:
        print("Input Mel spectrogram shape: ", data.shape)
        break
    print(model)
    print(f"Trial #{trial.number}, level={params['level']}")
    # torchsummary builds its probe input on CUDA unless told otherwise.
    summary(model, (1, params["input_dim_v"], params["input_dim_h"]), device=device.type)

    optimizer = get_optimizer(model, params)
    scheduler = get_lr_scheduler(optimizer, params) if params["lr_scheduler"] else None

    accuracy = 0.0  # defined even when args.epochs == 0
    for epoch in range(args.epochs):
        print("epoch:", epoch)
        model.train()
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            # The model emits plain class logits, so use ordinary cross-entropy.
            # (Anomaly detection was a debugging aid and is left off: it slows
            # every backward pass considerably.)
            loss = F.cross_entropy(model(data), target)
            loss.backward()
            optimizer.step()

        if scheduler is not None:
            scheduler.step()

        print("--------------------------val-----------------------------")
        # forward() takes only the batch; there is no time-step argument.
        accuracy = _validation_accuracy(model, val_loader)
        trial.report(accuracy, epoch)
        # Without this check the reported values never trigger the pruner.
        if trial.should_prune():
            raise optuna.TrialPruned()
    return accuracy


In [None]:
def _print_trial_list(title, trials):
    """Print ``title`` followed by one line per trial: index, number, value, params."""
    print(title)
    for i, trial in enumerate(trials):
        # A pruned trial that never reported has value None, which "{:6.4f}" rejects.
        value = "{:6.4f}".format(trial.value) if trial.value is not None else "  None"
        print("    {0:2d} [{1:2d}] val={2} -> ".format(i, trial.number, value), end="")
        for key, param in trial.params.items():
            print("{}: {} ".format(key, param), end="")
        print("")


def _loader_accuracy(model, loader):
    """Return sample-weighted top-1 accuracy of ``model`` over ``loader`` (0.0 if empty)."""
    model.eval()
    correct, seen = 0, 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            correct += (model(data).argmax(dim=1) == target).sum().item()
            seen += target.size(0)
    return correct / seen if seen else 0.0


def show_result(study, best_model_view=False, best_model_train=False, args=None):
    """Print study statistics and optionally summarise / retrain the best model.

    Args:
        study: finished Optuna study.
        best_model_view: if True, rebuild the best trial's model and print its
            torchsummary table.
        best_model_train: if True, train the best model on ``train_loader``,
            save it to ``FF.best_model.pytorch`` and report test accuracy.
        args: namespace providing ``epochs``. When omitted, the module-level
            ``args`` (set by the ``__main__`` cell) is used.

    Raises:
        ValueError: if training is requested and no ``args`` can be found.
    """
    pruned_trials = study.get_trials(deepcopy=True, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=True, states=[TrialState.COMPLETE])

    print("\n========================== Statistics ==========================")
    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))

    if not complete_trials:
        # study.best_trial raises when nothing completed; report and stop.
        print("No completed trials; nothing to summarise.")
        return

    print("Best trial:")
    best_trial = study.best_trial

    print("  Number: ", best_trial.number)
    print("  Value: ", best_trial.value)

    print("  Params: ")
    for key, value in best_trial.params.items():
        print("    {}: {}".format(key, value))

    _print_trial_list("\nComplete trials:", complete_trials)
    _print_trial_list("\nPruned trials:", pruned_trials)

    if not (best_model_view or best_model_train):
        return

    # Build the model once, whichever of the two options asked for it.
    params = best_trial.params
    model = NetSCNN(params).to(device)

    if best_model_view:
        print("\n=========================== Summary ============================")
        summary(model, (1, params["input_dim_v"], params["input_dim_h"]), device=device.type)
        print("")

    if not best_model_train:
        return

    run_args = args if args is not None else globals().get("args")
    if run_args is None:
        raise ValueError("show_result(best_model_train=True) needs args with an 'epochs' attribute")

    # Create the optimizer only after the final model exists, so it tracks
    # the parameters that are actually trained.
    optimizer = get_optimizer(model, params)
    scheduler = get_lr_scheduler(optimizer, params) if params["lr_scheduler"] else None

    # Train model.
    print("\n============================= Training Model =============================")
    for epoch in range(run_args.epochs):
        print("epoch: ", epoch)
        model.train()
        correct, seen = 0, 0
        for data, target in train_loader:
            data, target = data.to(device), target.to(device)

            optimizer.zero_grad()
            output = model(data)
            # Plain logits: same cross-entropy loss as the Optuna objective.
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()
            correct += (output.argmax(dim=1) == target).sum().item()
            seen += target.size(0)

        if scheduler is not None:
            scheduler.step()

        accuracy = correct / seen if seen else 0.0
        print("  train accuracy: {:.4f}".format(accuracy))

    print("\n============================== Saving Model ==============================")
    torch.save(model, "FF.best_model.pytorch")
    print("Saved model to disk")

    # evaluate the model
    print("\n============================ Evaluating Model ============================")
    test_acc = _loader_accuracy(model, test_loader)
    print("Evaluation Accuracy:")
    print("  {:.4f}".format(test_acc))

In [None]:
def main(args):
    """Create a fresh Optuna study, run the search and print/train the best result.

    Args:
        args: namespace forwarded to ``objective``; an optional ``n_trials``
            attribute sets the number of trials (default 2).
    """
    # Add stream handler of stdout to show the messages — but only once:
    # re-running this cell would otherwise print every log line again per run.
    optuna_logger = optuna.logging.get_logger("optuna")
    has_stdout_handler = any(
        isinstance(handler, logging.StreamHandler)
        and getattr(handler, "stream", None) is sys.stdout
        for handler in optuna_logger.handlers
    )
    if not has_stdout_handler:
        optuna_logger.addHandler(logging.StreamHandler(sys.stdout))

    study_name = "SNN-Mel-Sandbox"  # Unique identifier of the study.
    storage_name = "sqlite:///{}.db".format(study_name)

    # Start from a clean study each run.
    if os.path.exists("{}.db".format(study_name)):
        try:
            optuna.delete_study(study_name=study_name, storage=storage_name)
        except KeyError:
            # The database file exists but does not contain this study.
            pass

    study = optuna.create_study(
        direction="maximize", pruner=optuna.pruners.MedianPruner(n_startup_trials=2),
        study_name=study_name, storage=storage_name
    )

    n_trials = getattr(args, "n_trials", 2)
    study.optimize(lambda trial: objective(trial, args), n_trials=n_trials)

    show_result(study, best_model_view=True, best_model_train=True)

if __name__ == "__main__":
    # Command-line settings. Help texts use %(default)s so they cannot drift
    # from the real defaults. Only some of these options are read by the code
    # visible in this notebook; the rest are kept for compatibility.
    parser = argparse.ArgumentParser(
        description='Optuna hyper-parameter search for a Speech Commands CNN')
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: %(default)s)')
    parser.add_argument('--test-batch-size', type=int, default=128, metavar='N',
                        help='input batch size for testing (default: %(default)s)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: %(default)s)')
    parser.add_argument('--n-trials', type=int, default=2, metavar='N',
                        help='number of Optuna trials to run (default: %(default)s)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: %(default)s)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: %(default)s)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--no-mps', action='store_true', default=False,
                        help='disables macOS GPU training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: %(default)s)')
    parser.add_argument('--log-interval', type=int, default=1, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    # parse_known_args tolerates the extra "-f <kernel.json>" argument that
    # Jupyter passes to the kernel process.
    args, unknown = parser.parse_known_args()
    print("unknown=", unknown)

    main(args)

[I 2025-06-02 09:51:33,713] A new study created in RDB with name: SNN-Mel-Sandbox


unknown= ['-f', '/root/.local/share/jupyter/runtime/kernel-48fb8dc2-3c8b-4354-bfca-98dd70a1c259.json']
A new study created in RDB with name: SNN-Mel-Sandbox
A new study created in RDB with name: SNN-Mel-Sandbox
A new study created in RDB with name: SNN-Mel-Sandbox
A new study created in RDB with name: SNN-Mel-Sandbox
level=0
level=1
level=2
level=3
Flatten Layer : out_size = (48 x 5 x 3)
Input Mel spectrogram shape:  torch.Size([64, 1, 64, 101])
NetSCNN(
  (features): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Dropout2d(p=0.12743323934825462, inplace=False)
    (4): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Dropout2d(p=0.12743323934825462, inplace=False)
    (8): Conv2d(16, 48, kernel_size=(5, 5), stride=(1,

[W 2025-06-02 09:58:20,286] Trial 0 failed with parameters: {'input_dim_v': 101, 'input_dim_h': 64, 'input_chn': 1, 'classes': 35, 'opt': 'SGD', 'lr_init': 0.001, 'lr_scheduler': True, 'lr_decay': 'exp_decay', 'opt.sgd.moment': 0.9277964072592509, 'opt.sgd.nesterov': False, 'dropout': 0.12743323934825462, 'level': 4, 'filter_ff 0': 8, 'kernel_ff 0': 3, 'stride_ff 0': 1, 'use_pooling 0': True, 'filter_ff 1': 16, 'kernel_ff 1': 3, 'stride_ff 1': 1, 'use_pooling 1': True, 'filter_ff 2': 48, 'kernel_ff 2': 5, 'stride_ff 2': 1, 'use_pooling 2': True, 'filter_ff 3': 48, 'kernel_ff 3': 5, 'stride_ff 3': 2, 'use_pooling 3': False, 'linear_layers': 3, 'linear_dim': 512} because of the following error: TypeError('NetSCNN.forward() takes 2 positional arguments but 3 were given').
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "<ipython-

Trial 0 failed with parameters: {'input_dim_v': 101, 'input_dim_h': 64, 'input_chn': 1, 'classes': 35, 'opt': 'SGD', 'lr_init': 0.001, 'lr_scheduler': True, 'lr_decay': 'exp_decay', 'opt.sgd.moment': 0.9277964072592509, 'opt.sgd.nesterov': False, 'dropout': 0.12743323934825462, 'level': 4, 'filter_ff 0': 8, 'kernel_ff 0': 3, 'stride_ff 0': 1, 'use_pooling 0': True, 'filter_ff 1': 16, 'kernel_ff 1': 3, 'stride_ff 1': 1, 'use_pooling 1': True, 'filter_ff 2': 48, 'kernel_ff 2': 5, 'stride_ff 2': 1, 'use_pooling 2': True, 'filter_ff 3': 48, 'kernel_ff 3': 5, 'stride_ff 3': 2, 'use_pooling 3': False, 'linear_layers': 3, 'linear_dim': 512} because of the following error: TypeError('NetSCNN.forward() takes 2 positional arguments but 3 were given').
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "<ipython-input-29-ada720ff0932>", lin

[W 2025-06-02 09:58:20,291] Trial 0 failed with value None.


Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.
Trial 0 failed with value None.


TypeError: NetSCNN.forward() takes 2 positional arguments but 3 were given