In [4]:
# Colab only: install the third-party packages imported later in this notebook.
# NOTE(review): versions are not pinned here; consider pinning for reproducibility.
%pip install spikingjelly
%pip install optuna

Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m404.7/404.7 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, optuna
Installing collected packages: colorlog, optuna
Successfully installed colorlog-6.10.1 optuna-4.6.0
Successfully insta

In [5]:
# Debug helper: restrict this process to GPU index 0 via CUDA_VISIBLE_DEVICES.
# NOTE(review): presumably this has to run before torch initializes CUDA to take
# effect, which is why it sits ahead of the torch import cell - confirm.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = "0"

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import time
import optuna
from spikingjelly.activation_based import neuron, functional, layer, surrogate

# Environment report: print the PyTorch version and whether CUDA is usable;
# when it is, also print the name of device 0 and the number of visible devices.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Current device: {torch.cuda.get_device_name(0)}")
    print(f"Device count: {torch.cuda.device_count()}") # expected to print 1 when CUDA_VISIBLE_DEVICES exposes a single GPU

PyTorch version: 2.9.0+cu126
CUDA available: True
Current device: Tesla T4
Device count: 1


In [7]:

# --- 辅助函数：阶跃函数 ---
@torch.jit.script
def heaviside(x: torch.Tensor) -> torch.Tensor:
    """
    Step function used on the forward pass.

    Returns a float32 tensor shaped like ``x`` holding 1 where ``x >= 0``
    and 0 everywhere else.
    """
    return x.ge(0).to(torch.float32)

# =========================================================
# 1. SuperSpike 实现
# 公式: h(x) = 1 / (beta * |x| + 1)^2
# =========================================================
class SuperSpikeFunction(torch.autograd.Function):
    """
    Heaviside step on the forward pass, SuperSpike surrogate on the backward pass.

    Surrogate gradient: h(x) = 1 / (alpha * |x| + 1)^2
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # State for backward is only stashed when x takes part in autograd.
        if x.requires_grad:
            ctx.alpha = alpha
            ctx.save_for_backward(x)
        return heaviside(x)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_x,) = ctx.saved_tensors
        # scale = alpha * |x| + 1, squared below to form the denominator.
        scale = ctx.alpha * saved_x.abs() + 1.0
        surrogate_grad = 1.0 / (scale * scale)
        # alpha is a plain hyperparameter, so it receives no gradient.
        return grad_output * surrogate_grad, None

class SuperSpike(nn.Module):
    """
    Module wrapper around the SuperSpike surrogate gradient.

    With ``spiking=True`` the forward pass goes through ``SuperSpikeFunction``
    (step output, surrogate gradient). With ``spiking=False`` it returns the
    plain ``heaviside`` output without the custom backward.
    """

    def __init__(self, alpha=100.0, spiking=True):
        """
        :param alpha: the ``beta`` of the formula; controls how steep the surrogate gradient is
        :param spiking: whether to route the forward pass through the custom autograd function
        """
        super().__init__()
        self.spiking = spiking
        self.alpha = alpha

    def forward(self, x):
        if not self.spiking:
            return heaviside(x)
        return SuperSpikeFunction.apply(x, self.alpha)

# =========================================================
# 2. Sigmoid' (Image Version) 实现
# 公式: h(x) = s(x)(1 - s(x)), 其中 s(x) = sigmoid(beta * x)
# =========================================================
class SigmoidDerivativeFunction(torch.autograd.Function):
    """
    Heaviside step on the forward pass, sigmoid-derivative surrogate on the backward pass.

    Surrogate gradient: h(x) = s(x) * (1 - s(x)) with s(x) = sigmoid(alpha * x).
    No extra factor of alpha is applied to the result.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # State for backward is only stashed when x takes part in autograd.
        if x.requires_grad:
            ctx.alpha = alpha
            ctx.save_for_backward(x)
        return heaviside(x)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_x,) = ctx.saved_tensors
        # s = sigmoid(alpha * x)
        s = torch.sigmoid(ctx.alpha * saved_x)
        # alpha is a plain hyperparameter, so it receives no gradient.
        return grad_output * s * (1.0 - s), None

class SigmoidDerivative(nn.Module):
    """
    Module wrapper around the sigmoid-derivative surrogate gradient.

    With ``spiking=True`` the forward pass goes through
    ``SigmoidDerivativeFunction``; otherwise it returns the plain ``heaviside``
    output without the custom backward.
    """

    def __init__(self, alpha=4.0, spiking=True):
        """
        :param alpha: the ``beta`` of the formula
        :param spiking: whether to route the forward pass through the custom autograd function
        """
        super().__init__()
        self.spiking = spiking
        self.alpha = alpha

    def forward(self, x):
        if not self.spiking:
            return heaviside(x)
        return SigmoidDerivativeFunction.apply(x, self.alpha)

# =========================================================
# 3. Esser et al. 实现
# 公式: h(x) = max(0, 1.0 - beta * |x|)
# =========================================================
class EsserFunction(torch.autograd.Function):
    """
    Heaviside step on the forward pass, triangular (Esser et al.) surrogate on the backward pass.

    Surrogate gradient: h(x) = max(0, 1 - alpha * |x|)
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # State for backward is only stashed when x takes part in autograd.
        if x.requires_grad:
            ctx.alpha = alpha
            ctx.save_for_backward(x)
        return heaviside(x)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_x,) = ctx.saved_tensors
        # Triangle of height 1 centred at 0; clamped so it never goes negative.
        triangle = torch.clamp(1.0 - ctx.alpha * saved_x.abs(), min=0.0)
        # alpha is a plain hyperparameter, so it receives no gradient.
        return grad_output * triangle, None

class Esser(nn.Module):
    """
    Module wrapper around the Esser et al. surrogate gradient.

    With ``spiking=True`` the forward pass goes through ``EsserFunction``;
    otherwise it returns the plain ``heaviside`` output without the custom
    backward.
    """

    def __init__(self, alpha=1.0, spiking=True):
        """
        :param alpha: the ``beta`` of the formula, usually 1.0 or larger
        :param spiking: whether to route the forward pass through the custom autograd function
        """
        super().__init__()
        self.spiking = spiking
        self.alpha = alpha

    def forward(self, x):
        if not self.spiking:
            return heaviside(x)
        return EsserFunction.apply(x, self.alpha)

In [8]:
# ----------------------------------------
# 1. Hyperparameters and global settings
# ----------------------------------------

T = 8             # number of simulation time steps per sample (key SNN parameter)
BATCH_SIZE = 64   # mini-batch size
EPOCHS = 10       # training epochs per run (kept small for a quick demo)
LR = 1e-3         # learning rate
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
BETA = 10.0       # surrogate-gradient slope taken from the paper
                  # NOTE(review): not referenced by the visible cells (the search samples alpha instead) - confirm it is still needed
SEED = 42         # base random seed for the experiment

# Seed PyTorch so weight initialisation and data shuffling start from a known
# state on every Restart & Run All. This is best-effort only: it does not seed
# Optuna's sampler, and some GPU kernels may still be non-deterministic.
torch.manual_seed(SEED)

print("--- 实验设置 ---")
print(f"设备 (DEVICE): {DEVICE}")
print(f"仿真时长 (T): {T}")
print(f"批大小 (BATCH_SIZE): {BATCH_SIZE}")
print(f"训练轮数 (EPOCHS): {EPOCHS}")
print("------------------\n")

--- 实验设置 ---
设备 (DEVICE): cuda:0
仿真时长 (T): 8
批大小 (BATCH_SIZE): 64
训练轮数 (EPOCHS): 10
------------------



In [9]:
# ----------------------------------------
# 2. Load and preprocess the CIFAR10 dataset
# ----------------------------------------
print("正在加载 CIFAR10 数据集...")
# Per-channel mean and standard deviation used to normalize CIFAR10 images
cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2023, 0.1994, 0.2010)

# Training pipeline: random horizontal flip, then tensor conversion and normalization
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(), # light data augmentation: random flip
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std)
])

# Test pipeline: no augmentation, only tensor conversion and normalization
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std)
])

# Build the datasets under ./data (download=True) and wrap them in loaders.
# Only the training loader shuffles; both use BATCH_SIZE and 2 worker processes.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

print("数据集加载完毕。\n")

正在加载 CIFAR10 数据集...


100%|██████████| 170M/170M [00:05<00:00, 32.1MB/s] 



数据集加载完毕。



In [10]:
# ----------------------------------------
# 3. 定义基础的卷积 SNN 模型
# ----------------------------------------
# 使用 nn.Sequential 快速搭建一个简单的 CNN 结构
# 关键在于在激活函数的位置换上 SNN 的脉冲神经元

class BasicCSNN(nn.Module):
    """
    Small convolutional spiking network producing 10 class logits.

    Layout: [Conv -> LIF -> MaxPool] x 2 -> Flatten -> Linear -> LIF -> Linear.
    The layer sizes (3 input channels, two 2x2 poolings, ``64 * 8 * 8`` features
    into the first linear layer) fix the expected input to 3x32x32 images.

    :param T: number of simulation time steps run per forward call (must be >= 1)
    :param surrogate_function: surrogate-gradient module handed to every
        ``neuron.LIFNode``; when ``None`` a fresh ``surrogate.Sigmoid()`` is used
    :raises ValueError: if ``T`` is smaller than 1
    """

    def __init__(self, T, surrogate_function=None):
        super().__init__()
        # With T < 1 the time loop would produce nothing and torch.stack would
        # fail later with an unrelated-looking error, so reject it up front.
        if T < 1:
            raise ValueError(f"T must be a positive integer, got {T}")
        # Build the default per instance. A module instance used directly as a
        # default argument is created once at definition time and then shared
        # by every model constructed without an explicit surrogate.
        if surrogate_function is None:
            surrogate_function = surrogate.Sigmoid()

        self.T = T  # number of simulation time steps
        print(f"Initializing Network with Surrogate: {surrogate_function.__class__.__name__}")

        self.net = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            # LIF neurons take the place of the usual activation; the supplied
            # surrogate function provides the gradient on the backward pass.
            neuron.LIFNode(surrogate_function=surrogate_function),
            nn.MaxPool2d(2),  # 32x32 -> 16x16

            # Block 2
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            neuron.LIFNode(surrogate_function=surrogate_function),
            nn.MaxPool2d(2),  # 16x16 -> 8x8

            nn.Flatten(),

            # Fully connected layer 1
            nn.Linear(64 * 8 * 8, 128),  # 64 * 8 * 8 = 4096
            neuron.LIFNode(surrogate_function=surrogate_function),

            # Output layer: no spiking neuron here, so the raw linear output can
            # be fed straight into a cross-entropy loss.
            nn.Linear(128, 10)  # 10 classes
        )

    def forward(self, x):
        """
        Present the same input ``x`` for ``self.T`` time steps and return the
        mean of the per-step outputs, shape (N, 10).
        """
        # Spiking neurons are stateful, so clear their state before processing
        # a new batch.
        functional.reset_net(self)

        # One (N, 10) output per time step.
        outputs_over_time = []

        # Static images: the identical input is fed at every step while the
        # neurons keep integrating it and emitting spikes.
        for t in range(self.T):
            out_t = self.net(x)
            outputs_over_time.append(out_t)

        # T tensors of shape (N, 10) -> one tensor of shape (T, N, 10)
        outputs_stack = torch.stack(outputs_over_time)

        # Decode by averaging over time: (T, N, 10) -> (N, 10). The mean output
        # serves as the classification logits.
        return outputs_stack.mean(dim=0)

In [None]:
# ----------------------------------------
# 4. Experiment configuration (Optuna version)
# ----------------------------------------

# Names of the surrogate gradients to compare
surrogate_types = ["SuperSpike", "Sigmoid", "Esser"]

# 辅助函数：根据名称和 alpha 创建 surrogate 实例
def get_surrogate_func(name, alpha):
    """
    Build the surrogate-gradient module registered under ``name``.

    :param name: one of "SuperSpike", "Sigmoid" or "Esser"
    :param alpha: slope hyperparameter forwarded to the module's constructor
    :return: a new surrogate module instance
    :raises ValueError: if ``name`` is not one of the known surrogate types
    """
    # The lambdas keep construction lazy: only the matching class is touched.
    factories = (
        ("SuperSpike", lambda: SuperSpike(alpha=alpha)),
        ("Sigmoid", lambda: SigmoidDerivative(alpha=alpha)),
        ("Esser", lambda: Esser(alpha=alpha)),
    )
    for key, build in factories:
        if name == key:
            return build()
    raise ValueError(f"Unknown surrogate type: {name}")

# Loss function shared globally by the training and evaluation loops
criterion = nn.CrossEntropyLoss()

实验配置已更新为 Optuna 搜索模式。


In [15]:
# ----------------------------------------
# 5. 训练和评估循环
# ----------------------------------------

# --- 训练函数 (Train Loop) ---
def train_epoch(model, optimizer, epoch, model_name):
    """
    Train ``model`` for one pass over the global ``train_loader``.

    Uses the global ``DEVICE`` and ``criterion``. Prints one summary line and
    returns the epoch metrics.

    :param model: network to train; called as ``model(inputs)``
    :param optimizer: optimizer stepping the model's parameters
    :param epoch: zero-based epoch index (printed as ``epoch + 1``)
    :param model_name: label used as the log-line prefix
    :return: ``(avg_loss, acc)`` - mean of the per-batch losses and accuracy in percent
    """
    model.train()  # switch to training mode
    total_loss, correct, total = 0.0, 0, 0
    start_time = time.time()

    for inputs, targets in train_loader:
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)

        # Standard optimisation step
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Running statistics: summed batch loss, hits, and samples seen
        total_loss += loss.item()
        predicted = outputs.max(1)[1]
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

    end_time = time.time()
    acc = 100. * correct / total
    avg_loss = total_loss / len(train_loader)
    print(f"[{model_name}] Epoch {epoch+1} Train | Loss: {avg_loss:.4f} | Acc: {acc:.2f}% | Time: {end_time - start_time:.2f}s")
    return avg_loss, acc

# --- 评估函数 (Eval Loop) ---
def test_epoch(model, epoch, model_name):
    """
    Evaluate ``model`` on the global ``test_loader`` without tracking gradients.

    Uses the global ``DEVICE`` and ``criterion``. Prints one summary line and
    returns the evaluation metrics.

    :param model: network to evaluate; called as ``model(inputs)``
    :param epoch: zero-based epoch index (printed as ``epoch + 1``)
    :param model_name: label used as the log-line prefix
    :return: ``(avg_loss, acc)`` - mean of the per-batch losses and accuracy in percent
    """
    model.eval()  # switch to evaluation mode
    total_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)

            outputs = model(inputs)
            total_loss += criterion(outputs, targets).item()

            # Accuracy bookkeeping
            predicted = outputs.max(1)[1]
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    acc = 100. * correct / total
    avg_loss = total_loss / len(test_loader)
    print(f"[{model_name}] Epoch {epoch+1} Test  | Loss: {avg_loss:.4f} | Acc: {acc:.2f}%")
    return avg_loss, acc

In [16]:
# ----------------------------------------
# 6. Run the Optuna hyperparameter search
# ----------------------------------------
print(f"=== 开始对比实验 (总 Epochs: {EPOCHS}) ===\n")

def run_optuna_search(surrogate_name, n_trials=10, seed=None):
    """
    Search for the best surrogate slope ``alpha`` for one surrogate type.

    Each trial samples ``alpha`` in [0.5, 20.0], trains a fresh ``BasicCSNN``
    for ``EPOCHS`` epochs, and reports the test accuracy after every epoch so
    the study's pruner can stop weak trials early. A trial's value is the best
    test accuracy seen over its epochs.

    NOTE(review): the objective is measured on ``test_loader``, so the reported
    best accuracy is an optimistic estimate; a held-out validation split would
    avoid selecting hyperparameters on the test set.

    :param surrogate_name: surrogate type name accepted by ``get_surrogate_func``
    :param n_trials: number of Optuna trials to run
    :param seed: optional seed for the TPE sampler; ``None`` keeps the unseeded behaviour
    :return: the finished ``optuna.Study``
    """
    print(f"\n>>> 开始搜索 {surrogate_name} 的最优 alpha (Trials: {n_trials}) ...")

    def objective(trial):
        # 1. Sample the hyperparameter alpha (search space 0.5 - 20)
        alpha = trial.suggest_float("alpha", 0.5, 20.0)

        # 2. Build a fresh model and optimizer for this trial
        surr_func = get_surrogate_func(surrogate_name, alpha)
        model = BasicCSNN(T=T, surrogate_function=surr_func).to(DEVICE)
        optimizer = optim.Adam(model.parameters(), lr=LR)

        # Log prefix; constant for the whole trial, so built once up front.
        trial_name = f"{surrogate_name}_alpha={alpha:.2f}"

        # 3. Train / evaluate loop
        best_acc = 0.0
        for epoch in range(EPOCHS):
            train_epoch(model, optimizer, epoch, trial_name)
            _, test_acc = test_epoch(model, epoch, trial_name)
            best_acc = max(best_acc, test_acc)

            # Pruning: report the intermediate value and stop if the pruner says so.
            trial.report(test_acc, epoch)
            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

        return best_acc

    # direction="maximize" because the objective is an accuracy. TPESampler is
    # what create_study uses by default for a single objective; it is built
    # explicitly only so that it can be seeded.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=seed),
    )
    study.optimize(objective, n_trials=n_trials)

    # best_trial raises ValueError when no trial completed (e.g. n_trials=0 or
    # every trial pruned/failed); report that instead of crashing the notebook.
    try:
        best_trial = study.best_trial
    except ValueError:
        print(f"[{surrogate_name}] 搜索结束，但没有已完成的 trial（全部被剪枝或失败）。")
        return study

    print(f"[{surrogate_name}] 搜索结束。")
    print(f"  Best Alpha: {best_trial.params['alpha']:.4f}")
    print(f"  Best Acc: {best_trial.value:.2f}%")
    return study

# Run the search for every surrogate type and keep each study by name
studies = {}
# Suggested trial count is 10-20; adjust to the available compute budget
N_TRIALS = 10 

for name in surrogate_types:
    studies[name] = run_optuna_search(name, n_trials=N_TRIALS)

# ----------------------------------------
# 7. Final summary
# ----------------------------------------
# One line per surrogate: best alpha and best objective value of its study
print(f"\n=== 所有实验完成，最终结果汇总 ===")
for name, study in studies.items():
    print(f"模型: {name:<15} | 最优 Alpha: {study.best_params['alpha']:.4f} | 最佳 Test Acc: {study.best_value:.2f}%")


[I 2025-12-08 13:45:40,599] A new study created in memory with name: no-name-e9a68d55-b097-41c1-92d0-06e5cefa2a4b


=== 开始对比实验 (总 Epochs: 10) ===


>>> 开始搜索 SuperSpike 的最优 alpha (Trials: 10) ...
Initializing Network with Surrogate: SuperSpike


[SuperSpike_alpha=6.52] Epoch 1 Train | Loss: 1.5638 | Acc: 43.48% | Time: 34.65s
[SuperSpike_alpha=6.52] Epoch 1 Test  | Loss: 1.2688 | Acc: 55.07%
[SuperSpike_alpha=6.52] Epoch 1 Test  | Loss: 1.2688 | Acc: 55.07%
[SuperSpike_alpha=6.52] Epoch 2 Train | Loss: 1.1794 | Acc: 58.07% | Time: 33.41s
[SuperSpike_alpha=6.52] Epoch 2 Train | Loss: 1.1794 | Acc: 58.07% | Time: 33.41s
[SuperSpike_alpha=6.52] Epoch 2 Test  | Loss: 1.1129 | Acc: 60.03%
[SuperSpike_alpha=6.52] Epoch 2 Test  | Loss: 1.1129 | Acc: 60.03%
[SuperSpike_alpha=6.52] Epoch 3 Train | Loss: 1.0209 | Acc: 63.89% | Time: 33.31s
[SuperSpike_alpha=6.52] Epoch 3 Train | Loss: 1.0209 | Acc: 63.89% | Time: 33.31s
[SuperSpike_alpha=6.52] Epoch 3 Test  | Loss: 0.9813 | Acc: 64.96%
[SuperSpike_alpha=6.52] Epoch 3 Test  | Loss: 0.9813 | Acc: 64.96%
[SuperSpike_alpha=6.52] Epoch 4 Train | Loss: 0.9114 | Acc: 68.15% | Time: 33.11s
[SuperSpike_alpha=6.52] Epoch 4 Train | Loss: 0.9114 | Acc: 68.15% | Time: 33.11s
[SuperSpike_alpha=6.52] 

[I 2025-12-08 13:51:51,334] Trial 0 finished with value: 72.29 and parameters: {'alpha': 6.523310167940246}. Best is trial 0 with value: 72.29.


[SuperSpike_alpha=6.52] Epoch 10 Test  | Loss: 0.8286 | Acc: 71.50%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=15.21] Epoch 1 Train | Loss: 1.6025 | Acc: 42.12% | Time: 33.65s
[SuperSpike_alpha=15.21] Epoch 1 Train | Loss: 1.6025 | Acc: 42.12% | Time: 33.65s
[SuperSpike_alpha=15.21] Epoch 1 Test  | Loss: 1.3072 | Acc: 53.06%
[SuperSpike_alpha=15.21] Epoch 1 Test  | Loss: 1.3072 | Acc: 53.06%
[SuperSpike_alpha=15.21] Epoch 2 Train | Loss: 1.2141 | Acc: 56.57% | Time: 33.61s
[SuperSpike_alpha=15.21] Epoch 2 Train | Loss: 1.2141 | Acc: 56.57% | Time: 33.61s
[SuperSpike_alpha=15.21] Epoch 2 Test  | Loss: 1.1002 | Acc: 60.52%
[SuperSpike_alpha=15.21] Epoch 2 Test  | Loss: 1.1002 | Acc: 60.52%
[SuperSpike_alpha=15.21] Epoch 3 Train | Loss: 1.0424 | Acc: 63.11% | Time: 33.77s
[SuperSpike_alpha=15.21] Epoch 3 Train | Loss: 1.0424 | Acc: 63.11% | Time: 33.77s
[SuperSpike_alpha=15.21] Epoch 3 Test  | Loss: 1.0061 | Acc: 64.30%
[SuperSpike_alpha=15.21] Epoch 3 Test  | Loss:

[I 2025-12-08 13:58:04,249] Trial 1 finished with value: 72.1 and parameters: {'alpha': 15.209298042081468}. Best is trial 0 with value: 72.29.


[SuperSpike_alpha=15.21] Epoch 10 Test  | Loss: 0.8162 | Acc: 72.10%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=4.67] Epoch 1 Train | Loss: 1.5721 | Acc: 43.02% | Time: 34.04s
[SuperSpike_alpha=4.67] Epoch 1 Train | Loss: 1.5721 | Acc: 43.02% | Time: 34.04s
[SuperSpike_alpha=4.67] Epoch 1 Test  | Loss: 1.3004 | Acc: 53.19%
[SuperSpike_alpha=4.67] Epoch 1 Test  | Loss: 1.3004 | Acc: 53.19%
[SuperSpike_alpha=4.67] Epoch 2 Train | Loss: 1.1885 | Acc: 57.76% | Time: 34.02s
[SuperSpike_alpha=4.67] Epoch 2 Train | Loss: 1.1885 | Acc: 57.76% | Time: 34.02s
[SuperSpike_alpha=4.67] Epoch 2 Test  | Loss: 1.0914 | Acc: 61.41%
[SuperSpike_alpha=4.67] Epoch 2 Test  | Loss: 1.0914 | Acc: 61.41%
[SuperSpike_alpha=4.67] Epoch 3 Train | Loss: 1.0196 | Acc: 63.92% | Time: 34.11s
[SuperSpike_alpha=4.67] Epoch 3 Train | Loss: 1.0196 | Acc: 63.92% | Time: 34.11s
[SuperSpike_alpha=4.67] Epoch 3 Test  | Loss: 0.9917 | Acc: 65.03%
[SuperSpike_alpha=4.67] Epoch 3 Test  | Loss: 0.9917 | A

[I 2025-12-08 14:04:16,540] Trial 2 finished with value: 72.28 and parameters: {'alpha': 4.665509544634646}. Best is trial 0 with value: 72.29.


[SuperSpike_alpha=4.67] Epoch 10 Test  | Loss: 0.8075 | Acc: 72.28%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=10.95] Epoch 1 Train | Loss: 1.6272 | Acc: 41.40% | Time: 33.83s
[SuperSpike_alpha=10.95] Epoch 1 Train | Loss: 1.6272 | Acc: 41.40% | Time: 33.83s
[SuperSpike_alpha=10.95] Epoch 1 Test  | Loss: 1.3093 | Acc: 52.95%
[SuperSpike_alpha=10.95] Epoch 1 Test  | Loss: 1.3093 | Acc: 52.95%
[SuperSpike_alpha=10.95] Epoch 2 Train | Loss: 1.2227 | Acc: 55.97% | Time: 33.88s
[SuperSpike_alpha=10.95] Epoch 2 Train | Loss: 1.2227 | Acc: 55.97% | Time: 33.88s
[SuperSpike_alpha=10.95] Epoch 2 Test  | Loss: 1.1452 | Acc: 59.10%
[SuperSpike_alpha=10.95] Epoch 2 Test  | Loss: 1.1452 | Acc: 59.10%
[SuperSpike_alpha=10.95] Epoch 3 Train | Loss: 1.0758 | Acc: 61.74% | Time: 33.86s
[SuperSpike_alpha=10.95] Epoch 3 Train | Loss: 1.0758 | Acc: 61.74% | Time: 33.86s
[SuperSpike_alpha=10.95] Epoch 3 Test  | Loss: 1.0424 | Acc: 63.42%
[SuperSpike_alpha=10.95] Epoch 3 Test  | Loss:

[I 2025-12-08 14:10:28,133] Trial 3 finished with value: 70.85 and parameters: {'alpha': 10.954255321610162}. Best is trial 0 with value: 72.29.


[SuperSpike_alpha=10.95] Epoch 10 Test  | Loss: 0.8303 | Acc: 70.85%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=16.52] Epoch 1 Train | Loss: 1.5931 | Acc: 42.57% | Time: 33.94s
[SuperSpike_alpha=16.52] Epoch 1 Train | Loss: 1.5931 | Acc: 42.57% | Time: 33.94s
[SuperSpike_alpha=16.52] Epoch 1 Test  | Loss: 1.2678 | Acc: 54.66%
[SuperSpike_alpha=16.52] Epoch 1 Test  | Loss: 1.2678 | Acc: 54.66%
[SuperSpike_alpha=16.52] Epoch 2 Train | Loss: 1.1743 | Acc: 58.41% | Time: 34.03s
[SuperSpike_alpha=16.52] Epoch 2 Train | Loss: 1.1743 | Acc: 58.41% | Time: 34.03s
[SuperSpike_alpha=16.52] Epoch 2 Test  | Loss: 1.0749 | Acc: 61.52%
[SuperSpike_alpha=16.52] Epoch 2 Test  | Loss: 1.0749 | Acc: 61.52%
[SuperSpike_alpha=16.52] Epoch 3 Train | Loss: 1.0227 | Acc: 63.90% | Time: 34.09s
[SuperSpike_alpha=16.52] Epoch 3 Train | Loss: 1.0227 | Acc: 63.90% | Time: 34.09s
[SuperSpike_alpha=16.52] Epoch 3 Test  | Loss: 0.9950 | Acc: 65.21%
[SuperSpike_alpha=16.52] Epoch 3 Test  | Loss

[I 2025-12-08 14:16:43,116] Trial 4 finished with value: 71.63 and parameters: {'alpha': 16.517890241760902}. Best is trial 0 with value: 72.29.


[SuperSpike_alpha=16.52] Epoch 10 Test  | Loss: 0.8277 | Acc: 71.23%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=10.77] Epoch 1 Train | Loss: 1.6024 | Acc: 41.93% | Time: 34.24s
[SuperSpike_alpha=10.77] Epoch 1 Train | Loss: 1.6024 | Acc: 41.93% | Time: 34.24s


[I 2025-12-08 14:17:20,637] Trial 5 pruned. 


[SuperSpike_alpha=10.77] Epoch 1 Test  | Loss: 1.3224 | Acc: 52.72%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=5.30] Epoch 1 Train | Loss: 1.5474 | Acc: 44.13% | Time: 34.28s
[SuperSpike_alpha=5.30] Epoch 1 Train | Loss: 1.5474 | Acc: 44.13% | Time: 34.28s
[SuperSpike_alpha=5.30] Epoch 1 Test  | Loss: 1.2525 | Acc: 54.37%
[SuperSpike_alpha=5.30] Epoch 1 Test  | Loss: 1.2525 | Acc: 54.37%
[SuperSpike_alpha=5.30] Epoch 2 Train | Loss: 1.1527 | Acc: 59.11% | Time: 34.22s
[SuperSpike_alpha=5.30] Epoch 2 Train | Loss: 1.1527 | Acc: 59.11% | Time: 34.22s
[SuperSpike_alpha=5.30] Epoch 2 Test  | Loss: 1.0673 | Acc: 61.75%
[SuperSpike_alpha=5.30] Epoch 2 Test  | Loss: 1.0673 | Acc: 61.75%
[SuperSpike_alpha=5.30] Epoch 3 Train | Loss: 0.9924 | Acc: 65.17% | Time: 33.96s
[SuperSpike_alpha=5.30] Epoch 3 Train | Loss: 0.9924 | Acc: 65.17% | Time: 33.96s
[SuperSpike_alpha=5.30] Epoch 3 Test  | Loss: 0.9576 | Acc: 66.17%
[SuperSpike_alpha=5.30] Epoch 3 Test  | Loss: 0.9576 | Ac

[I 2025-12-08 14:23:37,442] Trial 6 finished with value: 71.93 and parameters: {'alpha': 5.2976422955482825}. Best is trial 0 with value: 72.29.


[SuperSpike_alpha=5.30] Epoch 10 Test  | Loss: 0.8249 | Acc: 71.78%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=2.08] Epoch 1 Train | Loss: 1.5897 | Acc: 42.56% | Time: 33.35s
[SuperSpike_alpha=2.08] Epoch 1 Train | Loss: 1.5897 | Acc: 42.56% | Time: 33.35s
[SuperSpike_alpha=2.08] Epoch 1 Test  | Loss: 1.2881 | Acc: 53.85%
[SuperSpike_alpha=2.08] Epoch 1 Test  | Loss: 1.2881 | Acc: 53.85%
[SuperSpike_alpha=2.08] Epoch 2 Train | Loss: 1.1969 | Acc: 57.50% | Time: 33.29s
[SuperSpike_alpha=2.08] Epoch 2 Train | Loss: 1.1969 | Acc: 57.50% | Time: 33.29s


[I 2025-12-08 14:24:51,749] Trial 7 pruned. 


[SuperSpike_alpha=2.08] Epoch 2 Test  | Loss: 1.1351 | Acc: 59.59%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=11.36] Epoch 1 Train | Loss: 1.6061 | Acc: 41.74% | Time: 33.30s
[SuperSpike_alpha=11.36] Epoch 1 Train | Loss: 1.6061 | Acc: 41.74% | Time: 33.30s


[I 2025-12-08 14:25:28,877] Trial 8 pruned. 


[SuperSpike_alpha=11.36] Epoch 1 Test  | Loss: 1.2949 | Acc: 52.84%
Initializing Network with Surrogate: SuperSpike
[SuperSpike_alpha=2.92] Epoch 1 Train | Loss: 1.5639 | Acc: 43.50% | Time: 33.26s
[SuperSpike_alpha=2.92] Epoch 1 Train | Loss: 1.5639 | Acc: 43.50% | Time: 33.26s


[I 2025-12-08 14:26:06,130] Trial 9 pruned. 
[I 2025-12-08 14:26:06,132] A new study created in memory with name: no-name-4c90ebe9-824a-456e-b8fe-5635c0d90662
[I 2025-12-08 14:26:06,132] A new study created in memory with name: no-name-4c90ebe9-824a-456e-b8fe-5635c0d90662


[SuperSpike_alpha=2.92] Epoch 1 Test  | Loss: 1.2895 | Acc: 53.73%
[SuperSpike] 搜索结束。
  Best Alpha: 6.5233
  Best Acc: 72.29%

>>> 开始搜索 Sigmoid 的最优 alpha (Trials: 10) ...
Initializing Network with Surrogate: SigmoidDerivative


: 