In [9]:
from joblib import load
import pandas as pd
import numpy as np
import os
from  data_utilities import *
import cv2  # OpenCV 用于调整图像大小和颜色处理
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import gc  # 引入垃圾回收模块
from tqdm.auto import tqdm  # 自动适配环境 导入tqdm进度条库
from collections import defaultdict

dataset_name = 'ManySig'
dataset_path='../ManySig.pkl/'

compact_dataset = load_compact_pkl_dataset(dataset_path,dataset_name)

print("数据集发射机数量：",len(compact_dataset['tx_list']),"具体为：",compact_dataset['tx_list'])
print("数据集接收机数量：",len(compact_dataset['rx_list']),"具体为：",compact_dataset['rx_list'])
print("数据集采集天数：",len(compact_dataset['capture_date_list']),"具体为：",compact_dataset['capture_date_list'])


tx_list = compact_dataset['tx_list']
rx_list = compact_dataset['rx_list']
equalized = 0
capture_date_list = compact_dataset['capture_date_list']


n_tx = len(tx_list)
n_rx = len(rx_list)
print(n_tx,n_rx)


train_dates = ['2021_03_01', '2021_03_08', '2021_03_15']  # 设定你想用的训练日期
X_train, y_train, X_test, y_test = preprocess_dataset_for_classification(
    compact_dataset, tx_list, rx_list, train_dates, max_sig=None, equalized = equalized)

print("X_train shape:", X_train.shape)  # (num_blocks, 256, 250, 2)
print("y_train shape:", y_train.shape)


数据集发射机数量： 6 具体为： ['14-10', '14-7', '20-15', '20-19', '6-15', '8-20']
数据集接收机数量： 12 具体为： ['1-1', '1-19', '14-7', '18-2', '19-2', '2-1', '2-19', '20-1', '3-19', '7-14', '7-7', '8-8']
数据集采集天数： 4 具体为： ['2021_03_01', '2021_03_08', '2021_03_15', '2021_03_23']
6 12
✅ 训练样本数: 216000, 测试样本数: 72000
X_train shape: (216000, 256, 2)
y_train shape: (216000,)


In [10]:
import numpy as np

# === 参数设置 ===
SNR_dB = 10           # 信噪比
fs = 20e6             # 采样率 (Hz)
fc = 2.4e9            # 载波频率 (Hz)
v = 120               # 速度 (m/s)

# === 多普勒频移计算 ===
def compute_doppler_shift(v, fc):
    c = 3e8  # 光速
    return (v / c) * fc

fd = compute_doppler_shift(v, fc)
print(f"[INFO] 多普勒频移 fd = {fd:.2f} Hz")

# === 多普勒变换 ===
def add_doppler_shift(signal, fd, fs):
    num_samples = signal.shape[-1]
    t = np.arange(num_samples) / fs
    doppler_phase = np.exp(1j * 2 * np.pi * fd * t)
    return signal * doppler_phase

# === 加噪声 + 多普勒 的主流程 ===
def preprocess_iq_data(data_real_imag, snr_db, fd, fs):
    # Step 1: 转为复数 IQ，shape: (N, T, 2) → (N, T)
    data_complex = data_real_imag[..., 0] + 1j * data_real_imag[..., 1]

    processed = []
    for sig in data_complex:
        # Step 2: 添加 AWGN 噪声
        signal_std = np.std(sig)
        noise_std = signal_std / (10 ** (snr_db / 20))
        noise = np.random.normal(0, noise_std, sig.shape) + 1j * np.random.normal(0, noise_std, sig.shape)
        noisy = sig + noise

        # Step 3: 添加多普勒频移
        shifted = add_doppler_shift(noisy, fd, fs)

        processed.append(shifted)

    processed = np.array(processed)  # shape: (N, T), complex

    # Step 4: 转回 [I, Q] 实数格式
    processed_real_imag = np.stack([processed.real, processed.imag], axis=-1)  # shape: (N, T, 2)

    return processed_real_imag


X_train_processed = preprocess_iq_data(X_train, snr_db=SNR_dB, fd=fd, fs=fs)
X_test_processed  = preprocess_iq_data(X_test,  snr_db=SNR_dB, fd=fd, fs=fs)

# 查看处理前后前10个点
print("原始信号 I 分量：", X_train[0, :10, 0])
print("处理后信号 I 分量：", X_train_processed[0, :10, 0])


[INFO] 多普勒频移 fd = 960.00 Hz
原始信号 I 分量： [ 0.00445568 -0.00589004 -0.0109866   0.01052883  0.01196319 -0.00201421
 -0.00106814 -0.00387583 -0.01409948  0.000824  ]
处理后信号 I 分量： [ 0.00218009 -0.00354658 -0.01052414  0.00091308  0.02281909 -0.00102891
 -0.00206321 -0.00131955 -0.01368751 -0.00507219]


In [11]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
from torch.utils.data import DataLoader, TensorDataset, Subset
from datetime import datetime
from tqdm import tqdm
from sklearn.model_selection import KFold
from torch.nn import TransformerEncoder, TransformerEncoderLayer

# 假设 SNR_dB 和 fd 已经定义
SNR_dB = globals().get('SNR_dB', 'no')
fd = globals().get('fd', 'no')

# === 模型与训练参数设置 ===
raw_input_dim = 2         # 每个时间步是 I/Q 两个值
model_dim = 128           # Transformer 模型内部维度
num_heads = 4
num_layers = 3
num_classes = len(np.unique(y_train))  # 或 len(tx_list)
dropout = 0.1
batch_size = 512
num_epochs = 200
learning_rate = 1e-4
patience = 5

# === 创建保存目录 ===
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
script_name = "wisig_time"
folder_name = f"{timestamp}_{script_name}_SNR{SNR_dB}dB_fd{fd}_classes_{num_classes}_Transformer"
save_folder = os.path.join(os.getcwd(), "training_results", folder_name)
os.makedirs(save_folder, exist_ok=True)

results_file = os.path.join(save_folder, "results.txt")
with open(results_file, "w") as f:
    f.write(f"=== Experiment Summary ===\n")
    f.write(f"Timestamp: {timestamp}\n")
    f.write(f"Total Classes: {num_classes}\n")
    f.write(f"SNR: {SNR_dB} dB\n")
    f.write(f"fd (Doppler shift): {fd} Hz\n")
    f.write(f"equalized: {equalized}\n")

# === 模型定义 ===
class SignalTransformer(nn.Module):
    def __init__(self, raw_input_dim, model_dim, num_heads, num_layers, num_classes, dropout=0.1):
        super(SignalTransformer, self).__init__()
        self.embedding = nn.Linear(raw_input_dim, model_dim)
        encoder_layer = TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.encoder = TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(model_dim, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = self.encoder(x)
        x = x[:, -1, :]
        x = self.fc(x)
        return x


# === 假设 X_train, y_train, X_test, y_test 都已定义并 shape 为 (N, L, 2) ===
# 若还未定义，可自行加载并 reshape
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

train_dataset = TensorDataset(torch.tensor(X_train_processed, dtype=torch.float32),
                               torch.tensor(y_train, dtype=torch.long))

# === K折交叉验证训练 ===
n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_results = []
test_results = []

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def compute_grad_norm(model):
    total_norm = 0.0
    for p in model.parameters():
        if p.grad is not None:
            param_norm = p.grad.data.norm(2)
            total_norm += param_norm.item() ** 2
    return total_norm ** 0.5

def moving_average(x, w=5):
    return np.convolve(x, np.ones(w), 'valid') / w

avg_grad_norms_per_fold = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_dataset)):
    print(f"\n====== Fold {fold+1}/{n_splits} ======")

    train_subset = Subset(train_dataset, train_idx)
    val_subset = Subset(train_dataset, val_idx)

    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, drop_last=True)

    model = SignalTransformer(raw_input_dim, model_dim, num_heads, num_layers, num_classes, dropout).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    grad_norms = []

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        running_train_loss, correct_train, total_train = 0.0, 0, 0
        batch_grad_norms = []

        with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as tepoch:
            for inputs, labels in tepoch:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()

                grad_norm = compute_grad_norm(model)
                batch_grad_norms.append(grad_norm)

                optimizer.step()

                running_train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total_train += labels.size(0)
                correct_train += (predicted == labels).sum().item()

                tepoch.set_postfix(loss=running_train_loss / (len(train_loader)),
                                   accuracy=100 * correct_train / total_train,
                                   grad_norm=grad_norm)

        epoch_train_loss = running_train_loss / len(train_loader)
        train_losses.append(epoch_train_loss)
        train_accuracies.append(100 * correct_train / total_train)
        avg_grad_norm = np.mean(batch_grad_norms)
        grad_norms.append(avg_grad_norm)

        print(f"Epoch {epoch+1} Average Gradient Norm: {avg_grad_norm:.4f}")

        # === 验证 ===
        model.eval()
        running_val_loss, correct_val, total_val = 0.0, 0, 0

        with torch.no_grad():
            for val_inputs, val_labels in val_loader:
                val_inputs = val_inputs.to(device)
                val_labels = val_labels.to(device)

                val_outputs = model(val_inputs)
                val_loss = criterion(val_outputs, val_labels)
                running_val_loss += val_loss.item()
                _, val_predicted = torch.max(val_outputs, 1)
                total_val += val_labels.size(0)
                correct_val += (val_predicted == val_labels).sum().item()

        epoch_val_loss = running_val_loss / len(val_loader)
        val_losses.append(epoch_val_loss)
        val_accuracies.append(100 * correct_val / total_val)

        with open(results_file, "a") as f:
            f.write(f"Epoch {epoch+1} | Train Acc: {train_accuracies[-1]:.2f}% | Val Acc: {val_accuracies[-1]:.2f}%\n")

        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping")
            break

        scheduler.step()

    fold_results.append(max(val_accuracies))
    avg_grad_norms_per_fold.append(grad_norms)

    # === 绘制 loss 曲线 ===
    plt.figure()
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.plot(moving_average(train_losses), label='Train Loss (Smooth)', linestyle='--')
    plt.plot(moving_average(val_losses), label='Val Loss (Smooth)', linestyle='--')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'Fold {fold+1} Loss Curve')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_folder, f"fold_{fold+1}_loss_curve.png"))
    plt.close()

    # === 绘制 Gradient Norm 曲线 ===
    plt.figure()
    plt.plot(grad_norms, label='Gradient Norm')
    plt.xlabel('Epoch')
    plt.ylabel('Gradient Norm')
    plt.title(f'Fold {fold+1} Gradient Norm')
    plt.grid(True)
    plt.legend()
    plt.savefig(os.path.join(save_folder, f"fold_{fold+1}_grad_norm.png"))
    plt.close()

    # === 测试集评估 ===
    model.eval()
    test_preds, test_true = [], []

    with torch.no_grad():
        for test_inputs, test_labels in test_loader:
            test_inputs = test_inputs.to(device)
            test_labels = test_labels.to(device)

            test_outputs = model(test_inputs)
            _, predicted = torch.max(test_outputs, 1)
            test_preds.extend(predicted.cpu().numpy())
            test_true.extend(test_labels.cpu().numpy())

    test_preds = np.array(test_preds)
    test_true = np.array(test_true)
    test_accuracy = 100.0 * np.sum(test_preds == test_true) / len(test_true)
    test_results.append(test_accuracy)

    with open(results_file, "a") as f:
        f.write(f"Fold {fold+1} Test Accuracy: {test_accuracy:.2f}%\n")

    cm = confusion_matrix(test_true, test_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Test Confusion Matrix Fold {fold+1}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.savefig(os.path.join(save_folder, f"fold_{fold+1}_test_confusion_matrix.png"))
    plt.close()

# === 总结结果 ===
avg_val = np.mean(fold_results)
avg_test = np.mean(test_results)

with open(results_file, "a") as f:
    f.write("\n=== Summary ===\n")
    for i in range(n_splits):
        f.write(f"Fold {i+1}: Val Acc = {fold_results[i]:.2f}%, Test Acc = {test_results[i]:.2f}%\n")
    f.write(f"\nAverage Validation Accuracy: {avg_val:.2f}%\n")
    f.write(f"Average Test Accuracy: {avg_test:.2f}%\n")

print("\n=== Final Summary ===")
for i in range(n_splits):
    print(f"Fold {i+1}: Val = {fold_results[i]:.2f}%, Test = {test_results[i]:.2f}%")
print(f"Average Val Accuracy: {avg_val:.2f}%")
print(f"Average Test Accuracy: {avg_test:.2f}%")


Using device: cuda



Epoch 1/100: 100%|██████████| 337/337 [01:09<00:00,  4.87batch/s, accuracy=21.3, grad_norm=4.51, loss=1.74]  


Epoch 1 Average Gradient Norm: 1.6552


Epoch 2/100: 100%|██████████| 337/337 [01:02<00:00,  5.43batch/s, accuracy=31.1, grad_norm=3.32, loss=1.59]  


Epoch 2 Average Gradient Norm: 4.5905


Epoch 3/100: 100%|██████████| 337/337 [01:08<00:00,  4.92batch/s, accuracy=35.6, grad_norm=9.29, loss=1.51]  


Epoch 3 Average Gradient Norm: 5.7755


Epoch 4/100: 100%|██████████| 337/337 [01:12<00:00,  4.63batch/s, accuracy=40, grad_norm=11.9, loss=1.43]   


Epoch 4 Average Gradient Norm: 8.4100


Epoch 5/100: 100%|██████████| 337/337 [01:16<00:00,  4.43batch/s, accuracy=43.4, grad_norm=9.18, loss=1.35] 


Epoch 5 Average Gradient Norm: 9.9745


Epoch 6/100: 100%|██████████| 337/337 [01:16<00:00,  4.43batch/s, accuracy=46.2, grad_norm=12.7, loss=1.28] 


Epoch 6 Average Gradient Norm: 12.4596


Epoch 7/100: 100%|██████████| 337/337 [01:16<00:00,  4.39batch/s, accuracy=48.5, grad_norm=4.82, loss=1.23] 


Epoch 7 Average Gradient Norm: 14.7133


Epoch 8/100: 100%|██████████| 337/337 [01:17<00:00,  4.34batch/s, accuracy=50.2, grad_norm=6.29, loss=1.19] 


Epoch 8 Average Gradient Norm: 14.3769


Epoch 9/100: 100%|██████████| 337/337 [01:18<00:00,  4.30batch/s, accuracy=51.7, grad_norm=5.46, loss=1.16] 


Epoch 9 Average Gradient Norm: 15.6338


Epoch 10/100: 100%|██████████| 337/337 [01:16<00:00,  4.41batch/s, accuracy=53.1, grad_norm=13, loss=1.13]   


Epoch 10 Average Gradient Norm: 17.1111


Epoch 11/100: 100%|██████████| 337/337 [01:16<00:00,  4.40batch/s, accuracy=55.2, grad_norm=15.8, loss=1.08] 


Epoch 11 Average Gradient Norm: 12.7828


Epoch 12/100: 100%|██████████| 337/337 [01:17<00:00,  4.34batch/s, accuracy=55.8, grad_norm=18.2, loss=1.07] 


Epoch 12 Average Gradient Norm: 17.4305


Epoch 13/100: 100%|██████████| 337/337 [01:17<00:00,  4.34batch/s, accuracy=56.6, grad_norm=17.9, loss=1.05] 


Epoch 13 Average Gradient Norm: 16.8391


Epoch 14/100: 100%|██████████| 337/337 [01:17<00:00,  4.36batch/s, accuracy=57.1, grad_norm=24.3, loss=1.05] 


Epoch 14 Average Gradient Norm: 19.2081


Epoch 15/100: 100%|██████████| 337/337 [01:18<00:00,  4.27batch/s, accuracy=57.9, grad_norm=6.86, loss=1.03] 


Epoch 15 Average Gradient Norm: 16.3101


Epoch 16/100: 100%|██████████| 337/337 [01:24<00:00,  3.98batch/s, accuracy=58.1, grad_norm=12.2, loss=1.02] 


Epoch 16 Average Gradient Norm: 18.4055


Epoch 17/100: 100%|██████████| 337/337 [01:24<00:00,  3.97batch/s, accuracy=58.5, grad_norm=22.1, loss=1.01] 


Epoch 17 Average Gradient Norm: 18.1518


Epoch 18/100: 100%|██████████| 337/337 [01:25<00:00,  3.95batch/s, accuracy=58.9, grad_norm=23.5, loss=1]    


Epoch 18 Average Gradient Norm: 18.4756


Epoch 19/100: 100%|██████████| 337/337 [01:24<00:00,  3.97batch/s, accuracy=59.2, grad_norm=9.54, loss=0.998]


Epoch 19 Average Gradient Norm: 19.1551


Epoch 20/100: 100%|██████████| 337/337 [01:21<00:00,  4.14batch/s, accuracy=59.4, grad_norm=25.4, loss=0.992]


Epoch 20 Average Gradient Norm: 20.1902


Epoch 21/100: 100%|██████████| 337/337 [01:22<00:00,  4.11batch/s, accuracy=60.4, grad_norm=14.4, loss=0.973]


Epoch 21 Average Gradient Norm: 14.0070


Epoch 22/100: 100%|██████████| 337/337 [01:20<00:00,  4.16batch/s, accuracy=60.6, grad_norm=24.7, loss=0.969]


Epoch 22 Average Gradient Norm: 15.1943


Epoch 23/100: 100%|██████████| 337/337 [01:20<00:00,  4.21batch/s, accuracy=60.6, grad_norm=18.9, loss=0.967]


Epoch 23 Average Gradient Norm: 16.0485


Epoch 24/100: 100%|██████████| 337/337 [01:21<00:00,  4.13batch/s, accuracy=60.6, grad_norm=6.74, loss=0.965]


Epoch 24 Average Gradient Norm: 16.1459


Epoch 25/100: 100%|██████████| 337/337 [01:14<00:00,  4.55batch/s, accuracy=60.9, grad_norm=30.8, loss=0.959]


Epoch 25 Average Gradient Norm: 15.2635


Epoch 26/100: 100%|██████████| 337/337 [01:16<00:00,  4.38batch/s, accuracy=61, grad_norm=16.6, loss=0.96]   


Epoch 26 Average Gradient Norm: 17.4715


Epoch 27/100: 100%|██████████| 337/337 [01:17<00:00,  4.34batch/s, accuracy=61.2, grad_norm=21.6, loss=0.954]


Epoch 27 Average Gradient Norm: 16.6809
Early stopping



Epoch 1/100: 100%|██████████| 337/337 [01:17<00:00,  4.36batch/s, accuracy=18, grad_norm=0.922, loss=1.79]   


Epoch 1 Average Gradient Norm: 0.9022


Epoch 2/100: 100%|██████████| 337/337 [01:17<00:00,  4.35batch/s, accuracy=30.2, grad_norm=4.91, loss=1.6]   


Epoch 2 Average Gradient Norm: 4.8725


Epoch 3/100: 100%|██████████| 337/337 [01:18<00:00,  4.27batch/s, accuracy=35.7, grad_norm=1.72, loss=1.5]   


Epoch 3 Average Gradient Norm: 4.1967


Epoch 4/100: 100%|██████████| 337/337 [01:16<00:00,  4.43batch/s, accuracy=41.3, grad_norm=4.46, loss=1.4]  


Epoch 4 Average Gradient Norm: 7.5779


Epoch 5/100: 100%|██████████| 337/337 [01:13<00:00,  4.58batch/s, accuracy=45, grad_norm=4.44, loss=1.32]   


Epoch 5 Average Gradient Norm: 9.0596


Epoch 6/100: 100%|██████████| 337/337 [01:16<00:00,  4.40batch/s, accuracy=47.4, grad_norm=15.8, loss=1.26] 


Epoch 6 Average Gradient Norm: 9.0986


Epoch 7/100: 100%|██████████| 337/337 [01:16<00:00,  4.39batch/s, accuracy=48.9, grad_norm=8.56, loss=1.22] 


Epoch 7 Average Gradient Norm: 10.1137


Epoch 8/100: 100%|██████████| 337/337 [01:16<00:00,  4.39batch/s, accuracy=50.4, grad_norm=16.8, loss=1.19] 


Epoch 8 Average Gradient Norm: 10.2559


Epoch 9/100: 100%|██████████| 337/337 [01:17<00:00,  4.35batch/s, accuracy=51.3, grad_norm=4.41, loss=1.16] 


Epoch 9 Average Gradient Norm: 11.1553


Epoch 10/100: 100%|██████████| 337/337 [01:17<00:00,  4.33batch/s, accuracy=52.6, grad_norm=12.9, loss=1.13] 


Epoch 10 Average Gradient Norm: 12.2036


Epoch 11/100: 100%|██████████| 337/337 [01:18<00:00,  4.29batch/s, accuracy=54.7, grad_norm=11.6, loss=1.09] 


Epoch 11 Average Gradient Norm: 10.0615


Epoch 12/100: 100%|██████████| 337/337 [01:03<00:00,  5.27batch/s, accuracy=55.5, grad_norm=13.7, loss=1.07] 


Epoch 12 Average Gradient Norm: 12.1573


Epoch 13/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=56.5, grad_norm=9.54, loss=1.05] 


Epoch 13 Average Gradient Norm: 11.8646


Epoch 14/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=57.1, grad_norm=12.6, loss=1.04] 


Epoch 14 Average Gradient Norm: 12.3901


Epoch 15/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=57.9, grad_norm=13.7, loss=1.03] 


Epoch 15 Average Gradient Norm: 15.1021


Epoch 16/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=58.4, grad_norm=18.2, loss=1.01] 


Epoch 16 Average Gradient Norm: 15.8090


Epoch 17/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=59.2, grad_norm=19.9, loss=1]    


Epoch 17 Average Gradient Norm: 17.6966


Epoch 18/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=59.5, grad_norm=21.9, loss=0.993]


Epoch 18 Average Gradient Norm: 19.5248


Epoch 19/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=60.3, grad_norm=13.7, loss=0.975]


Epoch 19 Average Gradient Norm: 20.7090


Epoch 20/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=60.8, grad_norm=31.9, loss=0.965]


Epoch 20 Average Gradient Norm: 25.8593


Epoch 21/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=62.4, grad_norm=46, loss=0.933]  


Epoch 21 Average Gradient Norm: 19.8822


Epoch 22/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=62.8, grad_norm=11.6, loss=0.922]


Epoch 22 Average Gradient Norm: 22.5827


Epoch 23/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=63.2, grad_norm=14.8, loss=0.912]


Epoch 23 Average Gradient Norm: 26.7069


Epoch 24/100: 100%|██████████| 337/337 [01:00<00:00,  5.62batch/s, accuracy=63.8, grad_norm=25.3, loss=0.903]


Epoch 24 Average Gradient Norm: 30.7398


Epoch 25/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=63.9, grad_norm=24.5, loss=0.896]


Epoch 25 Average Gradient Norm: 34.3523


Epoch 26/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=64.6, grad_norm=6.7, loss=0.883] 


Epoch 26 Average Gradient Norm: 34.6671


Epoch 27/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=65, grad_norm=37.3, loss=0.872]  


Epoch 27 Average Gradient Norm: 35.2291


Epoch 28/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=65.4, grad_norm=24.3, loss=0.863]


Epoch 28 Average Gradient Norm: 39.9141


Epoch 29/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=65.8, grad_norm=19.6, loss=0.855]


Epoch 29 Average Gradient Norm: 43.9393


Epoch 30/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=66.3, grad_norm=17.7, loss=0.845]


Epoch 30 Average Gradient Norm: 42.8168


Epoch 31/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=66.9, grad_norm=92.3, loss=0.83] 


Epoch 31 Average Gradient Norm: 37.1264


Epoch 32/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=67.3, grad_norm=88.7, loss=0.823]


Epoch 32 Average Gradient Norm: 43.6831


Epoch 33/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=67.3, grad_norm=85.2, loss=0.819]


Epoch 33 Average Gradient Norm: 45.3849


Epoch 34/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=67.5, grad_norm=71.8, loss=0.814]


Epoch 34 Average Gradient Norm: 45.0201


Epoch 35/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=67.7, grad_norm=117, loss=0.809] 


Epoch 35 Average Gradient Norm: 38.5770


Epoch 36/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=68, grad_norm=34.8, loss=0.807]  


Epoch 36 Average Gradient Norm: 44.9280


Epoch 37/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=68.2, grad_norm=61.5, loss=0.801]


Epoch 37 Average Gradient Norm: 44.6442


Epoch 38/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=68.2, grad_norm=27.6, loss=0.798]


Epoch 38 Average Gradient Norm: 45.4827


Epoch 39/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=68.5, grad_norm=61.5, loss=0.794]


Epoch 39 Average Gradient Norm: 43.4291


Epoch 40/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=68.6, grad_norm=97.8, loss=0.79] 


Epoch 40 Average Gradient Norm: 48.0723


Epoch 41/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=69, grad_norm=36.9, loss=0.78]   


Epoch 41 Average Gradient Norm: 38.3044


Epoch 42/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=69.1, grad_norm=23.3, loss=0.777]


Epoch 42 Average Gradient Norm: 36.4150


Epoch 43/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=69.1, grad_norm=22, loss=0.778]  


Epoch 43 Average Gradient Norm: 41.3047


Epoch 44/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=69.2, grad_norm=95.2, loss=0.774]


Epoch 44 Average Gradient Norm: 43.5415


Epoch 45/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=69.4, grad_norm=39.7, loss=0.772]


Epoch 45 Average Gradient Norm: 38.9523


Epoch 46/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=69.5, grad_norm=42.1, loss=0.771]


Epoch 46 Average Gradient Norm: 42.9464


Epoch 47/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=69.6, grad_norm=47.7, loss=0.768]


Epoch 47 Average Gradient Norm: 42.1264


Epoch 48/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=69.5, grad_norm=74.5, loss=0.768]


Epoch 48 Average Gradient Norm: 46.9932


Epoch 49/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=69.8, grad_norm=22.2, loss=0.765]


Epoch 49 Average Gradient Norm: 45.4248


Epoch 50/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=69.8, grad_norm=16.5, loss=0.763]


Epoch 50 Average Gradient Norm: 43.9134


Epoch 51/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=69.9, grad_norm=67, loss=0.758]  


Epoch 51 Average Gradient Norm: 33.7863


Epoch 52/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=70.1, grad_norm=85.4, loss=0.755]


Epoch 52 Average Gradient Norm: 35.1751


Epoch 53/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.2, grad_norm=26.3, loss=0.754]


Epoch 53 Average Gradient Norm: 34.8204


Epoch 54/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.2, grad_norm=27.4, loss=0.754]


Epoch 54 Average Gradient Norm: 39.2585


Epoch 55/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=70.2, grad_norm=79, loss=0.754]  


Epoch 55 Average Gradient Norm: 37.8157


Epoch 56/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=70.2, grad_norm=22.4, loss=0.751]


Epoch 56 Average Gradient Norm: 38.2782


Epoch 57/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.3, grad_norm=13.5, loss=0.751]


Epoch 57 Average Gradient Norm: 38.1483


Epoch 58/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.4, grad_norm=44.7, loss=0.749]


Epoch 58 Average Gradient Norm: 35.4943


Epoch 59/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.3, grad_norm=33.4, loss=0.748]


Epoch 59 Average Gradient Norm: 44.1125


Epoch 60/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=70.4, grad_norm=13.8, loss=0.748]


Epoch 60 Average Gradient Norm: 39.9288


Epoch 61/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.5, grad_norm=23.9, loss=0.745]


Epoch 61 Average Gradient Norm: 34.2174


Epoch 62/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.6, grad_norm=38.6, loss=0.744]


Epoch 62 Average Gradient Norm: 32.8600


Epoch 63/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.6, grad_norm=19.2, loss=0.743]


Epoch 63 Average Gradient Norm: 33.9500


Epoch 64/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.7, grad_norm=23.8, loss=0.743]


Epoch 64 Average Gradient Norm: 34.2563


Epoch 65/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.7, grad_norm=30.9, loss=0.743]


Epoch 65 Average Gradient Norm: 36.3472


Epoch 66/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.7, grad_norm=32.5, loss=0.743]


Epoch 66 Average Gradient Norm: 37.1564


Epoch 67/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.8, grad_norm=19.5, loss=0.74] 


Epoch 67 Average Gradient Norm: 31.9711


Epoch 68/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=70.8, grad_norm=27.4, loss=0.741]


Epoch 68 Average Gradient Norm: 34.4163


Epoch 69/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.8, grad_norm=44.7, loss=0.74] 


Epoch 69 Average Gradient Norm: 33.5301


Epoch 70/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.7, grad_norm=37.4, loss=0.74] 


Epoch 70 Average Gradient Norm: 34.9952


Epoch 71/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=70.9, grad_norm=32.7, loss=0.738]


Epoch 71 Average Gradient Norm: 31.1591


Epoch 72/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=70.9, grad_norm=32.1, loss=0.739]


Epoch 72 Average Gradient Norm: 32.2627


Epoch 73/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.9, grad_norm=34.7, loss=0.737]


Epoch 73 Average Gradient Norm: 32.3945


Epoch 74/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.9, grad_norm=19.4, loss=0.739]


Epoch 74 Average Gradient Norm: 34.2999


Epoch 75/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.9, grad_norm=71.7, loss=0.738]


Epoch 75 Average Gradient Norm: 30.6504


Epoch 76/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=70.8, grad_norm=11, loss=0.738]  


Epoch 76 Average Gradient Norm: 31.4905


Epoch 77/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=70.8, grad_norm=34.6, loss=0.739]


Epoch 77 Average Gradient Norm: 30.1578


Epoch 78/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71, grad_norm=42.7, loss=0.735]  


Epoch 78 Average Gradient Norm: 33.1361


Epoch 79/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.9, grad_norm=39.9, loss=0.736]


Epoch 79 Average Gradient Norm: 33.7955


Epoch 80/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71, grad_norm=21.8, loss=0.736]  


Epoch 80 Average Gradient Norm: 31.5952


Epoch 81/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=71.1, grad_norm=18.7, loss=0.734]


Epoch 81 Average Gradient Norm: 29.1915


Epoch 82/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71, grad_norm=28, loss=0.735]    


Epoch 82 Average Gradient Norm: 29.7575


Epoch 83/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.9, grad_norm=11.8, loss=0.736]


Epoch 83 Average Gradient Norm: 31.5183


Epoch 84/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=71, grad_norm=45.7, loss=0.735]  


Epoch 84 Average Gradient Norm: 28.6929


Epoch 85/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=71.2, grad_norm=17.7, loss=0.732]


Epoch 85 Average Gradient Norm: 30.3700


Epoch 86/100: 100%|██████████| 337/337 [01:00<00:00,  5.56batch/s, accuracy=71.1, grad_norm=29.5, loss=0.734]


Epoch 86 Average Gradient Norm: 30.2130


Epoch 87/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=70.9, grad_norm=30.1, loss=0.735]


Epoch 87 Average Gradient Norm: 31.6123


Epoch 88/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71, grad_norm=39.7, loss=0.734]  


Epoch 88 Average Gradient Norm: 29.9301


Epoch 89/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=71, grad_norm=10.9, loss=0.735]  


Epoch 89 Average Gradient Norm: 31.5302


Epoch 90/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=70.9, grad_norm=16.8, loss=0.735]


Epoch 90 Average Gradient Norm: 31.1793


Epoch 91/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71.1, grad_norm=9.7, loss=0.734] 


Epoch 91 Average Gradient Norm: 29.3749


Epoch 92/100: 100%|██████████| 337/337 [01:00<00:00,  5.56batch/s, accuracy=71.1, grad_norm=31.2, loss=0.733]


Epoch 92 Average Gradient Norm: 29.6149


Epoch 93/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=71, grad_norm=32.6, loss=0.734]  


Epoch 93 Average Gradient Norm: 28.3345


Epoch 94/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71.2, grad_norm=30.6, loss=0.733]


Epoch 94 Average Gradient Norm: 28.9099


Epoch 95/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=71, grad_norm=10.4, loss=0.733]  


Epoch 95 Average Gradient Norm: 30.6085


Epoch 96/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=71.1, grad_norm=39.5, loss=0.734]


Epoch 96 Average Gradient Norm: 28.8515


Epoch 97/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71, grad_norm=28.9, loss=0.734]  


Epoch 97 Average Gradient Norm: 29.7657


Epoch 98/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=71.2, grad_norm=16.8, loss=0.732]


Epoch 98 Average Gradient Norm: 29.1609


Epoch 99/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=71, grad_norm=21.8, loss=0.733]  


Epoch 99 Average Gradient Norm: 27.7145


Epoch 100/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=71.1, grad_norm=20.4, loss=0.732]


Epoch 100 Average Gradient Norm: 29.4694



Epoch 1/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=20.4, grad_norm=2.65, loss=1.76]  


Epoch 1 Average Gradient Norm: 1.3875


Epoch 2/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=31.5, grad_norm=7.58, loss=1.58]  


Epoch 2 Average Gradient Norm: 4.4236


Epoch 3/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=36.6, grad_norm=7.97, loss=1.49]  


Epoch 3 Average Gradient Norm: 5.5600


Epoch 4/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=41.1, grad_norm=13.4, loss=1.4]  


Epoch 4 Average Gradient Norm: 7.9146


Epoch 5/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=44.4, grad_norm=9.58, loss=1.34] 


Epoch 5 Average Gradient Norm: 10.4024


Epoch 6/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=46.3, grad_norm=24.9, loss=1.29] 


Epoch 6 Average Gradient Norm: 13.3173


Epoch 7/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=48.1, grad_norm=16.5, loss=1.25] 


Epoch 7 Average Gradient Norm: 12.9921


Epoch 8/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=49.3, grad_norm=10.5, loss=1.21] 


Epoch 8 Average Gradient Norm: 14.0577


Epoch 9/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=50.5, grad_norm=8.67, loss=1.19] 


Epoch 9 Average Gradient Norm: 13.4846


Epoch 10/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=51.3, grad_norm=11.9, loss=1.16] 


Epoch 10 Average Gradient Norm: 14.1645


Epoch 11/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=53.2, grad_norm=13.5, loss=1.12] 


Epoch 11 Average Gradient Norm: 11.9911


Epoch 12/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=53.5, grad_norm=17.5, loss=1.11] 


Epoch 12 Average Gradient Norm: 13.8433


Epoch 13/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=54.2, grad_norm=6.63, loss=1.1]  


Epoch 13 Average Gradient Norm: 12.8265


Epoch 14/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=54.8, grad_norm=12.2, loss=1.09] 


Epoch 14 Average Gradient Norm: 13.9905


Epoch 15/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=55.3, grad_norm=3.67, loss=1.08] 


Epoch 15 Average Gradient Norm: 13.8452


Epoch 16/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=55.6, grad_norm=16.4, loss=1.07] 


Epoch 16 Average Gradient Norm: 14.5107


Epoch 17/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=56.3, grad_norm=6.7, loss=1.06]  


Epoch 17 Average Gradient Norm: 14.1883


Epoch 18/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=56.7, grad_norm=17.4, loss=1.05] 


Epoch 18 Average Gradient Norm: 14.5440


Epoch 19/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=57.1, grad_norm=23.3, loss=1.04] 


Epoch 19 Average Gradient Norm: 13.9447


Epoch 20/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=57.4, grad_norm=14.3, loss=1.03] 


Epoch 20 Average Gradient Norm: 14.0721


Epoch 21/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=58.3, grad_norm=19, loss=1.02]   


Epoch 21 Average Gradient Norm: 12.6375


Epoch 22/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=58.5, grad_norm=11.3, loss=1.01] 


Epoch 22 Average Gradient Norm: 11.4094
Early stopping



Epoch 1/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=19.2, grad_norm=1.29, loss=1.77]  


Epoch 1 Average Gradient Norm: 1.0025


Epoch 2/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=32, grad_norm=1.69, loss=1.57]    


Epoch 2 Average Gradient Norm: 2.2972


Epoch 3/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=37.9, grad_norm=6.1, loss=1.47]  


Epoch 3 Average Gradient Norm: 3.0223


Epoch 4/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=42.6, grad_norm=3.82, loss=1.37] 


Epoch 4 Average Gradient Norm: 4.7076


Epoch 5/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=45.4, grad_norm=4.64, loss=1.31] 


Epoch 5 Average Gradient Norm: 6.1587


Epoch 6/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=47.4, grad_norm=5.16, loss=1.26] 


Epoch 6 Average Gradient Norm: 6.6665


Epoch 7/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=48.8, grad_norm=5.19, loss=1.23] 


Epoch 7 Average Gradient Norm: 8.1106


Epoch 8/100: 100%|██████████| 337/337 [01:00<00:00,  5.62batch/s, accuracy=50.1, grad_norm=18.3, loss=1.19] 


Epoch 8 Average Gradient Norm: 8.4475


Epoch 9/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=51.2, grad_norm=21.9, loss=1.16] 


Epoch 9 Average Gradient Norm: 9.7150


Epoch 10/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=52.2, grad_norm=8.46, loss=1.14] 


Epoch 10 Average Gradient Norm: 11.2483


Epoch 11/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=54.2, grad_norm=5.84, loss=1.1]  


Epoch 11 Average Gradient Norm: 8.5506


Epoch 12/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=54.6, grad_norm=10.9, loss=1.09] 


Epoch 12 Average Gradient Norm: 10.6177


Epoch 13/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=55.2, grad_norm=14.9, loss=1.08] 


Epoch 13 Average Gradient Norm: 11.8146


Epoch 14/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=55.9, grad_norm=10.6, loss=1.06] 


Epoch 14 Average Gradient Norm: 10.5746


Epoch 15/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=56.4, grad_norm=10.2, loss=1.05] 


Epoch 15 Average Gradient Norm: 11.6035


Epoch 16/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=56.8, grad_norm=11.4, loss=1.05] 


Epoch 16 Average Gradient Norm: 12.1050


Epoch 17/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=57.2, grad_norm=19, loss=1.04]   


Epoch 17 Average Gradient Norm: 11.9586


Epoch 18/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=57.7, grad_norm=28.1, loss=1.03] 


Epoch 18 Average Gradient Norm: 12.5701


Epoch 19/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=58, grad_norm=7.38, loss=1.02]   


Epoch 19 Average Gradient Norm: 13.5061


Epoch 20/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=58.5, grad_norm=7.71, loss=1.01] 


Epoch 20 Average Gradient Norm: 13.6282


Epoch 21/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=59.3, grad_norm=8.28, loss=0.996]


Epoch 21 Average Gradient Norm: 9.8665


Epoch 22/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=59.5, grad_norm=18.7, loss=0.991]


Epoch 22 Average Gradient Norm: 11.9181


Epoch 23/100: 100%|██████████| 337/337 [01:00<00:00,  5.61batch/s, accuracy=60, grad_norm=6.13, loss=0.983]  


Epoch 23 Average Gradient Norm: 11.5619


Epoch 24/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=60.2, grad_norm=8.6, loss=0.98]  


Epoch 24 Average Gradient Norm: 12.9318


Epoch 25/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=60.5, grad_norm=24.9, loss=0.973]


Epoch 25 Average Gradient Norm: 15.0693


Epoch 26/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=60.9, grad_norm=26.8, loss=0.967]


Epoch 26 Average Gradient Norm: 16.7310


Epoch 27/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=61.4, grad_norm=6.21, loss=0.956]


Epoch 27 Average Gradient Norm: 17.7627


Epoch 28/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=62, grad_norm=18.3, loss=0.943]  


Epoch 28 Average Gradient Norm: 20.7049


Epoch 29/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=62.5, grad_norm=51, loss=0.931]  


Epoch 29 Average Gradient Norm: 24.9193


Epoch 30/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=63, grad_norm=26.1, loss=0.922]  


Epoch 30 Average Gradient Norm: 28.9767


Epoch 31/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=63.6, grad_norm=23.1, loss=0.905]


Epoch 31 Average Gradient Norm: 24.9717


Epoch 32/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=63.9, grad_norm=15.8, loss=0.897]


Epoch 32 Average Gradient Norm: 27.5734


Epoch 33/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=64.2, grad_norm=48.6, loss=0.894]


Epoch 33 Average Gradient Norm: 26.9946


Epoch 34/100: 100%|██████████| 337/337 [00:59<00:00,  5.62batch/s, accuracy=64.2, grad_norm=18.5, loss=0.893]


Epoch 34 Average Gradient Norm: 36.0463


Epoch 35/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=64.5, grad_norm=22.6, loss=0.888]


Epoch 35 Average Gradient Norm: 32.2174


Epoch 36/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=64.6, grad_norm=60.7, loss=0.885]


Epoch 36 Average Gradient Norm: 38.4907


Epoch 37/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=64.9, grad_norm=46.1, loss=0.88] 


Epoch 37 Average Gradient Norm: 34.3330


Epoch 38/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=64.8, grad_norm=13, loss=0.88]   


Epoch 38 Average Gradient Norm: 41.9522


Epoch 39/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=65, grad_norm=71.4, loss=0.876]  


Epoch 39 Average Gradient Norm: 38.3511


Epoch 40/100: 100%|██████████| 337/337 [01:00<00:00,  5.57batch/s, accuracy=65.1, grad_norm=31.2, loss=0.871]


Epoch 40 Average Gradient Norm: 39.1702


Epoch 41/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=65.4, grad_norm=7.59, loss=0.865]


Epoch 41 Average Gradient Norm: 30.1267


Epoch 42/100: 100%|██████████| 337/337 [01:00<00:00,  5.59batch/s, accuracy=65.6, grad_norm=30.7, loss=0.863]


Epoch 42 Average Gradient Norm: 29.6842


Epoch 43/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=65.6, grad_norm=63.3, loss=0.863]


Epoch 43 Average Gradient Norm: 31.8785


Epoch 44/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=65.7, grad_norm=13.1, loss=0.858]


Epoch 44 Average Gradient Norm: 30.0714


Epoch 45/100: 100%|██████████| 337/337 [01:00<00:00,  5.58batch/s, accuracy=65.7, grad_norm=19.8, loss=0.862]


Epoch 45 Average Gradient Norm: 37.7638


Epoch 46/100: 100%|██████████| 337/337 [01:00<00:00,  5.60batch/s, accuracy=65.7, grad_norm=41.5, loss=0.859]


Epoch 46 Average Gradient Norm: 33.2624


Epoch 47/100: 100%|██████████| 337/337 [01:03<00:00,  5.29batch/s, accuracy=65.8, grad_norm=17.8, loss=0.857]


Epoch 47 Average Gradient Norm: 31.8954


Epoch 48/100: 100%|██████████| 337/337 [01:03<00:00,  5.34batch/s, accuracy=65.8, grad_norm=30.7, loss=0.857]


Epoch 48 Average Gradient Norm: 38.8367


Epoch 49/100: 100%|██████████| 337/337 [01:09<00:00,  4.82batch/s, accuracy=65.9, grad_norm=55.9, loss=0.855]


Epoch 49 Average Gradient Norm: 34.6329


Epoch 50/100: 100%|██████████| 337/337 [01:10<00:00,  4.77batch/s, accuracy=66, grad_norm=11.7, loss=0.854]  


Epoch 50 Average Gradient Norm: 37.5984


Epoch 51/100: 100%|██████████| 337/337 [01:12<00:00,  4.63batch/s, accuracy=66.2, grad_norm=18.2, loss=0.85] 


Epoch 51 Average Gradient Norm: 26.0046


Epoch 52/100: 100%|██████████| 337/337 [01:14<00:00,  4.55batch/s, accuracy=66.3, grad_norm=11, loss=0.85]   


Epoch 52 Average Gradient Norm: 29.6765


Epoch 53/100: 100%|██████████| 337/337 [01:13<00:00,  4.61batch/s, accuracy=66.1, grad_norm=21.1, loss=0.85] 


Epoch 53 Average Gradient Norm: 28.3815


Epoch 54/100: 100%|██████████| 337/337 [01:13<00:00,  4.60batch/s, accuracy=66.4, grad_norm=9.96, loss=0.849]


Epoch 54 Average Gradient Norm: 28.4419


Epoch 55/100: 100%|██████████| 337/337 [01:13<00:00,  4.58batch/s, accuracy=66.4, grad_norm=68.6, loss=0.847]


Epoch 55 Average Gradient Norm: 29.5374


Epoch 56/100: 100%|██████████| 337/337 [01:14<00:00,  4.55batch/s, accuracy=66.2, grad_norm=8.16, loss=0.847]


Epoch 56 Average Gradient Norm: 27.6606


Epoch 57/100: 100%|██████████| 337/337 [01:15<00:00,  4.47batch/s, accuracy=66.1, grad_norm=28, loss=0.848]  


Epoch 57 Average Gradient Norm: 32.6757


Epoch 58/100:  91%|█████████ | 307/337 [01:19<00:07,  3.84batch/s, accuracy=66.1, grad_norm=21.1, loss=0.771]


KeyboardInterrupt: 

In [3]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
from torch.utils.data import DataLoader, TensorDataset, random_split
from datetime import datetime
from tqdm import tqdm
import random
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from sklearn.model_selection import train_test_split

# 假设 SNR_dB, fd, equalized 已定义
SNR_dB = globals().get('SNR_dB', 'no')
fd = globals().get('fd', 'no')
equalized = globals().get('equalized', 'no')

# 假设 X_train_processed, y_train, X_test_processed, y_test 已定义
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# === 模型定义 ===
class SignalTransformer(nn.Module):
    def __init__(self, raw_input_dim, model_dim, num_heads, num_layers, num_classes, dropout=0.1):
        super(SignalTransformer, self).__init__()
        self.embedding = nn.Linear(raw_input_dim, model_dim)
        encoder_layer = TransformerEncoderLayer(
            d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True
        )
        self.encoder = TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(model_dim, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = self.encoder(x)
        x = x[:, -1, :]
        x = self.fc(x)
        return x

# === 数据准备 ===
X_train_tensor = torch.tensor(X_train_processed, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

num_classes = len(np.unique(y_train))

# 划分训练集 / 验证集
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# === 参数空间 ===
param_space = {
    "model_dim": [128, 256, 512],
    "num_heads": [2, 4, 8],
    "num_layers": [1, 2, 3],
    "dropout": [0.1, 0.3, 0.5],
    "learning_rate": [1e-3, 5e-4, 1e-4],
    "batch_size": [128, 256, 512]
}
num_search = 50  # 随机搜索次数
patience = 5
raw_input_dim = 2
num_epochs = 300

results_summary = []
best_config = None
best_val_acc = 0

# 计算梯度范数
def compute_grad_norm(model):
    total_norm = 0.0
    for p in model.parameters():
        if p.grad is not None:
            param_norm = p.grad.data.norm(2)
            total_norm += param_norm.item() ** 2
    return total_norm ** 0.5

# 平滑曲线
def moving_average(x, w=5):
    return np.convolve(x, np.ones(w), 'valid') / w

# === 随机搜索 ===
for search_idx in range(num_search):
    config = {k: random.choice(v) for k, v in param_space.items()}
    print(f"\n=== Random Search {search_idx+1}/{num_search} ===")
    print(f"Params: {config}")

    # 创建保存目录
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    script_name = "wisig_time_random"
    folder_name = f"{timestamp}_{script_name}_SNR{SNR_dB}"
    save_folder = os.path.join("search_results", folder_name)
    os.makedirs(save_folder, exist_ok=True)
    results_file = os.path.join(save_folder, "results.txt")

    with open(results_file, "w") as f:
        f.write(f"=== Hyperparameters ===\n{config}\n")

    # DataLoader
    train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True, drop_last=True)
    val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=config["batch_size"], shuffle=False)

    # 模型 & 优化器
    model = SignalTransformer(raw_input_dim, config["model_dim"], config["num_heads"],
                              config["num_layers"], num_classes, config["dropout"]).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"], weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    grad_norms = []

    best_val = 0
    patience_counter = 0
    best_model_wts = None

    for epoch in range(num_epochs):
    # 训练
        model.train()
        running_loss, correct_train, total_train = 0.0, 0, 0
        batch_grad_norms = []

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            grad_norm = compute_grad_norm(model)
            batch_grad_norms.append(grad_norm)
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        train_losses.append(running_loss / len(train_loader))
        train_accuracies.append(100 * correct_train / total_train)
        grad_norms.append(np.mean(batch_grad_norms))

        # 验证
        model.eval()
        correct_val, total_val = 0, 0
        val_loss_sum = 0.0
        with torch.no_grad():
            for val_inputs, val_labels in val_loader:
                val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
                val_outputs = model(val_inputs)
                val_loss = criterion(val_outputs, val_labels)
                val_loss_sum += val_loss.item()
                _, val_pred = torch.max(val_outputs, 1)
                total_val += val_labels.size(0)
                correct_val += (val_pred == val_labels).sum().item()

        val_acc = 100 * correct_val / total_val
        val_losses.append(val_loss_sum / len(val_loader))
        val_accuracies.append(val_acc)

        # 早停
        if val_acc > best_val:
            best_val = val_acc
            patience_counter = 0
            best_model_wts = model.state_dict()
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

        scheduler.step()


    # 恢复最佳权重
    if best_model_wts:
        model.load_state_dict(best_model_wts)

    # 测试集
    model.eval()
    test_preds, test_true = [], []
    with torch.no_grad():
        for test_inputs, test_labels in test_loader:
            test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)
            test_outputs = model(test_inputs)
            _, predicted = torch.max(test_outputs, 1)
            test_preds.extend(predicted.cpu().numpy())
            test_true.extend(test_labels.cpu().numpy())

    test_acc = 100 * np.sum(np.array(test_preds) == np.array(test_true)) / len(test_true)
    with open(results_file, "a") as f:
        f.write(f"\nVal Acc: {val_acc:.2f}% | Test Acc: {test_acc:.2f}%\n")

    # 控制台即时输出
    print(f"[Result] Config {search_idx+1}/{num_search} - Val Acc: {val_acc:.2f}%, Test Acc: {test_acc:.2f}%")

    # 记录结果
    results_summary.append((config, best_val, test_acc))
    if best_val > best_val_acc:
        best_val_acc = best_val
        best_config = (config, test_acc)


# === 最佳结果 ===
print("\n=== Best Hyperparameters ===")
print(best_config)


Using device: cuda

=== Random Search 1/50 ===
Params: {'model_dim': 512, 'num_heads': 4, 'num_layers': 1, 'dropout': 0.5, 'learning_rate': 0.0005, 'batch_size': 512}
Early stopping at epoch 19
[Result] Config 1/50 - Val Acc: 33.79%, Test Acc: 33.01%

=== Random Search 2/50 ===
Params: {'model_dim': 128, 'num_heads': 8, 'num_layers': 2, 'dropout': 0.1, 'learning_rate': 0.0005, 'batch_size': 128}
Early stopping at epoch 7
[Result] Config 2/50 - Val Acc: 16.47%, Test Acc: 16.67%

=== Random Search 3/50 ===
Params: {'model_dim': 512, 'num_heads': 2, 'num_layers': 1, 'dropout': 0.3, 'learning_rate': 0.0005, 'batch_size': 512}
Early stopping at epoch 16
[Result] Config 3/50 - Val Acc: 36.00%, Test Acc: 33.45%

=== Random Search 4/50 ===
Params: {'model_dim': 256, 'num_heads': 8, 'num_layers': 2, 'dropout': 0.5, 'learning_rate': 0.001, 'batch_size': 256}
Early stopping at epoch 6
[Result] Config 4/50 - Val Acc: 16.56%, Test Acc: 16.67%

=== Random Search 5/50 ===
Params: {'model_dim': 512, '

KeyboardInterrupt: 