In [4]:
from joblib import load
import pandas as pd
import numpy as np
import os
from  data_utilities import *
import cv2  # OpenCV 用于调整图像大小和颜色处理
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import gc  # 引入垃圾回收模块
from tqdm.auto import tqdm  # 自动适配环境 导入tqdm进度条库
from collections import defaultdict

dataset_name = 'ManySig'
dataset_path='../ManySig.pkl/'

compact_dataset = load_compact_pkl_dataset(dataset_path,dataset_name)

print("数据集发射机数量：",len(compact_dataset['tx_list']),"具体为：",compact_dataset['tx_list'])
print("数据集接收机数量：",len(compact_dataset['rx_list']),"具体为：",compact_dataset['rx_list'])
print("数据集采集天数：",len(compact_dataset['capture_date_list']),"具体为：",compact_dataset['capture_date_list'])


tx_list = compact_dataset['tx_list']
rx_list = compact_dataset['rx_list']
equalized = 1
capture_date_list = compact_dataset['capture_date_list']


n_tx = len(tx_list)
n_rx = len(rx_list)
print(n_tx,n_rx)


train_dates = ['2021_03_01', '2021_03_08', '2021_03_15']  # 设定你想用的训练日期
X_train, y_train, X_test, y_test = preprocess_dataset_for_classification(
    compact_dataset, tx_list, rx_list, train_dates, max_sig=None, equalized=0)

print("X_train shape:", X_train.shape)  # (num_blocks, 256, 250, 2)
print("y_train shape:", y_train.shape)


In [5]:
import numpy as np

# === 参数设置 ===
SNR_dB = 10            # 信噪比
fs = 20e6             # 采样率 (Hz)
fc = 2.4e9            # 载波频率 (Hz)
v = 120               # 速度 (m/s)

# === 多普勒频移计算 ===
def compute_doppler_shift(v, fc):
    c = 3e8  # 光速
    return (v / c) * fc

fd = compute_doppler_shift(v, fc)
print(f"[INFO] 多普勒频移 fd = {fd:.2f} Hz")

# === 多普勒变换 ===
def add_doppler_shift(signal, fd, fs):
    num_samples = signal.shape[-1]
    t = np.arange(num_samples) / fs
    doppler_phase = np.exp(1j * 2 * np.pi * fd * t)
    return signal * doppler_phase

# === 加噪声 + 多普勒 的主流程 ===
def preprocess_iq_data(data_real_imag, snr_db, fd, fs):
    # Step 1: 转为复数 IQ，shape: (N, T, 2) → (N, T)
    data_complex = data_real_imag[..., 0] + 1j * data_real_imag[..., 1]

    processed = []
    for sig in data_complex:
        # Step 2: 添加 AWGN 噪声
        signal_std = np.std(sig)
        noise_std = signal_std / (10 ** (snr_db / 20))
        noise = np.random.normal(0, noise_std, sig.shape) + 1j * np.random.normal(0, noise_std, sig.shape)
        noisy = sig + noise

        # Step 3: 添加多普勒频移
        shifted = add_doppler_shift(noisy, fd, fs)

        processed.append(shifted)

    processed = np.array(processed)  # shape: (N, T), complex

    # Step 4: 转回 [I, Q] 实数格式
    processed_real_imag = np.stack([processed.real, processed.imag], axis=-1)  # shape: (N, T, 2)

    return processed_real_imag


X_train_processed = preprocess_iq_data(X_train, snr_db=SNR_dB, fd=fd, fs=fs)
X_test_processed  = preprocess_iq_data(X_test,  snr_db=SNR_dB, fd=fd, fs=fs)

# 查看处理前后前10个点
print("原始信号 I 分量：", X_train[0, :10, 0])
print("处理后信号 I 分量：", X_train_processed[0, :10, 0])


[INFO] 多普勒频移 fd = 960.00 Hz
原始信号 I 分量： [ 0.00012207  0.01281771 -0.00210577 -0.01748701 -0.00283821  0.00039674
 -0.00585952  0.00900291  0.01324496 -0.0069887 ]
处理后信号 I 分量： [ 0.00348687  0.00711747 -0.00071386 -0.02241231 -0.00452609  0.00048404
 -0.00473657  0.01807841  0.02147405 -0.00801066]


In [6]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
from torch.utils.data import DataLoader, TensorDataset, Subset
from datetime import datetime
from tqdm import tqdm
from sklearn.model_selection import KFold
from torch.nn import TransformerEncoder, TransformerEncoderLayer

# 假设 SNR_dB 和 fd 已经定义
SNR_dB = globals().get('SNR_dB', 'no')
fd = globals().get('fd', 'no')

# === 模型与训练参数设置 ===
raw_input_dim = 2         # 每个时间步是 I/Q 两个值
model_dim = 256           # Transformer 模型内部维度
num_heads = 4
num_layers = 2
num_classes = len(np.unique(y_train))  # 或 len(tx_list)
dropout = 0.4
batch_size = 256
num_epochs = 100
learning_rate = 1e-4
patience = 5

# === 创建保存目录 ===
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
script_name = "wisig_time"
folder_name = f"{timestamp}_{script_name}_SNR{SNR_dB}dB_fd{fd}_classes_{num_classes}_Transformer"
save_folder = os.path.join(os.getcwd(), "training_results", folder_name)
os.makedirs(save_folder, exist_ok=True)

results_file = os.path.join(save_folder, "results.txt")
with open(results_file, "w") as f:
    f.write(f"=== Experiment Summary ===\n")
    f.write(f"Timestamp: {timestamp}\n")
    f.write(f"Total Classes: {num_classes}\n")
    f.write(f"SNR: {SNR_dB} dB\n")
    f.write(f"fd (Doppler shift): {fd} Hz\n")
    f.write(f"equalized: {equalized} Hz\n")

# === 模型定义 ===
class SignalTransformer(nn.Module):
    def __init__(self, raw_input_dim, model_dim, num_heads, num_layers, num_classes, dropout=0.1):
        super(SignalTransformer, self).__init__()
        self.embedding = nn.Linear(raw_input_dim, model_dim)
        encoder_layer = TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout, batch_first=True)
        self.encoder = TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(model_dim, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        x = self.encoder(x)
        x = x[:, -1, :]
        x = self.fc(x)
        return x


# === 假设 X_train, y_train, X_test, y_test 都已定义并 shape 为 (N, L, 2) ===
# 若还未定义，可自行加载并 reshape
X_test_tensor = torch.tensor(X_test_processed, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

train_dataset = TensorDataset(torch.tensor(X_train_processed, dtype=torch.float32),
                               torch.tensor(y_train, dtype=torch.long))

# === K折交叉验证训练 ===
n_splits = 5
kfold = KFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_results = []
test_results = []

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def compute_grad_norm(model):
    total_norm = 0.0
    for p in model.parameters():
        if p.grad is not None:
            param_norm = p.grad.data.norm(2)
            total_norm += param_norm.item() ** 2
    return total_norm ** 0.5

def moving_average(x, w=5):
    return np.convolve(x, np.ones(w), 'valid') / w

avg_grad_norms_per_fold = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(train_dataset)):
    print(f"\n====== Fold {fold+1}/{n_splits} ======")

    train_subset = Subset(train_dataset, train_idx)
    val_subset = Subset(train_dataset, val_idx)

    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, drop_last=True)

    model = SignalTransformer(raw_input_dim, model_dim, num_heads, num_layers, num_classes, dropout).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    train_losses, val_losses = [], []
    train_accuracies, val_accuracies = [], []
    grad_norms = []

    best_val_loss = float('inf')
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        running_train_loss, correct_train, total_train = 0.0, 0, 0
        batch_grad_norms = []

        with tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", unit="batch") as tepoch:
            for inputs, labels in tepoch:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()

                grad_norm = compute_grad_norm(model)
                batch_grad_norms.append(grad_norm)

                optimizer.step()

                running_train_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total_train += labels.size(0)
                correct_train += (predicted == labels).sum().item()

                tepoch.set_postfix(loss=running_train_loss / (len(train_loader)),
                                   accuracy=100 * correct_train / total_train,
                                   grad_norm=grad_norm)

        epoch_train_loss = running_train_loss / len(train_loader)
        train_losses.append(epoch_train_loss)
        train_accuracies.append(100 * correct_train / total_train)
        avg_grad_norm = np.mean(batch_grad_norms)
        grad_norms.append(avg_grad_norm)

        print(f"Epoch {epoch+1} Average Gradient Norm: {avg_grad_norm:.4f}")

        # === 验证 ===
        model.eval()
        running_val_loss, correct_val, total_val = 0.0, 0, 0

        with torch.no_grad():
            for val_inputs, val_labels in val_loader:
                val_inputs = val_inputs.to(device)
                val_labels = val_labels.to(device)

                val_outputs = model(val_inputs)
                val_loss = criterion(val_outputs, val_labels)
                running_val_loss += val_loss.item()
                _, val_predicted = torch.max(val_outputs, 1)
                total_val += val_labels.size(0)
                correct_val += (val_predicted == val_labels).sum().item()

        epoch_val_loss = running_val_loss / len(val_loader)
        val_losses.append(epoch_val_loss)
        val_accuracies.append(100 * correct_val / total_val)

        with open(results_file, "a") as f:
            f.write(f"Epoch {epoch+1} | Train Acc: {train_accuracies[-1]:.2f}% | Val Acc: {val_accuracies[-1]:.2f}%\n")

        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping")
            break

        scheduler.step()

    fold_results.append(max(val_accuracies))
    avg_grad_norms_per_fold.append(grad_norms)

    # === 绘制 loss 曲线 ===
    plt.figure()
    plt.plot(train_losses, label='Train Loss')
    plt.plot(val_losses, label='Val Loss')
    plt.plot(moving_average(train_losses), label='Train Loss (Smooth)', linestyle='--')
    plt.plot(moving_average(val_losses), label='Val Loss (Smooth)', linestyle='--')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(f'Fold {fold+1} Loss Curve')
    plt.legend()
    plt.grid(True)
    plt.savefig(os.path.join(save_folder, f"fold_{fold+1}_loss_curve.png"))
    plt.close()

    # === 绘制 Gradient Norm 曲线 ===
    plt.figure()
    plt.plot(grad_norms, label='Gradient Norm')
    plt.xlabel('Epoch')
    plt.ylabel('Gradient Norm')
    plt.title(f'Fold {fold+1} Gradient Norm')
    plt.grid(True)
    plt.legend()
    plt.savefig(os.path.join(save_folder, f"fold_{fold+1}_grad_norm.png"))
    plt.close()

    # === 测试集评估 ===
    model.eval()
    test_preds, test_true = [], []

    with torch.no_grad():
        for test_inputs, test_labels in test_loader:
            test_inputs = test_inputs.to(device)
            test_labels = test_labels.to(device)

            test_outputs = model(test_inputs)
            _, predicted = torch.max(test_outputs, 1)
            test_preds.extend(predicted.cpu().numpy())
            test_true.extend(test_labels.cpu().numpy())

    test_preds = np.array(test_preds)
    test_true = np.array(test_true)
    test_accuracy = 100.0 * np.sum(test_preds == test_true) / len(test_true)
    test_results.append(test_accuracy)

    with open(results_file, "a") as f:
        f.write(f"Fold {fold+1} Test Accuracy: {test_accuracy:.2f}%\n")

    cm = confusion_matrix(test_true, test_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Test Confusion Matrix Fold {fold+1}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.savefig(os.path.join(save_folder, f"fold_{fold+1}_test_confusion_matrix.png"))
    plt.close()

# === 总结结果 ===
avg_val = np.mean(fold_results)
avg_test = np.mean(test_results)

with open(results_file, "a") as f:
    f.write("\n=== Summary ===\n")
    for i in range(n_splits):
        f.write(f"Fold {i+1}: Val Acc = {fold_results[i]:.2f}%, Test Acc = {test_results[i]:.2f}%\n")
    f.write(f"\nAverage Validation Accuracy: {avg_val:.2f}%\n")
    f.write(f"Average Test Accuracy: {avg_test:.2f}%\n")

print("\n=== Final Summary ===")
for i in range(n_splits):
    print(f"Fold {i+1}: Val = {fold_results[i]:.2f}%, Test = {test_results[i]:.2f}%")
print(f"Average Val Accuracy: {avg_val:.2f}%")
print(f"Average Test Accuracy: {avg_test:.2f}%")


Using device: cuda



Epoch 1/100: 100%|██████████| 675/675 [03:21<00:00,  3.35batch/s, accuracy=17.7, loss=1.8]  
Epoch 2/100: 100%|██████████| 675/675 [03:22<00:00,  3.33batch/s, accuracy=28.1, loss=1.64] 
Epoch 3/100: 100%|██████████| 675/675 [03:22<00:00,  3.34batch/s, accuracy=31.6, loss=1.57] 
Epoch 4/100: 100%|██████████| 675/675 [03:22<00:00,  3.33batch/s, accuracy=33.4, loss=1.55] 
Epoch 5/100: 100%|██████████| 675/675 [03:22<00:00,  3.33batch/s, accuracy=34.6, loss=1.53] 
Epoch 6/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=35.4, loss=1.52] 
Epoch 7/100: 100%|██████████| 675/675 [03:22<00:00,  3.33batch/s, accuracy=36.3, loss=1.5]  
Epoch 8/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=37.3, loss=1.48] 
Epoch 9/100: 100%|██████████| 675/675 [03:22<00:00,  3.33batch/s, accuracy=38.3, loss=1.47] 
Epoch 10/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=39.3, loss=1.44] 
Epoch 11/100: 100%|██████████| 675/675 [03:22<00:00,  3.33batch/s, ac

Early stopping



Epoch 1/100: 100%|██████████| 675/675 [03:24<00:00,  3.31batch/s, accuracy=19.5, loss=1.77] 
Epoch 2/100: 100%|██████████| 675/675 [03:23<00:00,  3.31batch/s, accuracy=29.9, loss=1.61] 
Epoch 3/100: 100%|██████████| 675/675 [03:22<00:00,  3.33batch/s, accuracy=32, loss=1.56]   
Epoch 4/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=33.9, loss=1.54] 
Epoch 5/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=34.6, loss=1.53] 
Epoch 6/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=35.5, loss=1.51] 
Epoch 7/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=36.3, loss=1.5]  
Epoch 8/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=37.7, loss=1.47] 
Epoch 9/100: 100%|██████████| 675/675 [03:23<00:00,  3.31batch/s, accuracy=38.7, loss=1.45] 
Epoch 10/100: 100%|██████████| 675/675 [03:23<00:00,  3.32batch/s, accuracy=40.3, loss=1.42] 
Epoch 11/100: 100%|██████████| 675/675 [03:24<00:00,  3.29batch/s, ac

Early stopping



Epoch 1/100: 100%|██████████| 675/675 [01:13<00:00,  9.19batch/s, accuracy=17.9, loss=1.8]  
Epoch 2/100: 100%|██████████| 675/675 [01:12<00:00,  9.25batch/s, accuracy=28.5, loss=1.64] 
Epoch 3/100: 100%|██████████| 675/675 [01:13<00:00,  9.24batch/s, accuracy=31.4, loss=1.57] 
Epoch 4/100: 100%|██████████| 675/675 [01:13<00:00,  9.23batch/s, accuracy=33, loss=1.55]   
Epoch 5/100: 100%|██████████| 675/675 [01:13<00:00,  9.22batch/s, accuracy=34.5, loss=1.54] 
Epoch 6/100: 100%|██████████| 675/675 [01:12<00:00,  9.27batch/s, accuracy=35.7, loss=1.52] 
Epoch 7/100: 100%|██████████| 675/675 [01:12<00:00,  9.25batch/s, accuracy=36.7, loss=1.5]  
Epoch 8/100: 100%|██████████| 675/675 [01:12<00:00,  9.28batch/s, accuracy=37.7, loss=1.48] 
Epoch 9/100: 100%|██████████| 675/675 [01:12<00:00,  9.26batch/s, accuracy=38.6, loss=1.46] 
Epoch 10/100: 100%|██████████| 675/675 [01:12<00:00,  9.27batch/s, accuracy=39.3, loss=1.44] 


Early stopping



Epoch 1/100: 100%|██████████| 675/675 [01:12<00:00,  9.26batch/s, accuracy=16.9, loss=1.81] 
Epoch 2/100: 100%|██████████| 675/675 [01:12<00:00,  9.29batch/s, accuracy=26.5, loss=1.67] 
Epoch 3/100: 100%|██████████| 675/675 [01:12<00:00,  9.30batch/s, accuracy=31.7, loss=1.57] 
Epoch 4/100: 100%|██████████| 675/675 [01:12<00:00,  9.29batch/s, accuracy=34.1, loss=1.54] 
Epoch 5/100: 100%|██████████| 675/675 [01:12<00:00,  9.32batch/s, accuracy=35.8, loss=1.51] 
Epoch 6/100: 100%|██████████| 675/675 [01:12<00:00,  9.30batch/s, accuracy=36.9, loss=1.49] 
Epoch 7/100: 100%|██████████| 675/675 [01:12<00:00,  9.32batch/s, accuracy=37.7, loss=1.48] 
Epoch 8/100: 100%|██████████| 675/675 [01:12<00:00,  9.31batch/s, accuracy=39.1, loss=1.45] 
Epoch 9/100: 100%|██████████| 675/675 [01:12<00:00,  9.33batch/s, accuracy=39.8, loss=1.43] 
Epoch 10/100: 100%|██████████| 675/675 [01:12<00:00,  9.30batch/s, accuracy=40.8, loss=1.41] 
Epoch 11/100: 100%|██████████| 675/675 [01:12<00:00,  9.33batch/s, ac

Early stopping



Epoch 1/100: 100%|██████████| 675/675 [01:13<00:00,  9.18batch/s, accuracy=19.2, loss=1.78] 
Epoch 2/100: 100%|██████████| 675/675 [01:13<00:00,  9.22batch/s, accuracy=30, loss=1.61]   
Epoch 3/100: 100%|██████████| 675/675 [01:12<00:00,  9.25batch/s, accuracy=32.7, loss=1.56] 
Epoch 4/100: 100%|██████████| 675/675 [01:12<00:00,  9.26batch/s, accuracy=34.2, loss=1.54] 
Epoch 5/100: 100%|██████████| 675/675 [01:12<00:00,  9.25batch/s, accuracy=35.2, loss=1.52] 
Epoch 6/100: 100%|██████████| 675/675 [01:13<00:00,  9.23batch/s, accuracy=36, loss=1.51]   
Epoch 7/100: 100%|██████████| 675/675 [01:12<00:00,  9.25batch/s, accuracy=36.4, loss=1.5]  
Epoch 8/100: 100%|██████████| 675/675 [01:13<00:00,  9.23batch/s, accuracy=37.5, loss=1.48] 
Epoch 9/100: 100%|██████████| 675/675 [01:12<00:00,  9.26batch/s, accuracy=38.4, loss=1.47] 
Epoch 10/100: 100%|██████████| 675/675 [01:13<00:00,  9.24batch/s, accuracy=39.3, loss=1.45] 
Epoch 11/100: 100%|██████████| 675/675 [01:12<00:00,  9.26batch/s, ac

Early stopping

=== Final Summary ===
Fold 1: Val = 51.33%, Test = 50.20%
Fold 2: Val = 39.39%, Test = 39.63%
Fold 3: Val = 36.12%, Test = 32.71%
Fold 4: Val = 37.99%, Test = 38.11%
Fold 5: Val = 37.87%, Test = 36.19%
Average Val Accuracy: 40.54%
Average Test Accuracy: 39.37%
