In [None]:
# 跨日期 ResNet18 循环SNR 6个设备
from joblib import load
import pandas as pd
import numpy as np
import os
from  data_utilities import *
import cv2  # OpenCV 用于调整图像大小和颜色处理
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import gc  # 引入垃圾回收模块
from tqdm.auto import tqdm  # 自动适配环境 导入tqdm进度条库
from collections import defaultdict

# Name and on-disk location of the compact dataset handed to the loader.
dataset_name = 'ManySig'
dataset_path='../ManySig.pkl/'

# load_compact_pkl_dataset is not defined in this file
# (presumably provided by the `data_utilities` star import — TODO confirm).
compact_dataset = load_compact_pkl_dataset(dataset_path,dataset_name)

# Report how many transmitters, receivers and capture days the dataset holds.
print("数据集发射机数量：",len(compact_dataset['tx_list']),"具体为：",compact_dataset['tx_list'])
print("数据集接收机数量：",len(compact_dataset['rx_list']),"具体为：",compact_dataset['rx_list'])
print("数据集采集天数：",len(compact_dataset['capture_date_list']),"具体为：",compact_dataset['capture_date_list'])
tx_list = compact_dataset['tx_list']
rx_list = compact_dataset['rx_list']
capture_date_list = compact_dataset['capture_date_list']


n_tx = len(tx_list)
n_rx = len(rx_list)
print(n_tx,n_rx)


# Experiment parameters (the block_size loop itself lives in the __main__ section).
equalized = 0           # forwarded as `equalized=` to the dataset builder; meaning defined there
max_sig = None          # maximum number of signals used per TX-RX-date

train_dates = ['2021_03_15']  # training capture date (kept fixed)
test_dates  = ['2021_03_01']  # test capture date (kept fixed)

# Interleaving granularity: number of signals taken from each RX per round.
# Recommended to keep this fixed so it is not confused with block_size.
Y_PER_RX = 1

def build_dataset_for_block(block_size):
    """Build the train/test arrays for a single ``block_size`` setting.

    Delegates to ``preprocess_dataset_cross_IQ_blocks_single_date_per_rx_cyclic``
    with the module-level dataset, TX list, train/test dates, ``max_sig`` and
    ``equalized`` settings.  The per-RX interleave count passed as ``y`` is
    ``Y_PER_RX`` capped at ``block_size``.

    Author's description of the helper (not verifiable from this file):
    raw signals are (N, 256, 2); ``block_size`` signals form one block of
    shape (block_size, 256, 2), which is transposed and split into 256
    samples of shape (block_size, 2).

    Returns:
        The tuple ``(X_train, y_train, X_test, y_test)`` as produced by the helper.
    """
    builder_kwargs = {
        "compact_dataset": compact_dataset,
        "tx_list": tx_list,
        "train_dates": train_dates,
        "test_dates": test_dates,
        "max_sig": max_sig,
        "equalized": equalized,
        "block_size": block_size,
        # Never request more signals per RX than fit in one block.
        "y": min(Y_PER_RX, block_size),
    }
    train_x, train_y, test_x, test_y = (
        preprocess_dataset_cross_IQ_blocks_single_date_per_rx_cyclic(**builder_kwargs)
    )
    return train_x, train_y, test_x, test_y

# ---------- PyTorch & 训练配置 ----------
import torch
torch._dynamo.disable()
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Subset
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import seaborn as sns
from datetime import datetime
import pywt  # 保留，若需扩展
import math

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Data augmentation / channel settings.
# NOTE: both ADD_NOISE and ADD_DOPPLER are enabled here.
FS = 20e6            # sampling rate, passed as fs_hz to the Doppler helper
FC = 2.4e9           # carrier frequency, passed as fc_hz to compute_doppler_shift
SNR_DB = 20          # SNR in dB used for AWGN and recorded in the result folder name
VELOCITY_KMH = 120   # speed in km/h used to derive the Doppler shift
ADD_NOISE = True
ADD_DOPPLER = True

# Training hyper-parameters.
BATCH_SIZE = 256   # a larger batch is more stable (the dataset is large)
EPOCHS = 200
LR = 1e-3
WEIGHT_DECAY = 1e-4
N_SPLITS = 5       # number of K-fold splits
PATIENCE = 5       # epochs without sufficient improvement before early stopping
MIN_DELTA = 0.1   # minimum improvement for early stopping (percentage points)

# Root directory for all experiment outputs; created at import time.
SAVE_ROOT = "./training_results"
os.makedirs(SAVE_ROOT, exist_ok=True)
script_name = "wisig_XFR"

# ---------- 可选：AWGN / Doppler（保留） ----------
def compute_doppler_shift(v_kmh, fc_hz):
    """Return the Doppler shift in Hz for a speed in km/h at carrier ``fc_hz``.

    A falsy speed (``0`` or ``None``) yields ``0.0``.  Otherwise the speed is
    converted to m/s and the shift is ``fc_hz * v / c`` with ``c = 3e8`` m/s.
    """
    speed_of_light = 3e8
    if v_kmh:
        speed_ms = v_kmh / 3.6  # km/h -> m/s
        return fc_hz * speed_ms / speed_of_light
    return 0.0

def add_complex_awgn(signal, snr_db):
    """Add complex white Gaussian noise to ``signal`` at the requested SNR.

    Args:
        signal: NumPy array of (complex) samples.
        snr_db: Target SNR in dB, measured against the mean power of
            ``signal``.  ``None`` disables the noise and returns ``signal``
            unchanged (same object).

    Returns:
        ``signal`` plus noise drawn from NumPy's global random state; the
        noise power is split equally between the real and imaginary parts.
    """
    if snr_db is None:
        return signal

    signal_power = np.mean(np.abs(signal) ** 2)
    snr_linear = 10 ** (snr_db / 10)
    # Half of the noise power goes to each of the I and Q components.
    component_std = np.sqrt(signal_power / snr_linear / 2)

    dims = signal.shape
    in_phase = np.random.randn(*dims)
    quadrature = np.random.randn(*dims)
    return signal + component_std * (in_phase + 1j * quadrature)

def apply_doppler_shift(signal, fd_hz, fs_hz):
    """Rotate ``signal`` by a constant frequency offset of ``fd_hz``.

    Args:
        signal: 1-D array of complex samples.
        fd_hz: Frequency offset in Hz.  ``None`` or ``0`` returns ``signal``
            unchanged (same object).
        fs_hz: Sampling rate in Hz, used to build the time axis.

    Returns:
        ``signal`` multiplied sample-wise by ``exp(j * 2 * pi * fd_hz * t)``.
    """
    if fd_hz is None or fd_hz == 0:
        return signal
    sample_times = np.arange(len(signal)) / fs_hz
    rotation = np.exp(1j * 2 * np.pi * fd_hz * sample_times)
    return signal * rotation

# ---------- 数据预处理：针对交错样本，用 per-sample 标准化----------
def preprocess_for_pointcloud_cnn(data_real_imag, add_noise=False, snr_db=None,
                                  add_doppler=False, fd_hz=None, fs_hz=FS):
    """Normalize I/Q samples one by one and return them as a float32 tensor.

    Args:
        data_real_imag: 3-D NumPy array; the last axis holds (I, Q).
        add_noise: When true, AWGN at ``snr_db`` is added to each sample.
        snr_db: SNR in dB forwarded to ``add_complex_awgn``.
        add_doppler: When true, a frequency offset of ``fd_hz`` is applied.
        fd_hz: Doppler shift in Hz forwarded to ``apply_doppler_shift``.
        fs_hz: Sampling rate in Hz forwarded to ``apply_doppler_shift``.

    Returns:
        ``torch.float32`` tensor with the same shape as the input.  Each
        sample is power-normalized, optionally impaired (Doppler first, then
        noise), and finally standardized per channel to zero mean / unit std.
    """
    samples = data_real_imag.astype(np.float32).copy()
    n_samples, _seq_len, _n_channels = samples.shape  # enforces a 3-D input
    normalized = np.empty_like(samples, dtype=np.float32)

    for idx, sample in enumerate(samples):
        # Build the complex baseband signal from the I and Q columns.
        z = sample[..., 0] + 1j * sample[..., 1]

        # Step 1: scale to unit average power (epsilon avoids division by zero).
        rms = np.sqrt(np.mean(np.abs(z) ** 2))
        z = z / (rms + 1e-12)

        # Step 2: optional Doppler shift.
        if add_doppler:
            z = apply_doppler_shift(z, fd_hz, fs_hz)

        # Step 3: optional additive noise.
        if add_noise:
            z = add_complex_awgn(z, snr_db)

        # Back to a two-column (I, Q) representation.
        pair = np.stack([np.real(z), np.imag(z)], axis=-1).astype(np.float32)

        # Per-sample, per-channel standardization; constant channels keep std = 1.
        center = pair.mean(axis=0)
        spread = pair.std(axis=0)
        spread[spread < 1e-8] = 1.0
        normalized[idx] = (pair - center) / spread

    return torch.tensor(normalized, dtype=torch.float32)


# ---------- 新的 RF1DCNN ----------
class ResidualBlock1D(nn.Module):
    """1-D residual block: two conv + batch-norm stages with a skip connection.

    The first convolution may be strided; the second always uses stride 1.
    Both use ``kernel_size // 2`` padding.  When the channel count or the
    stride changes, the skip path goes through a 1x1 convolution followed by
    batch norm so that it matches the main path's shape.
    """

    def __init__(self, in_channels, out_channels, kernel_size=5, stride=1):
        super().__init__()
        half_kernel = kernel_size // 2
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               stride=stride, padding=half_kernel, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               stride=1, padding=half_kernel, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)

        # A projection shortcut is only needed when the shapes would not match.
        needs_projection = in_channels != out_channels or stride != 1
        if needs_projection:
            self.downsample = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(out_channels),
            )
        else:
            self.downsample = None

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        main = self.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        shortcut = x if self.downsample is None else self.downsample(x)
        return self.relu(main + shortcut)

class RF1DCNN(nn.Module):
    """Four-stage 1-D residual CNN classifier for two-channel (I/Q) sequences.

    Each stage is a ``ResidualBlock1D`` followed by ``MaxPool1d(2)``; the
    channel count grows 2 -> 32 -> 64 -> 128 -> 256.  The pooled feature map is
    flattened and classified by a two-layer MLP with dropout.

    Args:
        num_classes: Number of output logits.
        dropout: Dropout probability used in the classifier head.
        input_length: Sequence length of the inputs.  Must be at least 16,
            because four poolings by 2 would otherwise leave no time steps.

    Raises:
        ValueError: If ``input_length`` is too short for the pooling stack.
    """

    # Number of MaxPool1d(2) stages; each one halves the sequence length.
    _NUM_POOLS = 4

    def __init__(self, num_classes, dropout=0.3, input_length=256):
        super().__init__()

        # Fail early with a clear message instead of building Linear(0, 512)
        # and crashing later inside a pooling layer.
        min_length = 2 ** self._NUM_POOLS
        if input_length < min_length:
            raise ValueError(
                f"input_length must be >= {min_length} "
                f"({self._NUM_POOLS} MaxPool1d(2) stages), got {input_length}"
            )

        self.layer1 = ResidualBlock1D(2, 32, kernel_size=7)
        self.pool1 = nn.MaxPool1d(2)
        self.layer2 = ResidualBlock1D(32, 64, kernel_size=5)
        self.pool2 = nn.MaxPool1d(2)
        self.layer3 = ResidualBlock1D(64, 128, kernel_size=5)
        self.pool3 = nn.MaxPool1d(2)
        self.layer4 = ResidualBlock1D(128, 256, kernel_size=3)
        self.pool4 = nn.MaxPool1d(2)

        # Sequence length after the pooling stack (each pool floors length / 2).
        pooled_length = input_length
        for _ in range(self._NUM_POOLS):
            pooled_length = pooled_length // 2
        self.flattened_length = 256 * pooled_length

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.flattened_length, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Map a batch shaped [B, L, 2] to class logits shaped [B, num_classes]."""
        x = x.permute(0, 2, 1)  # [B, L, 2] -> [B, 2, L] (channels first for Conv1d)
        x = self.pool1(self.layer1(x))
        x = self.pool2(self.layer2(x))
        x = self.pool3(self.layer3(x))
        x = self.pool4(self.layer4(x))
        return self.fc(x)


# ---------- 辅助绘图/评估函数 ----------
def evaluate_model(model, dataloader, device, num_classes):
    """Evaluate ``model`` on ``dataloader`` without tracking gradients.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        num_classes: Size of the confusion matrix (labels ``0..num_classes-1``).

    Returns:
        ``(accuracy_percent, confusion_matrix)``; accuracy is ``0.0`` when the
        loader yields no samples.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    truths, guesses = [], []

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            predicted = torch.max(logits, 1)[1]  # index of the largest logit
            n_correct += (predicted == targets).sum().item()
            n_seen += targets.size(0)
            truths.extend(targets.cpu().numpy())
            guesses.extend(predicted.cpu().numpy())

    if n_seen > 0:
        accuracy = 100.0 * n_correct / n_seen
    else:
        accuracy = 0.0
    matrix = confusion_matrix(truths, guesses, labels=list(range(num_classes)))
    return accuracy, matrix

def plot_confusion_matrix(cm, classes, fold, save_folder, dataset_type='Test'):
    """Render a confusion matrix as a heatmap and save it as a PNG.

    Args:
        cm: Square confusion matrix of integer counts.
        classes: Class labels used for the axis ticks.  When ``None`` or when
            its length does not match ``cm``, seaborn's automatic tick labels
            are used instead.
        fold: Fold number shown in the title and used in the file name.
        save_folder: Directory the image is written to.
        dataset_type: Split name ('Train', 'Val', 'Test'); it appears in the
            title and, lower-cased, in the file name.
    """
    # Previously `classes` was accepted but ignored; use it for the tick labels.
    tick_labels = "auto"
    if classes is not None and len(classes) == len(cm):
        tick_labels = list(classes)

    plt.figure(figsize=(6, 5))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=tick_labels, yticklabels=tick_labels)
        plt.title(f'{dataset_type} Confusion Matrix Fold{fold}')
        plt.ylabel('True')
        plt.xlabel('Predicted')
        plt.savefig(os.path.join(save_folder, f'{dataset_type.lower()}_cm_fold{fold}.png'))
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close()

def plot_curves(train_losses, val_losses, train_acc, val_acc, fold, save_folder):
    """Save the per-epoch loss and accuracy curves of one fold as PNG files.

    Two figures are written to ``save_folder``: ``loss_fold{fold}.png`` and
    ``acc_fold{fold}.png``, each showing the training and validation series.
    """
    panels = (
        (train_losses, val_losses, 'Train Loss', 'Val Loss',
         'Loss', f'Fold {fold} Loss', f'loss_fold{fold}.png'),
        (train_acc, val_acc, 'Train Acc', 'Val Acc',
         'Accuracy (%)', f'Fold {fold} Accuracy', f'acc_fold{fold}.png'),
    )
    for train_series, val_series, train_label, val_label, y_label, title, filename in panels:
        plt.figure()
        plt.plot(train_series, label=train_label)
        plt.plot(val_series, label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(save_folder, filename))
        plt.close()

# ---------- K-Fold 训练主函数（使用 RF1DCNN） ----------
def train_kfold_pointcloud(X_train, y_train, X_test, y_test, num_classes, device=DEVICE,
                           batch_size=BATCH_SIZE, epochs=EPOCHS, lr=LR, weight_decay=WEIGHT_DECAY,
                           n_splits=N_SPLITS, patience=PATIENCE, min_delta=MIN_DELTA,
                           input_length=None,
                           script_name=script_name):
    """Train RF1DCNN with K-fold cross-validation and test every fold's best model.

    For each fold a fresh model is trained with AdamW and a StepLR schedule.
    Training stops early when the validation accuracy has not improved by more
    than ``min_delta`` percentage points for ``patience`` consecutive epochs.
    The weights of the best validation epoch are restored before the final
    train/validation/test evaluation.

    Args:
        X_train, y_train: Training tensors; split into folds with ``KFold``.
        X_test, y_test: Held-out test tensors, evaluated once per fold.
        num_classes: Number of classes (model outputs / confusion-matrix size).
        device: Device used for training and evaluation.
        batch_size, epochs, lr, weight_decay: Optimization settings.
        n_splits: Number of folds.
        patience, min_delta: Early-stopping settings (see above).
        input_length: Sequence length given to the model; defaults to
            ``X_train.shape[1]``.
        script_name: Prefix used in the result folder name.

    Returns:
        ``(save_folder, mean_test_accuracy)`` where the accuracy is the mean
        over folds, in percent.

    Side effects:
        Creates a timestamped folder under ``SAVE_ROOT`` holding ``results.txt``,
        confusion matrices (.npy and .png), training curves and the model
        weights of every fold.  The module-level ``SNR_DB``, ``ADD_NOISE``,
        ``ADD_DOPPLER``, ``FS``, ``FC`` and ``VELOCITY_KMH`` values are read at
        call time for logging and for the folder name only.
    """
    import copy  # local import: only needed to snapshot the best weights

    if input_length is None:
        input_length = int(X_train.shape[1])
    # Only advertise a Doppler shift in the folder name when Doppler is enabled.
    fd = int(compute_doppler_shift(VELOCITY_KMH, FC)) if ADD_DOPPLER else 0
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    save_dir = f"{timestamp}_{script_name}_block{input_length}_SNR{SNR_DB}dB_fd{fd}_classes_{num_classes}_ResNet"
    save_folder = os.path.join(SAVE_ROOT, save_dir)
    os.makedirs(save_folder, exist_ok=True)
    results_file = os.path.join(save_folder, "results.txt")

    # Record the experiment parameters.
    with open(results_file, 'w') as f:
        f.write("=== Experiment Parameters ===\n")
        f.write(f"Timestamp: {timestamp}\n")
        f.write(f"SNR_dB: {SNR_DB}, ADD_NOISE: {ADD_NOISE}, ADD_DOPPLER: {ADD_DOPPLER}\n")
        f.write(f"FS: {FS}, FC: {FC}, Velocity_kmh: {VELOCITY_KMH}\n")
        f.write(f"Batch: {batch_size}, Epochs: {epochs}, LR: {lr}, WD: {weight_decay}\n")
        f.write(f"Num classes: {num_classes}, K-Fold: {n_splits}, Patience: {patience}, MinDelta: {min_delta}\n")
        f.write("============================\n\n")

    # Test loader (shared by all folds).
    test_dataset = TensorDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

    # Full training set, split into folds by index.
    full_dataset = TensorDataset(X_train, y_train)
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    indices = np.arange(len(full_dataset))

    val_scores, test_scores = [], []

    for fold, (tr_idx, va_idx) in enumerate(kf.split(indices)):
        print(f"\n=== Fold {fold+1}/{n_splits} ===")
        tr_sub = Subset(full_dataset, tr_idx)
        va_sub = Subset(full_dataset, va_idx)
        tr_loader = DataLoader(tr_sub, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
        va_loader = DataLoader(va_sub, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)

        model = RF1DCNN(num_classes=num_classes, dropout=0.3, input_length=input_length).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

        best_val = 0.0
        best_wts = None
        patience_cnt = 0

        train_losses, val_losses, train_accs, val_accs = [], [], [], []

        for epoch in range(epochs):
            model.train()
            running_loss, correct, total = 0.0, 0, 0
            total_grad, cnt_grad = 0.0, 0
            for xb, yb in tr_loader:
                xb = xb.to(device); yb = yb.to(device)
                optimizer.zero_grad()
                out = model(xb)
                loss = criterion(out, yb)
                loss.backward()
                # Mean per-parameter gradient norm, tracked for logging only.
                grad_norms = [p.grad.norm().item() for p in model.parameters() if p.grad is not None]
                if grad_norms:
                    total_grad += np.mean(grad_norms); cnt_grad += 1
                optimizer.step()
                running_loss += loss.item()
                _, p = torch.max(out, 1)
                correct += (p == yb).sum().item()
                total += yb.size(0)
            avg_grad = total_grad / max(cnt_grad, 1)

            train_loss = running_loss / max(1, len(tr_loader))
            train_acc = 100.0 * correct / max(1, total)
            train_losses.append(train_loss); train_accs.append(train_acc)

            # Validation pass.
            model.eval()
            vloss, vcorrect, vtotal = 0.0, 0, 0
            all_labels, all_preds = [], []
            with torch.no_grad():
                for xb, yb in va_loader:
                    xb = xb.to(device); yb = yb.to(device)
                    out = model(xb)
                    loss = criterion(out, yb)
                    vloss += loss.item()
                    _, p = torch.max(out, 1)
                    vcorrect += (p == yb).sum().item()
                    vtotal += yb.size(0)
                    all_labels.extend(yb.cpu().numpy()); all_preds.extend(p.cpu().numpy())
            val_loss = vloss / max(1, len(va_loader))
            val_acc = 100.0 * vcorrect / max(1, vtotal)
            val_losses.append(val_loss); val_accs.append(val_acc)
            # Per-epoch snapshot; replaced after training by the best model's matrix.
            val_cm = confusion_matrix(all_labels, all_preds, labels=list(range(num_classes)))
            np.save(os.path.join(save_folder, f'val_cm_fold{fold+1}.npy'), val_cm)

            print(f"Epoch {epoch+1}/{epochs} | TrainAcc={train_acc:.2f}% | ValAcc={val_acc:.2f}% | "
                  f"TrainLoss={train_loss:.4f} | ValLoss={val_loss:.4f} | AvgGrad={avg_grad:.4f}")
            with open(results_file, 'a') as f:
                f.write(f"Fold{fold+1} Epoch{epoch+1} | TrainAcc={train_acc:.2f}% | ValAcc={val_acc:.2f}% | "
                        f"TrainLoss={train_loss:.4f} | ValLoss={val_loss:.4f} | AvgGrad={avg_grad:.4f}\n")

            # Early stopping on validation accuracy with min_delta (percentage points).
            if val_acc > best_val + min_delta:
                best_val = val_acc
                # state_dict() returns references to the live parameters, so it
                # must be deep-copied; otherwise later optimizer steps would
                # overwrite the "best" snapshot with the latest weights.
                best_wts = copy.deepcopy(model.state_dict())
                patience_cnt = 0
            else:
                patience_cnt += 1
                if patience_cnt >= patience:
                    print("Early stopping.")
                    break

            scheduler.step()

        # Restore the best validation checkpoint (if any epoch improved on 0%).
        if best_wts is not None:
            model.load_state_dict(best_wts)

        # Save the train/val confusion matrices of the restored model.
        train_acc, train_cm = evaluate_model(model, tr_loader, device, num_classes)
        np.save(os.path.join(save_folder, f'train_cm_fold{fold+1}.npy'), train_cm)
        plot_confusion_matrix(train_cm, classes=list(range(num_classes)), fold=fold+1, save_folder=save_folder, dataset_type='Train')

        val_acc, val_cm = evaluate_model(model, va_loader, device, num_classes)
        np.save(os.path.join(save_folder, f'val_cm_fold{fold+1}.npy'), val_cm)
        plot_confusion_matrix(val_cm, classes=list(range(num_classes)), fold=fold+1, save_folder=save_folder, dataset_type='Val')

        # Test-set evaluation.
        test_acc, test_cm = evaluate_model(model, test_loader, device, num_classes)
        np.save(os.path.join(save_folder, f'test_cm_fold{fold+1}.npy'), test_cm)
        plot_confusion_matrix(test_cm, classes=list(range(num_classes)), fold=fold+1, save_folder=save_folder, dataset_type='Test')

        print(f"Fold {fold+1} Test Accuracy: {test_acc:.2f}%")
        with open(results_file, 'a') as f:
            f.write(f"Fold{fold+1} TestAcc={test_acc:.2f}%\n")

        # Save the training curves and the model weights.
        plot_curves(train_losses, val_losses, train_accs, val_accs, fold+1, save_folder)
        torch.save(model.state_dict(), os.path.join(save_folder, f'model_fold{fold+1}.pth'))

        val_scores.append(val_acc)
        test_scores.append(test_acc)

    # Summary over all folds.
    print("\n=== Overall Summary ===")
    print(f"Val Acc: {np.mean(val_scores):.2f} ± {np.std(val_scores):.2f}")
    print(f"Test Acc: {np.mean(test_scores):.2f} ± {np.std(test_scores):.2f}")
    with open(results_file, 'a') as f:
        f.write(f"\n=== Overall Summary ===\nVal Acc: {np.mean(val_scores):.2f} ± {np.std(val_scores):.2f}\nTest Acc: {np.mean(test_scores):.2f} ± {np.std(test_scores):.2f}\n")

    print(f"\nAll results saved in {save_folder}")
    return save_folder, float(np.mean(test_scores))

# ---------- Pre-run preparation (preprocessing and tensor conversion) ----------
# NOTE: only a banner is printed here.  The actual preprocessing (per-sample
# zero-mean / unit-std normalization of the NumPy arrays) is performed per
# experiment inside run_experiment_with_block_size; the sequence length of
# X_train / X_test follows the chosen block_size.
print("Preprocessing (per-sample normalization)...")

###########################################################
# === block_size sweep: SNR fixed at 20 dB; observe how block_size affects accuracy ===
###########################################################

def run_experiment_with_block_size(block_size, snr_db=20):
    """Run one full K-fold experiment for a given ``block_size``.

    Steps: rebuild the dataset for ``block_size``, preprocess train and test
    data (per-sample normalization, AWGN at ``snr_db``, and Doppler when
    ``ADD_DOPPLER`` is set), then train and evaluate via
    ``train_kfold_pointcloud``.

    Args:
        block_size: Sequence length of the generated samples; also passed to
            the model as ``input_length``.
        snr_db: SNR in dB used for the additive noise.

    Returns:
        ``(mean_test_acc, save_folder)``, or ``None`` when the dataset builder
        yields no training or no test samples.

    Side effects:
        Overwrites the module-level ``SNR_DB`` (so the result folder name and
        log reflect ``snr_db``) and forces ``ADD_NOISE`` to ``True``.
    """
    global SNR_DB, ADD_NOISE
    SNR_DB = snr_db
    ADD_NOISE = True

    print("\n" + "="*70)
    print(f"Running experiment: block_size={block_size}, SNR={snr_db} dB")
    print("="*70)

    # 1) Rebuild the dataset (block_size changes the sample length).
    X_train, y_train, X_test, y_test = build_dataset_for_block(block_size)
    if len(X_train) == 0 or len(X_test) == 0:
        print(f"[WARN] block_size={block_size} 可用样本不足，跳过。")
        return None

    # 2) Preprocess (per-sample normalization + optional Doppler + AWGN).
    fd = int(compute_doppler_shift(VELOCITY_KMH, FC)) if ADD_DOPPLER else 0
    X_train_torch = preprocess_for_pointcloud_cnn(
        X_train, add_noise=True, snr_db=snr_db, add_doppler=ADD_DOPPLER, fd_hz=fd, fs_hz=FS
    )
    X_test_torch = preprocess_for_pointcloud_cnn(
        X_test, add_noise=True, snr_db=snr_db, add_doppler=ADD_DOPPLER, fd_hz=fd, fs_hz=FS
    )
    y_train_torch = torch.tensor(y_train, dtype=torch.long)
    y_test_torch = torch.tensor(y_test, dtype=torch.long)

    # Size the classifier from the largest label seen in either split.
    # Counting unique training labels under-sizes the output layer when labels
    # are not contiguous or when the test split contains a higher label, which
    # breaks the loss and silently drops rows from the confusion matrix.
    # For contiguous labels 0..K-1 this still yields K.
    max_label = int(torch.max(torch.cat([y_train_torch, y_test_torch])).item())
    num_classes = max_label + 1

    # 3) Train and test (K-fold on the training set).
    save_folder, mean_test_acc = train_kfold_pointcloud(
        X_train_torch, y_train_torch, X_test_torch, y_test_torch,
        num_classes=num_classes,
        input_length=block_size,
        script_name=f"{script_name}_Block"
    )
    print(f"Finished block_size={block_size}: mean test acc={mean_test_acc:.2f}% → {save_folder}")
    return mean_test_acc, save_folder

if __name__ == "__main__":
    # NOTE: RF1DCNN contains four MaxPool1d(2) stages, so block_size must be >= 16.
    block_sizes = list(range(24, 480, 12))
    accs, folders, used_bs = [], [], []

    # Run one experiment per block size; skipped runs return None.
    for bs in block_sizes:
        out = run_experiment_with_block_size(bs, snr_db=20)
        if out is not None:
            acc, folder = out
            used_bs.append(bs)
            accs.append(acc)
            folders.append(folder)

    print("\n\n================ FINAL SUMMARY ================")
    for bs, acc, folder in zip(used_bs, accs, folders):
        print(f"block_size {bs:>4} → TestAcc(mean)={acc:6.2f}% → {folder}")
    print("=============================================")

    # Persist the sweep summary (only when at least one run completed).
    if used_bs:
        import csv

        os.makedirs(SAVE_ROOT, exist_ok=True)
        summary_path = os.path.join(SAVE_ROOT, "block_size_sweep_summary.csv")
        with open(summary_path, "w", newline="") as f:
            w = csv.writer(f)
            w.writerow(["block_size", "mean_test_acc", "result_folder"])
            w.writerows([bs, acc, folder] for bs, acc, folder in zip(used_bs, accs, folders))
        print(f"Summary CSV saved: {summary_path}")

        # Accuracy-vs-block_size curve.
        fig_path = os.path.join(SAVE_ROOT, "wisig_block_size_vs_accuracy.png")
        plt.figure(figsize=(8, 5))
        plt.plot(used_bs, accs, marker="o")
        plt.xlabel("block_size")
        plt.ylabel("Mean Test Accuracy (%)")
        plt.title("block_size vs accuracy (SNR=20 dB)")
        plt.grid(True)
        plt.savefig(fig_path)
        plt.close()
        print(f"Curve saved: {fig_path}")


  from .autonotebook import tqdm as notebook_tqdm


数据集发射机数量： 6 具体为： ['14-10', '14-7', '20-15', '20-19', '6-15', '8-20']
数据集接收机数量： 12 具体为： ['1-1', '1-19', '14-7', '18-2', '19-2', '2-1', '2-19', '20-1', '3-19', '7-14', '7-7', '8-8']
数据集采集天数： 4 具体为： ['2021_03_01', '2021_03_08', '2021_03_15', '2021_03_23']
6 12
Preprocessing (per-sample normalization)...

Running experiment: block_size=24, SNR=20 dB

=== Fold 1/5 ===
Epoch 1/200 | TrainAcc=93.60% | ValAcc=96.11% | TrainLoss=0.1780 | ValLoss=0.1110 | AvgGrad=0.0916
Epoch 2/200 | TrainAcc=96.41% | ValAcc=96.46% | TrainLoss=0.1037 | ValLoss=0.1032 | AvgGrad=0.0590
Epoch 3/200 | TrainAcc=97.05% | ValAcc=96.85% | TrainLoss=0.0861 | ValLoss=0.0932 | AvgGrad=0.0507
Epoch 4/200 | TrainAcc=97.47% | ValAcc=97.30% | TrainLoss=0.0742 | ValLoss=0.0810 | AvgGrad=0.0466
Epoch 5/200 | TrainAcc=97.81% | ValAcc=97.22% | TrainLoss=0.0643 | ValLoss=0.0867 | AvgGrad=0.0445
Epoch 6/200 | TrainAcc=98.07% | ValAcc=97.47% | TrainLoss=0.0564 | ValLoss=0.0779 | AvgGrad=0.0432
Epoch 7/200 | TrainAcc=98.30% | ValAcc=9

: 