In [None]:
from joblib import load
import pandas as pd
import numpy as np
import os
from  data_utilities import *
import cv2  # OpenCV 用于调整图像大小和颜色处理
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import gc  # 引入垃圾回收模块
from tqdm.auto import tqdm  # 自动适配环境 导入tqdm进度条库
from collections import defaultdict

# ---------------------------------------------------------------------------
# Load the compact dataset and report what it contains.
# ---------------------------------------------------------------------------
dataset_name = 'ManyTx'
dataset_path = '../ManyTx.pkl/'

compact_dataset = load_compact_pkl_dataset(dataset_path, dataset_name)

# Pull the three index lists out once; they are reused for the report below
# and by the split call further down.
tx_list = compact_dataset['tx_list']
rx_list = compact_dataset['rx_list']
capture_date_list = compact_dataset['capture_date_list']
equalized = 0  # forwarded unchanged to the split helper below

print("数据集发射机数量：", len(tx_list), "具体为：", tx_list)
print("数据集接收机数量：", len(rx_list), "具体为：", rx_list)
print("数据集采集天数：", len(capture_date_list), "具体为：", capture_date_list)

n_tx, n_rx = len(tx_list), len(rx_list)
print(n_tx, n_rx)


# ---------------------------------------------------------------------------
# Split parameters
# ---------------------------------------------------------------------------
max_sig = None      # max number of signals used per TX-RX-date (None = no cap given)
block_size = 240    # number of signals per block
y = 5               # number of signals per group when concatenating
test_ratio = 0.25   # fraction of the data held out for testing

# Build the train/test arrays with the project helper from data_utilities.
X_train, y_train, X_test, y_test = preprocess_dataset_cross_IQ_blocks_all_mix_random(
    compact_dataset=compact_dataset,
    tx_list=tx_list,
    max_sig=max_sig,
    equalized=equalized,
    block_size=block_size,
    y=y,
    test_ratio=test_ratio,
    seed=42,
)

for split_name, split_array in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test  shape:", X_test),
    ("y_test  shape:", y_test),
):
    print(split_name, split_array.shape)

import os
import numpy as np
import torch
torch._dynamo.disable()
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader, Subset
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from tqdm.auto import tqdm
import pywt

# ====================== Configuration ======================
# These constants are bound as default argument values by the functions
# defined below, so they must be set before those definitions run.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Signal-processing parameters
USE_LOG = True          # take log of the magnitude before wavelet filtering
WAVELET = 'db6'         # wavelet name passed to pywt
WAVELET_LEVEL = 6       # wavelet decomposition depth
FS = 20e6               # sampling rate in Hz (default for fs_hz)
FC = 2.4e9              # carrier frequency in Hz (default for fc_hz)
SNR_DB = 20          # None or a specific value (dB)
VELOCITY_KMH = 120     # None or a specific value (km/h)
ADD_NOISE = True        # enable AWGN augmentation in preprocessing
ADD_DOPPLER = True      # enable Doppler-shift augmentation in preprocessing

# Training parameters
BATCH_SIZE = 64
EPOCHS = 200            # upper bound; early stopping may end a fold sooner
LR = 1e-3
WEIGHT_DECAY = 1e-4
N_SPLITS = 5            # number of K-fold splits
PATIENCE = 8            # epochs without validation improvement before stopping

# Output location (created eagerly so later writes cannot fail on a missing dir)
SAVE_ROOT = "./training_results"
os.makedirs(SAVE_ROOT, exist_ok=True)

# ====================== 数据处理函数 ======================
def compute_doppler_shift(v_kmh, fc_hz):
    """Return the Doppler shift in Hz for a given speed and carrier frequency.

    Args:
        v_kmh: Relative speed in km/h. Any falsy value (``None``, ``0``)
            is treated as "no motion".
        fc_hz: Carrier frequency in Hz.

    Returns:
        ``fc_hz * v / c`` with ``v`` converted to m/s and ``c = 3e8`` m/s,
        or the integer ``0`` when ``v_kmh`` is falsy.
    """
    speed_of_light = 3e8  # m/s, rounded
    if v_kmh:
        speed_mps = v_kmh / 3.6  # km/h -> m/s
        return fc_hz * speed_mps / speed_of_light
    return 0

def add_complex_awgn(signal, snr_db):
    """Add complex additive white Gaussian noise at a target SNR.

    The noise power is derived from the measured mean power of ``signal``
    and split evenly between the real and imaginary parts, which are drawn
    independently from NumPy's global random generator (real part first).

    Args:
        signal: Complex NumPy array; noise of the same shape is added.
        snr_db: Desired signal-to-noise ratio in dB, or ``None`` to leave
            the signal untouched.

    Returns:
        ``signal`` itself when ``snr_db`` is ``None``; otherwise a new array
        equal to ``signal`` plus the generated noise.
    """
    # The configuration allows SNR to be None ("no noise"); without this
    # guard the dB-to-linear conversion below would raise a TypeError.
    if snr_db is None:
        return signal

    # Measured signal power sets the noise level.
    signal_power = np.mean(np.abs(signal) ** 2)

    # dB -> linear ratio, then the matching total noise power.
    snr_linear = 10 ** (snr_db / 10)
    noise_power = signal_power / snr_linear

    # Each of the two quadratures carries half of the noise power.
    noise_std = np.sqrt(noise_power / 2)
    noise_real = np.random.normal(0, noise_std, signal.shape)
    noise_imag = np.random.normal(0, noise_std, signal.shape)

    return signal + (noise_real + 1j * noise_imag)

def apply_doppler_shift(signal, fd_hz, fs_hz):
    """Rotate a complex signal by a constant frequency offset.

    Args:
        signal: 1-D complex array of samples.
        fd_hz: Frequency offset in Hz; ``None`` or ``0`` disables the shift.
        fs_hz: Sampling rate in Hz, used to build the time axis.

    Returns:
        ``signal`` itself when no shift is requested, otherwise
        ``signal * exp(j * 2 * pi * fd_hz * t)`` with ``t = n / fs_hz``.
    """
    shift_requested = fd_hz is not None and fd_hz != 0
    if not shift_requested:
        return signal

    sample_times = np.arange(len(signal)) / fs_hz
    rotation = np.exp(1j * 2 * np.pi * fd_hz * sample_times)
    return signal * rotation

def process_signal_led_rff(sig_complex, use_log=False, wavelet='db6', level=6):
    """Turn one complex signal into a normalised, wavelet-smoothed magnitude.

    Steps:
        1. Take the magnitude of every complex sample.
        2. Optionally take the natural log (offset by 1e-8 to avoid log(0)).
        3. Wavelet-decompose, zero the last coefficient band returned by
           ``pywt.wavedec`` and reconstruct.
        4. Standardise to zero mean and (when possible) unit variance.

    Args:
        sig_complex: 1-D complex array.
        use_log: Apply the log in step 2 when true.
        wavelet: Wavelet name forwarded to PyWavelets.
        level: Decomposition depth forwarded to PyWavelets.

    Returns:
        ``float32`` array with the same length as ``sig_complex``.
    """
    envelope = np.abs(sig_complex)
    if use_log:
        envelope = np.log(envelope + 1e-8)
    n_samples = len(envelope)

    # Drop the last band of the decomposition, then rebuild the signal.
    bands = pywt.wavedec(envelope, wavelet, level=level)
    bands[-1] = np.zeros_like(bands[-1])
    # The reconstruction can come back longer than the input, hence the crop.
    smoothed = pywt.waverec(bands, wavelet)[:n_samples]

    centre = smoothed.mean()
    spread = smoothed.std()
    centred = smoothed - centre
    # A (near-)constant signal is only centred; dividing would amplify noise.
    scaled = centred if spread < 1e-8 else centred / (spread + 1e-8)
    return scaled.astype(np.float32)


def preprocess_iq_dataset_led_rff(data_real_imag, snr_db=SNR_DB, velocity_kmh=VELOCITY_KMH,
                                  fc_hz=FC, fs_hz=FS, use_log=USE_LOG, wavelet=WAVELET,
                                  level=WAVELET_LEVEL, add_noise=ADD_NOISE, add_doppler=ADD_DOPPLER):
    """Convert a batch of I/Q signals into single-channel feature tensors.

    Each signal is optionally corrupted with AWGN, optionally Doppler
    shifted, and then passed through ``process_signal_led_rff``.

    Args:
        data_real_imag: 3-D array whose last axis holds the real part at
            index 0 and the imaginary part at index 1.
        snr_db: SNR handed to ``add_complex_awgn`` when ``add_noise`` is set.
        velocity_kmh: Speed used to derive the Doppler shift.
        fc_hz: Carrier frequency in Hz.
        fs_hz: Sampling rate in Hz.
        use_log, wavelet, level: Forwarded to ``process_signal_led_rff``.
        add_noise: Enable the AWGN step.
        add_doppler: Enable the Doppler step.

    Returns:
        ``torch.float32`` tensor of shape ``(num_samples, 1, feature_len)``.
    """
    # Unpacking into exactly three names also rejects non-3-D input.
    n_signals, _, _ = data_real_imag.shape

    iq = data_real_imag[..., 0] + 1j * data_real_imag[..., 1]
    doppler_hz = compute_doppler_shift(velocity_kmh, fc_hz) if add_doppler else None

    def _featurise(sig):
        # Noise is applied before the Doppler rotation.
        if add_noise:
            sig = add_complex_awgn(sig, snr_db)
        if add_doppler:
            sig = apply_doppler_shift(sig, doppler_hz, fs_hz)
        return process_signal_led_rff(sig, use_log=use_log, wavelet=wavelet, level=level)

    # Signals are handled strictly in index order.
    feature_rows = [_featurise(iq[k]) for k in range(n_signals)]
    stacked = np.stack(feature_rows, axis=0)
    # Insert the channel axis expected by Conv1d: (N, L) -> (N, 1, L).
    return torch.tensor(stacked, dtype=torch.float32)[:, None, :]


# ====================== InceptionTime 模型 ======================
class InceptionBlock(nn.Module):
    """Four-branch 1-D inception block.

    A 1x1 bottleneck feeds three wide convolutions (kernel 10, 20, 40) and a
    max-pool + 1x1 convolution branch. The branch outputs are concatenated
    on the channel axis, batch-normalised and passed through ReLU, giving
    ``4 * out_channels`` output channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Attribute names and creation order are kept fixed so state-dict
        # keys and seeded weight initialisation stay the same.
        reduced = max(1, out_channels // 4)
        self.bottleneck = nn.Conv1d(in_channels, reduced, kernel_size=1, bias=False)
        self.conv1 = nn.Conv1d(reduced, out_channels, kernel_size=10, padding=5)
        self.conv2 = nn.Conv1d(reduced, out_channels, kernel_size=20, padding=10)
        self.conv3 = nn.Conv1d(reduced, out_channels, kernel_size=40, padding=20)
        self.maxpool = nn.MaxPool1d(3, stride=1, padding=1)
        self.convpool = nn.Conv1d(reduced, out_channels, kernel_size=1, bias=False)
        self.bn = nn.BatchNorm1d(4 * out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the block to ``x`` of shape ``(batch, in_channels, length)``."""
        squeezed = self.bottleneck(x)
        branches = [
            self.conv1(squeezed),
            self.conv2(squeezed),
            self.conv3(squeezed),
            self.convpool(self.maxpool(squeezed)),
        ]
        # Even kernels with padding k/2 yield one extra sample compared with
        # the pooling branch, so every branch is cropped to the shortest one.
        common_len = min(branch.shape[-1] for branch in branches)
        merged = torch.cat([branch[..., :common_len] for branch in branches], dim=1)
        return self.relu(self.bn(merged))

class InceptionTime(nn.Module):
    """Classifier made of three stacked inception blocks.

    The blocks are followed by global average pooling over the length axis
    and a single linear layer that produces ``num_classes`` logits.
    """

    def __init__(self, num_classes, in_channels=1, channels=32):
        super().__init__()
        # Every inception block emits four concatenated branches.
        block_width = 4 * channels
        self.b1 = InceptionBlock(in_channels, channels)
        self.b2 = InceptionBlock(block_width, channels)
        self.b3 = InceptionBlock(block_width, channels)
        self.gap = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(block_width, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)``."""
        # Odd-length inputs lose their final sample before the blocks run.
        if x.shape[-1] % 2:
            x = x[..., :-1]
        for stage in (self.b1, self.b2, self.b3):
            x = stage(x)
        pooled = self.gap(x).squeeze(-1)
        return self.fc(pooled)

# ====================== 工具函数 ======================
def evaluate_model(model, dataloader, device, num_classes):
    """Compute accuracy and the confusion matrix of ``model`` on a loader.

    The model is switched to eval mode and left in that mode.

    Args:
        model: Network returning per-class scores.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        num_classes: Number of classes; fixes the confusion-matrix size to
            labels ``0 .. num_classes - 1``.

    Returns:
        ``(accuracy_percent, confusion_matrix)``.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    truth, guesses = [], []

    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            # Index of the highest score per sample.
            predicted = torch.max(model(inputs), 1)[1]
            n_correct += (predicted == targets).sum().item()
            n_seen += targets.size(0)
            truth.extend(targets.cpu().numpy())
            guesses.extend(predicted.cpu().numpy())

    accuracy = 100.0 * n_correct / n_seen
    matrix = confusion_matrix(truth, guesses, labels=list(range(num_classes)))
    return accuracy, matrix

def plot_confusion_matrix(cm, classes, fold, save_folder, dataset_type='Test'):
    """Render a confusion matrix as an annotated heatmap and save it as PNG.

    Args:
        cm: Confusion matrix of integer counts.
        classes: Accepted for call compatibility; not used by the plot.
        fold: Fold number shown in the title and used in the file name.
        save_folder: Directory the image is written to.
        dataset_type: Label such as ``'Train'``, ``'Val'`` or ``'Test'``;
            its lower-cased form prefixes the file name.
    """
    image_name = f'{dataset_type.lower()}_cm_fold{fold}.png'
    image_path = os.path.join(save_folder, image_name)

    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'{dataset_type} Confusion Matrix Fold{fold}')
    plt.ylabel('True')
    plt.xlabel('Predicted')
    plt.savefig(image_path)
    # Close explicitly so figures do not pile up across folds.
    plt.close()

def plot_curves(train_losses, val_losses, train_acc, val_acc, fold, save_folder):
    """Save the loss and accuracy curves of one fold as two PNG files.

    Writes ``loss_fold{fold}.png`` and ``acc_fold{fold}.png`` into
    ``save_folder``; each shows the training and validation series per epoch.
    """
    panels = (
        # (train series, val series, legend tag, y-axis label, title tag, file stem)
        (train_losses, val_losses, 'Loss', 'Loss', 'Loss', 'loss'),
        (train_acc, val_acc, 'Acc', 'Accuracy (%)', 'Accuracy', 'acc'),
    )
    for train_series, val_series, tag, y_label, title_tag, stem in panels:
        plt.figure()
        plt.plot(train_series, label=f'Train {tag}')
        plt.plot(val_series, label=f'Val {tag}')
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(f'Fold {fold} {title_tag}')
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(save_folder, f'{stem}_fold{fold}.png'))
        plt.close()

# ====================== KFold 训练（带参数保存 + 混淆矩阵 + 曲线 + 平均梯度范数） ======================
def train_kfold(X_train, y_train, X_test, y_test, num_classes, device=DEVICE,
                snr_db=SNR_DB, velocity_kmh=VELOCITY_KMH, fc_hz=FC, fs_hz=FS,
                wavelet=WAVELET, wavelet_level=WAVELET_LEVEL,
                batch_size=BATCH_SIZE, epochs=EPOCHS, lr=LR, weight_decay=WEIGHT_DECAY,
                n_splits=N_SPLITS, patience=PATIENCE):
    """Train one InceptionTime model per K-fold split and log results to disk.

    For every fold a fresh model is trained with Adam and a step LR schedule,
    early-stopped on validation accuracy, restored to its best checkpoint and
    then evaluated on the training, validation and test data. Confusion
    matrices (``.npy`` and ``.png``), training curves, the model weights and
    a running ``results.txt`` are written to a timestamped folder under
    ``SAVE_ROOT``.

    Args:
        X_train, y_train: Training tensors; ``X_train.shape[1]`` is used as
            the number of input channels.
        X_test, y_test: Held-out test tensors, evaluated once per fold.
        num_classes: Number of output classes.
        device: Device used for the model and the batches.
        snr_db, velocity_kmh, fc_hz, fs_hz, wavelet, wavelet_level: Only
            recorded in the folder name / results file; the data is not
            re-processed here.
        batch_size, epochs, lr, weight_decay: Optimisation settings.
        n_splits: Number of folds (shuffled, ``random_state=42``).
        patience: Epochs without validation improvement before stopping.

    Returns:
        Path of the folder that holds all artefacts of this run.
    """
    import copy  # local import: only needed to snapshot the best weights

    fd_hz = compute_doppler_shift(velocity_kmh, fc_hz)
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    script_name = 'wisig_LED'
    save_dir = f"{timestamp}_{script_name}_SNR{snr_db}dB_fd{int(fd_hz)}_classes_{num_classes}_CNN"
    save_folder = os.path.join(SAVE_ROOT, save_dir)
    os.makedirs(save_folder, exist_ok=True)
    results_file = os.path.join(save_folder, "results.txt")

    # Record the experiment parameters. The encoding is pinned because the
    # summary written at the end contains a non-ASCII character.
    with open(results_file, 'w', encoding='utf-8') as f:
        f.write("=== Experiment Parameters ===\n")
        f.write(f"Timestamp: {timestamp}\n")
        f.write(f"Device: {device}\n")
        f.write(f"SNR_dB: {snr_db}\nDoppler_fd: {fd_hz:.2f} Hz\nFS: {fs_hz}\nFC: {fc_hz}\n")
        f.write(f"Wavelet: {wavelet}, Level: {wavelet_level}\n")
        f.write(f"Batch size: {batch_size}, Epochs: {epochs}, LR: {lr}, Weight decay: {weight_decay}\n")
        f.write(f"K-Fold: {n_splits}, Patience: {patience}, Num classes: {num_classes}\n")
        f.write("============================\n\n")

    full_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    indices = np.arange(len(full_dataset))

    val_scores, test_scores = [], []

    for fold, (tr_idx, va_idx) in enumerate(kf.split(indices)):
        print(f"\n=== Fold {fold+1}/{n_splits} ===")
        tr_sub, va_sub = Subset(full_dataset, tr_idx), Subset(full_dataset, va_idx)
        tr_loader = DataLoader(tr_sub, batch_size=batch_size, shuffle=True)
        va_loader = DataLoader(va_sub, batch_size=batch_size, shuffle=False)

        model = InceptionTime(num_classes=num_classes, in_channels=X_train.shape[1]).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

        best_val, best_wts, patience_cnt = 0.0, None, 0
        train_losses, val_losses, train_acc_list, val_acc_list = [], [], [], []

        for epoch in range(epochs):
            # ---------------- training pass ----------------
            model.train()
            running_loss, correct, total = 0.0, 0, 0
            total_grad, count_grad = 0.0, 0
            for xb, yb in tr_loader:
                xb, yb = xb.to(device), yb.to(device)
                optimizer.zero_grad()
                out = model(xb)
                loss = criterion(out, yb)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                _, pred = torch.max(out, 1)
                correct += (pred == yb).sum().item()
                total += yb.size(0)
                # Mean of the per-parameter gradient norms for this batch.
                grad_norms = [param.grad.norm().item()
                              for param in model.parameters() if param.grad is not None]
                if grad_norms:
                    total_grad += np.mean(grad_norms)
                    count_grad += 1
            avg_grad = total_grad / max(count_grad, 1)

            train_loss = running_loss / len(tr_loader)
            train_acc = 100.0 * correct / total
            train_losses.append(train_loss)
            train_acc_list.append(train_acc)

            # ---------------- validation pass ----------------
            model.eval()
            vloss, vcorrect, vtotal = 0.0, 0, 0
            all_labels, all_preds = [], []
            with torch.no_grad():
                for xb, yb in va_loader:
                    xb, yb = xb.to(device), yb.to(device)
                    out = model(xb)
                    loss = criterion(out, yb)
                    vloss += loss.item()
                    _, pred = torch.max(out, 1)
                    vcorrect += (pred == yb).sum().item()
                    vtotal += yb.size(0)
                    all_labels.extend(yb.cpu().numpy())
                    all_preds.extend(pred.cpu().numpy())
            val_loss = vloss / len(va_loader)
            val_acc = 100.0 * vcorrect / vtotal
            val_losses.append(val_loss)
            val_acc_list.append(val_acc)
            # Per-epoch snapshot; replaced after training by the matrix of
            # the restored best checkpoint.
            val_cm = confusion_matrix(all_labels, all_preds, labels=list(range(num_classes)))
            np.save(os.path.join(save_folder, f'val_cm_fold{fold+1}.npy'), val_cm)

            print(f"Epoch {epoch+1}/{epochs} | TrainAcc={train_acc:.2f}% | ValAcc={val_acc:.2f}% | "
                  f"TrainLoss={train_loss:.4f} | ValLoss={val_loss:.4f} | AvgGrad={avg_grad:.4f}")
            with open(results_file, 'a', encoding='utf-8') as f:
                f.write(f"Fold{fold+1} Epoch{epoch+1} | TrainAcc={train_acc:.2f}% | ValAcc={val_acc:.2f}% | "
                        f"TrainLoss={train_loss:.4f} | ValLoss={val_loss:.4f} | AvgGrad={avg_grad:.4f}\n")

            # ---------------- early stopping ----------------
            if val_acc > best_val + 0.01:
                best_val = val_acc
                # state_dict() hands back references to the live parameters,
                # so it must be deep-copied; otherwise later optimizer steps
                # overwrite the "best" snapshot and restoring it is a no-op.
                best_wts = copy.deepcopy(model.state_dict())
                patience_cnt = 0
            else:
                patience_cnt += 1
                if patience_cnt >= patience:
                    print("Early stopping.")
                    break
            scheduler.step()

        # Roll back to the best validation checkpoint of this fold.
        if best_wts is not None:
            model.load_state_dict(best_wts)

        # Train / validation confusion matrices of the restored model.
        train_acc, train_cm = evaluate_model(model, tr_loader, device, num_classes)
        np.save(os.path.join(save_folder, f'train_cm_fold{fold+1}.npy'), train_cm)
        plot_confusion_matrix(train_cm, classes=list(range(num_classes)), fold=fold+1,
                              save_folder=save_folder, dataset_type='Train')

        val_acc, val_cm = evaluate_model(model, va_loader, device, num_classes)
        np.save(os.path.join(save_folder, f'val_cm_fold{fold+1}.npy'), val_cm)
        plot_confusion_matrix(val_cm, classes=list(range(num_classes)), fold=fold+1,
                              save_folder=save_folder, dataset_type='Val')

        # Test evaluation.
        test_acc, test_cm = evaluate_model(model, test_loader, device, num_classes)
        np.save(os.path.join(save_folder, f'test_cm_fold{fold+1}.npy'), test_cm)
        plot_confusion_matrix(test_cm, classes=list(range(num_classes)), fold=fold+1,
                              save_folder=save_folder, dataset_type='Test')
        with open(results_file, 'a', encoding='utf-8') as f:
            f.write(f"Fold{fold+1} TestAcc={test_acc:.2f}%\n")
        print(f"Fold {fold+1} Test Accuracy: {test_acc:.2f}%")

        # Training curves.
        plot_curves(train_losses, val_losses, train_acc_list, val_acc_list, fold+1, save_folder)

        # Persist the (best) model weights of this fold.
        torch.save(model.state_dict(), os.path.join(save_folder, f'model_fold{fold+1}.pth'))

        val_scores.append(val_acc)
        test_scores.append(test_acc)

    # Summary across folds.
    print("\n=== Overall Summary ===")
    print(f"Val Acc: {np.mean(val_scores):.2f} ± {np.std(val_scores):.2f}")
    print(f"Test Acc: {np.mean(test_scores):.2f} ± {np.std(test_scores):.2f}")
    with open(results_file, 'a', encoding='utf-8') as f:
        f.write(f"\n=== Overall Summary ===\nVal Acc: {np.mean(val_scores):.2f} ± {np.std(val_scores):.2f}\nTest Acc: {np.mean(test_scores):.2f} ± {np.std(test_scores):.2f}\n")

    print(f"\nAll results saved in {save_folder}")
    return save_folder


# ====================== Usage example ======================
# Assumes the IQ arrays are shaped [num_samples, length, 2] and the labels
# [num_samples].
# Feature extraction runs on the training set first and the test set second.
X_train_proc = preprocess_iq_dataset_led_rff(X_train)
X_test_proc = preprocess_iq_dataset_led_rff(X_test)

# Class indices must be int64 for CrossEntropyLoss.
y_train_torch, y_test_torch = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# The class count is taken from the distinct labels seen in training.
num_classes = len(np.unique(y_train_torch))
save_folder = train_kfold(
    X_train_proc, y_train_torch, X_test_proc, y_test_torch, num_classes
)


数据集发射机数量： 150 具体为： ['1-1', '1-10', '1-11', '1-12', '1-14', '1-15', '1-16', '1-18', '1-19', '1-2', '1-8', '10-1', '10-10', '10-11', '10-17', '10-4', '10-7', '11-1', '11-10', '11-17', '11-19', '11-20', '11-4', '11-7', '12-1', '12-19', '12-20', '12-7', '13-14', '13-18', '13-19', '13-20', '13-3', '13-7', '14-10', '14-11', '14-12', '14-13', '14-14', '14-20', '14-7', '14-8', '14-9', '15-1', '15-19', '15-6', '16-1', '16-16', '16-19', '16-20', '16-5', '17-10', '17-11', '18-1', '18-10', '18-11', '18-12', '18-13', '18-14', '18-15', '18-16', '18-17', '18-2', '18-20', '18-4', '18-5', '18-7', '18-8', '18-9', '19-1', '19-10', '19-11', '19-12', '19-13', '19-14', '19-19', '19-2', '19-20', '19-3', '19-4', '19-6', '19-7', '19-8', '19-9', '2-1', '2-12', '2-13', '2-14', '2-15', '2-16', '2-17', '2-19', '2-20', '2-3', '2-4', '2-5', '2-6', '2-7', '2-8', '20-1', '20-12', '20-14', '20-15', '20-16', '20-18', '20-19', '20-20', '20-3', '20-4', '20-5', '20-7', '20-8', '3-1', '3-13', '3-18', '3-19', '3-2', '3-20', 




=== Fold 1/5 ===
Epoch 1/200 | TrainAcc=3.45% | ValAcc=4.14% | TrainLoss=4.6977 | ValLoss=4.6715 | AvgGrad=0.0823
Epoch 2/200 | TrainAcc=7.46% | ValAcc=3.66% | TrainLoss=4.4545 | ValLoss=5.1885 | AvgGrad=0.1603
Epoch 3/200 | TrainAcc=11.32% | ValAcc=7.47% | TrainLoss=4.2307 | ValLoss=4.5571 | AvgGrad=0.2348
Epoch 4/200 | TrainAcc=14.17% | ValAcc=8.17% | TrainLoss=4.0610 | ValLoss=4.8306 | AvgGrad=0.2829
Epoch 5/200 | TrainAcc=16.36% | ValAcc=3.93% | TrainLoss=3.9276 | ValLoss=6.4030 | AvgGrad=0.3149
Epoch 6/200 | TrainAcc=18.25% | ValAcc=5.69% | TrainLoss=3.8220 | ValLoss=7.1926 | AvgGrad=0.3365
Epoch 7/200 | TrainAcc=19.63% | ValAcc=7.79% | TrainLoss=3.7294 | ValLoss=5.2930 | AvgGrad=0.3542
Epoch 8/200 | TrainAcc=21.06% | ValAcc=11.62% | TrainLoss=3.6474 | ValLoss=4.4682 | AvgGrad=0.3688
Epoch 9/200 | TrainAcc=22.38% | ValAcc=8.44% | TrainLoss=3.5699 | ValLoss=4.7256 | AvgGrad=0.3816
Epoch 10/200 | TrainAcc=23.51% | ValAcc=13.90% | TrainLoss=3.4999 | ValLoss=4.4098 | AvgGrad=0.3948
E