In [None]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, Subset
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from tqdm import tqdm
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import h5py
import random

# ================= Data path and signal parameters =================
# Folder that is searched for *.mat files; also written to the results log.
data_path = "E:/rf_datasets/"
# SNR (dB) handed to the AWGN step; only has an effect when apply_awgn is True.
SNR_dB = 10
# Sampling rate used to build the time axis of the Doppler rotation (presumably Hz).
fs = 20e6
# Carrier frequency used to compute the Doppler shift (presumably Hz).
fc = 2.4e9
# Velocity passed as target_velocity. It is divided by c = 3e8, so it is
# presumably in m/s -- TODO confirm the intended unit.
v = 120
# Switches for the two optional channel impairments applied while loading.
apply_doppler = False
apply_awgn = False

# ================= Training parameters =================
# NOTE(review): these four values are not read anywhere in the visible code;
# the random search takes its hyper-parameters from param_grid instead.
num_epochs = 50
patience = 5
batch_size = 256
weight_decay = 5e-4

# ================= 数据处理函数 =================
def compute_doppler_shift(v, fc):
    """Return the Doppler frequency offset ``(v / c) * fc``.

    Args:
        v: Relative velocity, in the same unit as the speed-of-light constant
            used below (3e8, so presumably m/s).
        fc: Carrier frequency.

    Returns:
        The frequency shift, expressed in the unit of ``fc``.
    """
    speed_of_light = 3e8  # rounded approximation of c
    velocity_ratio = v / speed_of_light
    return velocity_ratio * fc

def apply_doppler_shift(signal, fd, fs):
    """Rotate a complex signal by a constant frequency offset.

    The time axis is derived from the length of the last dimension of
    ``signal``, so the rotation broadcasts over any leading dimensions.

    Args:
        signal: NumPy array of samples; time runs along the last axis.
        fd: Frequency offset to apply.
        fs: Sampling rate, in the same unit as ``fd``.

    Returns:
        ``signal`` multiplied sample-by-sample with ``exp(j*2*pi*fd*n/fs)``.
    """
    num_samples = signal.shape[-1]
    sample_times = np.arange(num_samples) / fs
    rotation = np.exp(1j * 2 * np.pi * fd * sample_times)
    return signal * rotation

def add_awgn(signal, snr_db):
    """Add complex white Gaussian noise at a given SNR.

    The noise power is derived from the mean power of ``signal`` itself and
    split equally between the real and imaginary components. Noise is drawn
    from NumPy's global random state (real part first, then imaginary part).

    Args:
        signal: NumPy array of samples.
        snr_db: Target signal-to-noise ratio in dB.

    Returns:
        ``signal`` plus the generated noise, with the same shape as ``signal``.
    """
    mean_power = np.mean(np.abs(signal)**2)
    snr_linear = 10**(snr_db/10)
    noise_power = mean_power / snr_linear
    # Draw order matters for reproducibility under a fixed seed.
    real_part = np.random.randn(*signal.shape)
    imag_part = np.random.randn(*signal.shape)
    noise = np.sqrt(noise_power/2) * (real_part + 1j*imag_part)
    return signal + noise

def load_and_preprocess(mat_folder, apply_doppler=False, target_velocity=30, apply_awgn=False, snr_db=20, fs=20e6, fc=2.4e9):
    """Load every ``*.mat`` file in a folder and build I/Q samples with labels.

    Each file is opened with h5py and must contain an ``rfDataset`` group
    holding a compound ``dmrs`` dataset (``real`` / ``imag`` fields) and a
    ``txID`` dataset of character codes. Every signal iterated from ``dmrs``
    is optionally Doppler-shifted and/or noised, then split into real and
    imaginary parts stacked on a new last axis.

    Args:
        mat_folder: Directory searched (non-recursively) for ``*.mat`` files.
        apply_doppler: Apply a Doppler rotation to every signal when True.
        target_velocity: Velocity used to compute the Doppler shift.
        apply_awgn: Add white Gaussian noise to every signal when True.
        snr_db: SNR in dB used by the noise step.
        fs: Sampling rate used by the Doppler rotation.
        fc: Carrier frequency used to compute the Doppler shift.

    Returns:
        A tuple ``(X_all, y_all, label_to_idx)``:
        ``X_all`` is all processed signals concatenated along axis 0 (for
        1-D signals of length L this is ``(num_signals, L, 2)``), ``y_all``
        is one integer class index per row of ``X_all``, and
        ``label_to_idx`` maps each transmitter ID string to its index
        (IDs are indexed in sorted order).

    Raises:
        ValueError: If ``mat_folder`` contains no ``*.mat`` file.
    """
    mat_files = glob.glob(os.path.join(mat_folder, '*.mat'))
    print(f"共找到 {len(mat_files)} 个 .mat 文件")
    if not mat_files:
        # Fail early with a clear message instead of the opaque
        # "need at least one array to concatenate" raised further down.
        raise ValueError(f"No .mat files found in {mat_folder!r}")

    X_list, y_list = [], []
    # Number of signals contributed by each file, so that labels are
    # repeated per file rather than by the size of the last file only.
    signals_per_file = []
    fd = compute_doppler_shift(target_velocity, fc)

    for file in tqdm(mat_files, desc='读取与处理数据'):
        with h5py.File(file, 'r') as f:
            rfDataset = f['rfDataset']
            dmrs_struct = rfDataset['dmrs'][:]
            dmrs_complex = dmrs_struct['real'] + 1j * dmrs_struct['imag']
            txID_uint16 = rfDataset['txID'][:].flatten()
            # Zero codes are dropped (presumably string padding).
            tx_id = ''.join(chr(c) for c in txID_uint16 if c != 0)

        processed_signals = []
        for sig in dmrs_complex:
            if apply_doppler:
                sig = apply_doppler_shift(sig, fd, fs)
            if apply_awgn:
                # Noise power is computed per signal, not per file.
                sig = add_awgn(sig, snr_db)
            iq = np.stack((sig.real, sig.imag), axis=-1)
            processed_signals.append(iq)

        processed_signals = np.array(processed_signals)
        X_list.append(processed_signals)
        y_list.append(tx_id)
        signals_per_file.append(len(processed_signals))

    unique_labels = sorted(set(y_list))
    label_to_idx = {lab: i for i, lab in enumerate(unique_labels)}
    y_idx = np.array([label_to_idx[lab] for lab in y_list])

    X_all = np.concatenate(X_list, axis=0)
    # Repeat each file's label by that file's own signal count.
    y_all = np.repeat(y_idx, signals_per_file)

    print(f"数据维度: X={X_all.shape}, y={y_all.shape}")
    print(f"类别映射: {label_to_idx}")
    return X_all, y_all, label_to_idx

# ================= 模型 =================
class SignalTransformer(nn.Module):
    """Transformer-encoder classifier over a sequence of feature vectors.

    Every time step is projected linearly to ``model_dim``, the sequence is
    passed through a stack of Transformer encoder layers, and the encoder
    output at the last time step is mapped to class logits.

    NOTE(review): no positional encoding is added to the embedded sequence.

    Args:
        raw_input_dim: Number of features per time step in the input.
        model_dim: Width of the encoder (``d_model``).
        num_heads: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        num_classes: Number of output logits.
        dropout: Dropout probability inside the encoder layers.
    """

    def __init__(self, raw_input_dim, model_dim, num_heads, num_layers, num_classes, dropout=0.4):
        super().__init__()
        # Per-time-step projection from raw features to the encoder width.
        self.embedding = nn.Linear(in_features=raw_input_dim, out_features=model_dim)
        layer = TransformerEncoderLayer(
            d_model=model_dim,
            nhead=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.encoder = TransformerEncoder(layer, num_layers=num_layers)
        # Classification head applied to a single time step.
        self.fc = nn.Linear(in_features=model_dim, out_features=num_classes)

    def forward(self, x):
        """Map a ``(batch, seq_len, raw_input_dim)`` tensor to class logits.

        Returns:
            A ``(batch, num_classes)`` tensor of unnormalised scores.
        """
        hidden = self.encoder(self.embedding(x))
        # Only the final time step is used as the sequence summary.
        last_step = hidden[:, -1, :]
        return self.fc(last_step)

# ================= 工具函数 =================
def evaluate_model(model, dataloader, device, num_classes):
    """Compute accuracy and the confusion matrix of ``model`` on a dataloader.

    The model is switched to evaluation mode and is left in that mode when
    the function returns; gradients are disabled during the pass.

    Args:
        model: Module that returns per-class scores of shape ``(batch, C)``.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.
        num_classes: Number of classes; fixes the confusion-matrix size.

    Returns:
        A tuple ``(acc, cm)`` where ``acc`` is the accuracy in percent and
        ``cm`` is a ``(num_classes, num_classes)`` confusion matrix with true
        labels on the rows. If the dataloader yields no samples, ``acc`` is
        0.0 and ``cm`` is all zeros.
    """
    model.eval()
    correct, total = 0, 0
    all_labels, all_preds = [], []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    if total == 0:
        # Nothing was evaluated: avoid dividing by zero and report an
        # empty confusion matrix of the expected size.
        return 0.0, np.zeros((num_classes, num_classes), dtype=int)

    acc = 100 * correct / total
    cm = confusion_matrix(all_labels, all_preds, labels=range(num_classes))
    return acc, cm

def plot_confusion_matrix(cm, save_path=None):
    """Render a confusion matrix as an annotated heatmap.

    Args:
        cm: Square matrix of integer counts (true labels on the rows).
        save_path: File to write the figure to. When falsy, the figure is
            drawn and closed without being saved or shown.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches="tight")
    # Always release the figure so repeated calls do not accumulate memory.
    plt.close(fig)

def _snapshot_state(model):
    """Return a detached CPU copy of the model's current state dict."""
    # state_dict() hands out references to the live parameters; without the
    # clone, later optimizer steps would silently overwrite the snapshot.
    return {name: tensor.detach().cpu().clone() for name, tensor in model.state_dict().items()}


def _fit_with_early_stopping(model, train_loader, val_loader, device, params, num_classes):
    """Train ``model`` and return a snapshot of its best-validation weights."""
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=params["learning_rate"], weight_decay=params["weight_decay"])

    # Start below any reachable accuracy so the first epoch is always kept.
    best_epoch_val_acc = -1.0
    patience_counter = 0
    best_state = None

    for _ in range(params["num_epochs"]):
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        val_acc, _ = evaluate_model(model, val_loader, device, num_classes)
        if val_acc > best_epoch_val_acc:
            best_epoch_val_acc = val_acc
            patience_counter = 0
            best_state = _snapshot_state(model)
        else:
            patience_counter += 1
            if patience_counter >= params["patience"]:
                print(f"早停触发: 连续 {params['patience']} 轮无提升")
                break

    return best_state


def random_search_train(X_all, y_all, label_to_idx, param_grid, n_iter=10, save_dir=None):
    """Run a random hyper-parameter search for ``SignalTransformer``.

    The data is split once into stratified train / validation / test parts
    (70 / 15 / 15 %, fixed seed). For each of ``n_iter`` iterations one value
    per hyper-parameter is drawn at random, a model is trained with early
    stopping on validation accuracy, the best-epoch weights are restored,
    and accuracies on all three splits are logged. The configuration with
    the highest validation accuracy is kept.

    Files written to ``save_dir``: ``results.txt`` (log and summary),
    ``best_model.pth`` (state dict of the best model) and
    ``best_confusion_matrix.png`` (test-set confusion matrix of that model).

    Args:
        X_all: Array of input samples; each sample must have 2 features per
            time step (the model is built with ``raw_input_dim=2``).
        y_all: Integer class index for every sample in ``X_all``.
        label_to_idx: Mapping of class label to index; its length is the
            number of classes.
        param_grid: Dict mapping each of ``model_dim``, ``num_heads``,
            ``num_layers``, ``dropout``, ``batch_size``, ``learning_rate``,
            ``weight_decay``, ``num_epochs`` and ``patience`` to a sequence
            of candidate values.
        n_iter: Number of random configurations to evaluate.
        save_dir: Output directory, created if missing. Defaults to the
            current working directory when ``None``.

    Returns:
        None.
    """
    if save_dir is None:
        # The original default crashed in os.path.join(None, ...).
        save_dir = "."
    os.makedirs(save_dir, exist_ok=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_classes = len(label_to_idx)
    X_tensor = torch.tensor(X_all, dtype=torch.float32)
    y_tensor = torch.tensor(y_all, dtype=torch.long)
    full_dataset = TensorDataset(X_tensor, y_tensor)

    # 70 % train, then the remaining 30 % is halved into validation and test.
    indices = np.arange(len(full_dataset))
    train_idx, temp_idx, _, y_temp = train_test_split(
        indices, y_all, test_size=0.3, stratify=y_all, random_state=42
    )
    val_idx, test_idx = train_test_split(
        temp_idx, test_size=0.5, stratify=y_temp, random_state=42
    )

    # Start below any reachable accuracy so the first configuration is
    # always recorded, even if it scores 0 %.
    best_val_acc = -1.0
    best_params = None
    best_model_state = None
    best_test_acc = 0
    best_cm = None
    best_model_path = os.path.join(save_dir, "best_model.pth")

    results_file = os.path.join(save_dir, "results.txt")
    with open(results_file, "w", encoding="utf-8") as f:
        f.write(f"数据集路径: {data_path}\n")
        f.write("===== 随机搜索结果 =====\n")

    for i in range(n_iter):
        params = {k: random.choice(v) for k, v in param_grid.items()}
        print(f"\n===== 随机搜索 {i+1}/{n_iter} 参数: {params} =====")

        train_loader = DataLoader(Subset(full_dataset, train_idx), batch_size=params["batch_size"], shuffle=True)
        val_loader = DataLoader(Subset(full_dataset, val_idx), batch_size=params["batch_size"], shuffle=False)
        test_loader = DataLoader(Subset(full_dataset, test_idx), batch_size=params["batch_size"], shuffle=False)

        model = SignalTransformer(
            raw_input_dim=2,
            model_dim=params["model_dim"],
            num_heads=params["num_heads"],
            num_layers=params["num_layers"],
            num_classes=num_classes,
            dropout=params["dropout"]
        ).to(device)

        model_state = _fit_with_early_stopping(
            model, train_loader, val_loader, device, params, num_classes
        )
        if model_state is None:
            # No epoch ran (num_epochs == 0): keep the initial weights.
            model_state = _snapshot_state(model)
        else:
            # Roll back to the best epoch before the final evaluation.
            model.load_state_dict(model_state)

        # Accuracy on the train, validation and test splits.
        train_acc, _ = evaluate_model(model, train_loader, device, num_classes)
        val_acc, _ = evaluate_model(model, val_loader, device, num_classes)
        test_acc, cm = evaluate_model(model, test_loader, device, num_classes)

        print(f"训练集: {train_acc:.2f}% | 验证集: {val_acc:.2f}% | 测试集: {test_acc:.2f}%")

        # Append this configuration's result to the log file.
        with open(results_file, "a", encoding="utf-8") as f:
            f.write(f"[组合 {i+1}] {params} | Train: {train_acc:.2f}% | Val: {val_acc:.2f}% | Test: {test_acc:.2f}%\n")

        # Model selection uses validation accuracy only; the test accuracy
        # and confusion matrix are merely recorded for the winner.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_params = params
            best_model_state = model_state
            best_test_acc = test_acc
            best_cm = cm

            torch.save(best_model_state, best_model_path)
            print(f"[INFO] 新最佳模型已保存到: {best_model_path}")

    if best_params is None:
        # n_iter <= 0: there is no model, summary or confusion matrix.
        print("[WARN] No configuration was evaluated (n_iter <= 0); nothing to summarize.")
        return

    # Write the summary of the best configuration.
    with open(results_file, "a", encoding="utf-8") as f:
        f.write("\n===== 最佳结果 =====\n")
        f.write(f"最佳参数: {best_params}\n")
        f.write(f"验证集准确率: {best_val_acc:.2f}%\n")
        f.write(f"测试集准确率: {best_test_acc:.2f}%\n")
        f.write(f"最佳模型路径: {best_model_path}\n")

    # Save the confusion matrix of the best model.
    cm_path = os.path.join(save_dir, "best_confusion_matrix.png")
    plot_confusion_matrix(best_cm, cm_path)

    print(f"[INFO] 所有结果已保存到: {results_file}")
    print(f"[INFO] 混淆矩阵已保存到: {cm_path}")
    print(f"[INFO] 最佳模型已保存到: {best_model_path}")

# ================= Script entry point =================
if __name__ == "__main__":
    # Load all recordings once; the module-level flags decide whether the
    # Doppler shift and the noise are applied while loading.
    X_all, y_all, label_to_idx = load_and_preprocess(
        data_path,
        apply_doppler=apply_doppler,
        target_velocity=v,
        apply_awgn=apply_awgn,
        snr_db=SNR_dB,
        fs=fs,
        fc=fc
    )

    # Candidate values per hyper-parameter; the search draws one value from
    # each list independently for every iteration.
    param_grid = {
        'model_dim': [32, 64, 128, 256],
        'num_heads': [2, 4, 8],
        'num_layers': [1, 2],
        'dropout': [0.1, 0.3, 0.5],
        'batch_size': [64, 128, 256],
        'learning_rate': [1e-4, 5e-4, 1e-3],
        'weight_decay': [1e-4, 5e-4],
        'num_epochs': [200],
        'patience': [5]
    }

    # Build a timestamped output folder under "search_results".
    # NOTE(review): the folder name embeds SNR_dB and fd even when
    # apply_awgn / apply_doppler are False, so the name alone does not tell
    # whether those impairments were actually applied.
    fd = compute_doppler_shift(v, fc)
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    script_name = "LTE-V_time_random"
    folder_name = f"{timestamp}_{script_name}_SNR{SNR_dB}_fd{fd:.2f}"
    save_dir = os.path.join("search_results", folder_name)
    os.makedirs(save_dir, exist_ok=True)

    # Evaluate 50 random configurations and write all artifacts to save_dir.
    random_search_train(X_all, y_all, label_to_idx, param_grid, n_iter=50, save_dir=save_dir)


共找到 72 个 .mat 文件


读取与处理数据: 100%|██████████| 72/72 [00:03<00:00, 23.19it/s]


数据维度: X=(215928, 288, 2), y=(215928,)
类别映射: {'001': 0, '002': 1, '003': 2, '004': 3, '005': 4, '006': 5, '007': 6, '008': 7, '009': 8}

===== 随机搜索 1/100 参数: {'model_dim': 256, 'num_heads': 2, 'num_layers': 1, 'dropout': 0.1, 'batch_size': 256, 'learning_rate': 0.0001, 'weight_decay': 0.0005, 'num_epochs': 200, 'patience': 5} =====
早停触发: 连续 5 轮无提升
训练集: 11.12% | 验证集: 11.07% | 测试集: 11.13%
[INFO] 新最佳模型已保存到: search_results\2025-08-16_10-07-15_LTE-V_time_random_SNR10_fd960.00\best_model.pth

===== 随机搜索 2/100 参数: {'model_dim': 64, 'num_heads': 2, 'num_layers': 1, 'dropout': 0.3, 'batch_size': 256, 'learning_rate': 0.0001, 'weight_decay': 0.0005, 'num_epochs': 200, 'patience': 5} =====
早停触发: 连续 5 轮无提升
训练集: 11.11% | 验证集: 11.11% | 测试集: 11.11%
[INFO] 新最佳模型已保存到: search_results\2025-08-16_10-07-15_LTE-V_time_random_SNR10_fd960.00\best_model.pth

===== 随机搜索 3/100 参数: {'model_dim': 64, 'num_heads': 2, 'num_layers': 2, 'dropout': 0.5, 'batch_size': 128, 'learning_rate': 0.0001, 'weight_decay': 0.0005,