In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import json
import time
import random
import itertools
from datetime import datetime
from time import time as ttime

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
import seaborn as sns

from scipy.io import loadmat
import pywt

# ====================== Configuration ======================
# Run on the GPU when CUDA is available, otherwise on the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Data / channel-impairment parameters (adjust as needed)
SNR_dB = 20
ADD_NOISE = True
ADD_DOPPLER = True
FS = 20e6
FC = 2.4e9
VELOCITY_KMH = 120

# Wavelet feature extraction
USE_LOG = True
WAVELET = 'db6'
WAVELET_LEVEL = 6

# Default training hyper-parameters (may be overridden by the parameter search)
BATCH_SIZE = 64
EPOCHS = 200
LR = 1e-3
WEIGHT_DECAY = 1e-4
PATIENCE = 10

# Default number of trials for the parameter search
N_TRIALS_DEFAULT = 12

# Output directory (created at import time if missing)
SAVE_ROOT = "./search_results"
os.makedirs(SAVE_ROOT, exist_ok=True)

# Data folder (change this to your own path)
DATA_FOLDER = r"..\los_data"  # example of a Windows-style relative path

# ====================== 数据读取 ======================
def load_iq_mat_dataset(data_folder):
    """Load every ``.mat`` file in *data_folder* into one sample matrix plus labels.

    Files are visited in sorted (lexicographic) file-name order. Each file is
    expected to hold a ``data_Ineed`` variable, which is transposed before being
    stacked row-wise with the other files; files without that variable are
    skipped with a warning. All rows coming from one file share one integer label.

    Labels are assigned by counting the files that were actually loaded, so they
    are always the contiguous range ``0..K-1`` even when some files are skipped.
    (Numbering by position in the directory listing would leave gaps, and the
    caller derives the class count from the number of distinct labels.)

    Args:
        data_folder: Directory that contains the ``.mat`` files.

    Returns:
        Tuple ``(X, y)``: ``X`` is the row-wise stack of all transposed
        ``data_Ineed`` arrays, ``y`` is an ``int64`` vector with one label per row.

    Raises:
        FileNotFoundError: If the folder contains no ``.mat`` file.
        ValueError: If none of the ``.mat`` files contains ``data_Ineed``.
    """
    file_list = sorted(f for f in os.listdir(data_folder) if f.endswith('.mat'))
    if not file_list:
        raise FileNotFoundError(f"No .mat files found in {data_folder}")
    X_list, y_list = [], []
    for file_name in file_list:
        mat = loadmat(os.path.join(data_folder, file_name))
        if 'data_Ineed' not in mat:
            print(f"Warning: {file_name} 没有 'data_Ineed' 变量，跳过。")
            continue
        # Label = number of files loaded so far, which keeps labels gap-free.
        label = len(X_list)
        data_arr = mat['data_Ineed'].T  # transpose to [num_samples, length]
        X_list.append(data_arr)
        y_list.append(np.full(data_arr.shape[0], label, dtype=np.int64))
        print(f"Loaded file idx={label}: {file_name}, shape (after transpose): {data_arr.shape}")
    if not X_list:
        # np.vstack([]) would raise an opaque ValueError; say what actually went wrong.
        raise ValueError(f"No .mat file in {data_folder} contains 'data_Ineed'")
    X = np.vstack(X_list)
    y = np.concatenate(y_list)
    print(f"Total data shape: {X.shape}, labels shape: {y.shape}")
    return X, y

# ====================== 数据处理函数 ======================
def compute_doppler_shift(v_kmh, fc_hz):
    """Return the Doppler shift ``fc_hz * v / c`` for a speed given in km/h.

    The speed is divided by 3.6 (km/h -> m/s) and ``c`` is taken as 3e8.
    A falsy speed (``0`` or ``None``) yields ``0.0``.
    """
    speed_of_light = 3e8
    if v_kmh:
        return fc_hz * (v_kmh / 3.6) / speed_of_light
    return 0.0

def add_complex_awgn(signal, snr_db):
    """Add complex white Gaussian noise to *signal* at the requested SNR (in dB).

    The noise power is the mean squared magnitude of *signal* divided by
    ``10**(snr_db/10)``, split equally between the real and imaginary parts.
    Noise is drawn from the global NumPy RNG (real part first, then imaginary).
    When ``snr_db`` is ``None`` the input object is returned untouched.
    """
    if snr_db is None:
        return signal
    shape = signal.shape
    signal_power = np.mean(np.abs(signal)**2)
    noise_power = signal_power / (10**(snr_db/10))
    per_component_std = np.sqrt(noise_power/2)
    real_part = np.random.randn(*shape)
    imag_part = np.random.randn(*shape)
    return signal + per_component_std * (real_part + 1j*imag_part)

def apply_doppler_shift(signal, fd_hz, fs_hz):
    """Multiply *signal* by ``exp(j*2*pi*fd_hz*t)`` with ``t = n / fs_hz``.

    When ``fd_hz`` is ``None`` or zero the input object is returned unchanged.
    """
    no_shift = fd_hz is None or fd_hz == 0
    if no_shift:
        return signal
    sample_times = np.arange(len(signal)) / fs_hz
    rotation = np.exp(1j * 2 * np.pi * fd_hz * sample_times)
    return signal * rotation

def process_signal_led_rff(sig_complex, use_log=False, wavelet='db6', level=6):
    """Turn one complex signal into a normalised float32 feature vector.

    Steps: FFT magnitude (first half of the bins only), optional natural log,
    wavelet decomposition with the approximation coefficients zeroed, wavelet
    reconstruction trimmed back to the half-spectrum length, then mean/std
    normalisation (mean removal only when the std is below 1e-8).
    """
    # 1. Magnitude spectrum, keeping the first half of the bins.
    magnitude = np.abs(np.fft.fft(sig_complex))
    half_spectrum = magnitude[:len(magnitude)//2]

    # 2. Optional log compression (offset avoids log(0)).
    if use_log:
        half_spectrum = np.log(half_spectrum + 1e-8)

    # 3. Wavelet decomposition, zero the coarsest band, reconstruct.
    bands = pywt.wavedec(half_spectrum, wavelet, level=level)
    bands[0] = np.zeros_like(bands[0])
    detail = pywt.waverec(bands, wavelet)[:len(half_spectrum)]

    # 4. Standardise; skip the division for a (near-)constant vector.
    center = detail.mean()
    spread = detail.std()
    centered = detail - center
    if spread < 1e-8:
        return centered.astype(np.float32)
    return (centered / (spread + 1e-8)).astype(np.float32)

def preprocess_iq_dataset_led_rff(data_real, snr_db=SNR_dB, velocity_kmh=VELOCITY_KMH,
                                  fc_hz=FC, fs_hz=FS, use_log=USE_LOG,
                                  wavelet=WAVELET, level=WAVELET_LEVEL,
                                  add_noise=ADD_NOISE, add_doppler=ADD_DOPPLER):
    """
    Convert a 2-D array of signals into a feature tensor.

    Input data_real: numpy array [N, length]; it is cast to complex64 before
    processing. Each row optionally receives AWGN, then an optional Doppler
    shift, and is finally passed through process_signal_led_rff.
    Returns: torch.Tensor [N, 1, feat_len] of dtype float32.
    """
    num_samples, _ = data_real.shape  # unpacking also requires a 2-D input
    data_complex = data_real.astype(np.complex64)
    if add_doppler:
        fd_hz = compute_doppler_shift(velocity_kmh, fc_hz)
    else:
        fd_hz = None

    def _row_to_feature(row):
        # Same order as always: noise first, then Doppler, then features.
        if add_noise:
            row = add_complex_awgn(row, snr_db)
        if add_doppler:
            row = apply_doppler_shift(row, fd_hz, fs_hz)
        return process_signal_led_rff(row, use_log=use_log, wavelet=wavelet, level=level)

    feature_rows = [_row_to_feature(data_complex[i]) for i in range(num_samples)]
    feature_matrix = np.stack(feature_rows, axis=0)
    features = torch.tensor(feature_matrix, dtype=torch.float32)
    return features[:, None, :]  # [N, 1, length]

# ====================== InceptionTime 模型 ======================
class InceptionBlock(nn.Module):
    """One Inception-style 1-D block.

    A 1x1 bottleneck feeds three convolutions (kernel sizes 10, 20, 40) and a
    max-pool + 1x1 convolution branch. The four branch outputs are cropped to
    a common length, concatenated on the channel axis (``4*out_channels``
    channels), then passed through BatchNorm, ReLU and optional dropout.
    """

    def __init__(self, in_channels, out_channels, dropout=0.0):
        super().__init__()
        narrow = max(1, out_channels // 4)
        self.bottleneck = nn.Conv1d(in_channels, narrow, kernel_size=1, bias=False)

        # Registered in the order conv1, conv2, conv3 so parameter naming and
        # initialisation order are the same as with explicit assignments.
        for attr, width in (('conv1', 10), ('conv2', 20), ('conv3', 40)):
            setattr(self, attr, nn.Conv1d(narrow, out_channels,
                                          kernel_size=width, padding=width // 2))
        self.maxpool = nn.MaxPool1d(3, stride=1, padding=1)
        self.convpool = nn.Conv1d(narrow, out_channels, kernel_size=1, bias=False)

        self.bn = nn.BatchNorm1d(4*out_channels)
        self.relu = nn.ReLU()

        # Dropout is only instantiated for a positive rate; otherwise a no-op.
        if dropout > 0:
            self.dropout = nn.Dropout(dropout)
        else:
            self.dropout = nn.Identity()

    def forward(self, x):
        reduced = self.bottleneck(x)
        branches = [
            self.conv1(reduced),
            self.conv2(reduced),
            self.conv3(reduced),
            self.convpool(self.maxpool(reduced)),
        ]

        # Even kernels with padding k/2 yield one extra sample; crop all
        # branches to the shortest so they can be concatenated.
        common_len = min(branch.shape[-1] for branch in branches)
        merged = torch.cat([branch[..., :common_len] for branch in branches], dim=1)
        return self.dropout(self.relu(self.bn(merged)))


class InceptionTime(nn.Module):
    """Three stacked InceptionBlocks, global average pooling and a linear classifier."""

    def __init__(self, num_classes, in_channels=1, channels=32, dropout=0.0):
        super().__init__()
        block_width = 4*channels  # each block concatenates four branches
        self.b1 = InceptionBlock(in_channels, channels, dropout=dropout)
        self.b2 = InceptionBlock(block_width, channels, dropout=dropout)
        self.b3 = InceptionBlock(block_width, channels, dropout=dropout)
        self.gap = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(block_width, num_classes)

    def forward(self, x):
        # Drop the last sample of odd-length inputs.
        has_odd_length = x.shape[-1] % 2 == 1
        if has_odd_length:
            x = x[..., :-1]
        for block in (self.b1, self.b2, self.b3):
            x = block(x)
        pooled = self.gap(x).squeeze(-1)
        return self.fc(pooled)

# ====================== 工具函数 ======================
def evaluate_model(model, dataloader, device, num_classes):
    """Evaluate *model* on *dataloader*.

    Returns a tuple ``(accuracy_percent, confusion_matrix)``; the confusion
    matrix is computed over labels ``0..num_classes-1``. Accuracy is ``0.0``
    when the loader yields no samples. The model is switched to eval mode.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    true_labels, predicted_labels = [], []
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            predictions = torch.max(logits, 1)[1]
            n_correct += (predictions == targets).sum().item()
            n_seen += targets.size(0)
            true_labels.extend(targets.cpu().numpy())
            predicted_labels.extend(predictions.cpu().numpy())
    if n_seen > 0:
        accuracy = 100.0 * n_correct / n_seen
    else:
        accuracy = 0.0
    matrix = confusion_matrix(true_labels, predicted_labels, labels=list(range(num_classes)))
    return accuracy, matrix

def plot_confusion_matrix(cm, classes, fold, save_folder, dataset_type='Test'):
    """Render *cm* as an annotated heat map and save it as a PNG.

    Args:
        cm: Square confusion matrix of integer counts.
        classes: Class names used as tick labels on both axes. They are only
            applied when their count matches the matrix size; otherwise
            (or when ``None``) seaborn's automatic tick labels are used.
        fold: Trial number, used in the title and the file name.
        save_folder: Existing directory the image is written to.
        dataset_type: Label such as ``'Test'``; appears in the title and,
            lower-cased, in the file name.
    """
    tick_labels = 'auto'
    if classes is not None and len(classes) == len(cm):
        tick_labels = list(classes)

    plt.figure(figsize=(6,5))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=tick_labels, yticklabels=tick_labels)
        plt.title(f'{dataset_type} Confusion Matrix Trial{fold}')
        plt.ylabel('True')
        plt.xlabel('Predicted')
        plt.savefig(os.path.join(save_folder, f'{dataset_type.lower()}_cm_trial{fold}.png'))
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close()

def plot_curves(train_losses, val_losses, train_acc, val_acc, fold, save_folder):
    """Save the per-epoch loss curves and accuracy curves of one trial as two PNGs."""
    panels = (
        (train_losses, 'Train Loss', val_losses, 'Val Loss',
         'Loss', f'Trial {fold} Loss', f'loss_trial{fold}.png'),
        (train_acc, 'Train Acc', val_acc, 'Val Acc',
         'Accuracy (%)', f'Trial {fold} Accuracy', f'acc_trial{fold}.png'),
    )
    for train_series, train_label, val_series, val_label, y_label, title, file_name in panels:
        plt.figure()
        plt.plot(train_series, label=train_label)
        plt.plot(val_series, label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.grid(True)
        plt.savefig(os.path.join(save_folder, file_name))
        plt.close()

# ====================== 训练单次试验 ======================
def train_one_run(model, tr_loader, va_loader, criterion, optimizer, scheduler,
                  device, epochs, patience, results_file):
    """Train *model* with early stopping on validation accuracy.

    Each epoch runs one pass over ``tr_loader`` and one evaluation pass over
    ``va_loader``, prints a summary line and appends it to ``results_file``.
    A new best is recorded only when validation accuracy beats the previous
    best by more than 0.1 percentage points; training stops after ``patience``
    consecutive epochs without such an improvement. The best weights (if any)
    are restored before returning.

    Args:
        model: Network to train (already on ``device``).
        tr_loader, va_loader: Iterables yielding ``(inputs, labels)`` batches.
            An empty loader gives loss and accuracy of ``0.0`` for that pass.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        optimizer: Optimizer updating the model parameters.
        scheduler: Optional LR scheduler stepped once per epoch (not on the
            epoch that triggers early stopping). ``ReduceLROnPlateau`` is
            stepped with the validation loss; a failing step is reported and
            training continues.
        device: Device the batches are moved to.
        epochs: Maximum number of epochs.
        patience: Early-stopping patience in epochs.
        results_file: Path of the text log that is appended to.

    Returns:
        ``(model, best_val_acc, (train_losses, val_losses, train_accs, val_accs))``.
        Losses are averaged per batch; accuracies are percentages.
    """
    best_val = 0.0
    best_wts = None
    patience_cnt = 0
    train_losses, val_losses, train_acc_list, val_acc_list = [], [], [], []
    for epoch in range(epochs):
        model.train()
        running_loss, correct, total, n_batches = 0.0, 0, 0, 0
        for xb, yb in tr_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            out = model(xb)
            loss = criterion(out, yb)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            _, p = torch.max(out, 1)
            correct += (p == yb).sum().item()
            total += yb.size(0)
            n_batches += 1
        # Guard against an empty loader, mirroring the validation path.
        train_loss = running_loss / n_batches if n_batches > 0 else 0.0
        train_acc = 100.0 * correct / total if total > 0 else 0.0
        train_losses.append(train_loss)
        train_acc_list.append(train_acc)

        # validation
        model.eval()
        vloss, vcorrect, vtotal, n_val_batches = 0.0, 0, 0, 0
        with torch.no_grad():
            for xb, yb in va_loader:
                xb, yb = xb.to(device), yb.to(device)
                out = model(xb)
                loss = criterion(out, yb)
                vloss += loss.item()
                _, p = torch.max(out, 1)
                vcorrect += (p == yb).sum().item()
                vtotal += yb.size(0)
                n_val_batches += 1
        val_loss = vloss / n_val_batches if n_val_batches > 0 else 0.0
        val_acc = 100.0 * vcorrect / vtotal if vtotal > 0 else 0.0
        val_losses.append(val_loss)
        val_acc_list.append(val_acc)

        # logging
        line = f"Epoch {epoch+1}/{epochs} | TrainAcc={train_acc:.2f}% | ValAcc={val_acc:.2f}% | TrainLoss={train_loss:.4f} | ValLoss={val_loss:.4f}"
        print(line)
        with open(results_file, 'a') as f:
            f.write(line + '\n')

        # check best (requires an improvement of more than 0.1 points)
        if val_acc > best_val + 0.1:
            best_val = val_acc
            # Keep a CPU copy so the snapshot does not alias live parameters.
            best_wts = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            patience_cnt = 0
        else:
            patience_cnt += 1
            if patience_cnt >= patience:
                print("Early stopping.")
                break
        if scheduler is not None:
            try:
                if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                    # This scheduler needs the monitored metric to step.
                    scheduler.step(val_loss)
                else:
                    scheduler.step()
            except Exception as exc:
                # Best effort: keep training, but do not hide the failure.
                print(f"Warning: scheduler.step() failed at epoch {epoch+1}: {exc}")

    # restore best weights if available
    if best_wts is not None:
        model.load_state_dict(best_wts)
    return model, best_val, (train_losses, val_losses, train_acc_list, val_acc_list)

# ====================== 参数搜索主函数 ======================
def param_search(X_train_val, y_train_val, X_test, y_test, num_classes,
                 param_grid=None, mode='grid', n_trials=N_TRIALS_DEFAULT, val_size=0.2,
                 device=DEVICE, save_root=SAVE_ROOT, random_seed=42):
    """Train one InceptionTime model per hyper-parameter configuration and log the results.

    param_grid: dict of lists, e.g.
      {
        'lr': [1e-3, 3e-4],
        'weight_decay': [1e-4, 1e-5],
        'batch_size': [32,64],
        'channels': [16,32],
        'epochs': [100],
      }
      Keys read per trial: 'batch_size', 'channels', 'dropout', 'lr',
      'weight_decay', 'epochs', 'patience'; missing keys fall back to
      BATCH_SIZE, 16, 0.0, LR, WEIGHT_DECAY, EPOCHS and PATIENCE respectively.
      When None, a built-in default grid is used.
    mode: 'grid' enumerates the full Cartesian product; any other value
      samples each key independently at random ('random' mode).
    n_trials: number of random trials, or the cap on grid combinations. When
      the grid is larger than the cap, a subset is picked with a shuffle
      seeded by random_seed, so the selection is reproducible.
    val_size: fraction of X_train_val held out for validation in each trial
      (stratified; the split seed is 42 + trial index).
    random_seed: seed for the grid truncation; in random mode it seeds the
      global `random` and `numpy.random` generators.

    Output is written to a time-stamped folder under save_root:
    param_search_results.txt, one trial_<k>/ folder per trial (log.txt,
    best_model.pth, confusion-matrix and curve plots), trial_summary.csv and
    best_config.json.

    Returns: (best_overall, df, base_folder) where best_overall is a dict with
    keys 'acc', 'config', 'model_path', df is the per-trial summary DataFrame
    (None if it could not be saved) and base_folder is the output folder.
    """
    os.makedirs(save_root, exist_ok=True)
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    save_dir = f"{timestamp}_param_search_SNR{SNR_dB}dB_fd{int(compute_doppler_shift(VELOCITY_KMH, FC))}_classes_{num_classes}"
    base_folder = os.path.join(save_root, save_dir)
    os.makedirs(base_folder, exist_ok=True)
    results_file_all = os.path.join(base_folder, "param_search_results.txt")

    if param_grid is None:
        param_grid = {
            'lr': [1e-3, 3e-4, 1e-4],
            'weight_decay': [1e-4, 1e-5],
            'batch_size': [32, 64],
            'channels': [16, 32],
            'epochs': [80],
            'patience': [10],
        }

    keys = list(param_grid.keys())
    combos = []
    if mode == 'grid':
        for vals in itertools.product(*(param_grid[k] for k in keys)):
            combos.append(dict(zip(keys, vals)))
        # if too many grid combinations, limit
        if len(combos) > n_trials:
            # Seeded local RNG: the retained subset is reproducible and the
            # global random state is left untouched.
            random.Random(random_seed).shuffle(combos)
            combos = combos[:n_trials]
    else:  # random mode
        random.seed(random_seed)
        np.random.seed(random_seed)
        for _ in range(n_trials):
            choice = {k: random.choice(param_grid[k]) for k in keys}
            combos.append(choice)

    print(f"Param search: mode={mode}, total_trials={len(combos)}. Results base: {base_folder}")
    with open(results_file_all, 'w') as f:
        f.write(f"Param search started at {timestamp}\nMode: {mode}\nTotal trials: {len(combos)}\n\n")

    # fixed test loader
    test_dataset = TensorDataset(X_test, y_test)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

    trial_summaries = []
    best_overall = {'acc': -1.0, 'config': None, 'model_path': None}

    for tidx, cfg in enumerate(combos):
        t_start = ttime()
        print(f"\n=== Trial {tidx+1}/{len(combos)} | cfg: {cfg} ===")
        trial_folder = os.path.join(base_folder, f"trial_{tidx+1}")
        os.makedirs(trial_folder, exist_ok=True)
        results_file = os.path.join(trial_folder, "log.txt")
        with open(results_file, 'w') as f:
            f.write("Config:\n" + json.dumps(cfg, indent=2) + "\n\n")

        # split train/val from X_train_val (a different split seed per trial)
        X_tr, X_va, y_tr, y_va = train_test_split(
            X_train_val, y_train_val, test_size=val_size, stratify=y_train_val, random_state=42 + tidx
        )

        batch_size = int(cfg.get('batch_size', BATCH_SIZE))
        tr_loader = DataLoader(TensorDataset(X_tr, y_tr), batch_size=batch_size, shuffle=True)
        va_loader = DataLoader(TensorDataset(X_va, y_va), batch_size=batch_size, shuffle=False)

        channels = int(cfg.get('channels', 16))
        dropout = float(cfg.get('dropout', 0.0))
        model = InceptionTime(num_classes=num_classes, in_channels=1, channels=channels, dropout=dropout).to(device)
        criterion = nn.CrossEntropyLoss()
        lr = float(cfg.get('lr', LR))
        wd = float(cfg.get('weight_decay', WEIGHT_DECAY))
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)

        epochs = int(cfg.get('epochs', EPOCHS))
        patience = int(cfg.get('patience', PATIENCE))

        model, best_val_acc, hist = train_one_run(model, tr_loader, va_loader, criterion,
                                                  optimizer, scheduler, device, epochs,
                                                  patience, results_file)
        test_acc, test_cm = evaluate_model(model, test_loader, device, num_classes)
        print(f"Trial {tidx+1} done. BestValAcc={best_val_acc:.2f} | TestAcc={test_acc:.2f}")
        with open(results_file, 'a') as f:
            f.write(f"\nTrial Summary: BestValAcc={best_val_acc:.2f} | TestAcc={test_acc:.2f}\n")

        model_path = os.path.join(trial_folder, "best_model.pth")
        torch.save(model.state_dict(), model_path)
        plot_confusion_matrix(test_cm, classes=list(range(num_classes)), fold=tidx+1, save_folder=trial_folder, dataset_type='Test')
        plot_curves(*hist, fold=tidx+1, save_folder=trial_folder)

        elapsed = ttime() - t_start
        rec = {**cfg}
        rec.update({'trial': tidx+1, 'best_val_acc': float(best_val_acc), 'test_acc': float(test_acc),
                    'model_path': model_path, 'trial_folder': trial_folder, 'elapsed_sec': elapsed})
        trial_summaries.append(rec)

        # NOTE(review): the winning configuration is chosen by *test* accuracy,
        # so the reported best test accuracy is optimistically biased. Consider
        # selecting on best_val_acc instead; left unchanged to keep the output
        # (best_config.json 'best_test_acc') semantics stable.
        if test_acc > best_overall['acc']:
            best_overall['acc'] = float(test_acc)
            best_overall['config'] = cfg
            best_overall['model_path'] = model_path

        with open(results_file_all, 'a') as f:
            f.write(json.dumps(rec, ensure_ascii=False) + '\n')

    # save summary dataframe
    try:
        df = pd.DataFrame(trial_summaries)
        df.to_csv(os.path.join(base_folder, "trial_summary.csv"), index=False)
    except Exception as e:
        print("Warning: cannot save summary dataframe:", e)
        df = None

    with open(os.path.join(base_folder, "best_config.json"), 'w') as f:
        json.dump({'best_test_acc': best_overall['acc'], 'best_config': best_overall['config'], 'model_path': best_overall['model_path']}, f, indent=2)

    print("\n=== Param search finished ===")
    print(f"Best test acc: {best_overall['acc']:.2f} | config: {best_overall['config']}")
    print(f"Results saved in: {base_folder}")
    return best_overall, df, base_folder

# ====================== 主流程 ======================
if __name__ == "__main__":
    # 1) Load the raw IQ data (numpy arrays) and count the classes.
    print("Loading data...")
    X_raw, y_raw = load_iq_mat_dataset(DATA_FOLDER)
    num_classes = len(np.unique(y_raw))
    print(f"Num classes detected: {num_classes}")

    # 2) Pre-process into features once, so every trial reuses the same tensor.
    print("Preprocessing (this may take some time)...")
    preprocess_options = dict(
        snr_db=SNR_dB,
        velocity_kmh=VELOCITY_KMH,
        fc_hz=FC,
        fs_hz=FS,
        use_log=USE_LOG,
        wavelet=WAVELET,
        level=WAVELET_LEVEL,
        add_noise=ADD_NOISE,
        add_doppler=ADD_DOPPLER,
    )
    X_proc = preprocess_iq_dataset_led_rff(X_raw, **preprocess_options)
    y_torch = torch.tensor(y_raw, dtype=torch.long)
    print(f"Preprocessed features shape: {X_proc.shape}")

    # 3) Hold out a test set; the search re-splits train_val into train/val per trial.
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X_proc, y_torch, test_size=0.2, stratify=y_torch, random_state=42
    )
    print(f"Train_val size: {len(X_train_val)}, Test size: {len(X_test)}")

    # 4) Search grid (example values; adjust as needed).
    param_grid = {
        'lr': [1e-3, 3e-4],
        'weight_decay': [1e-4, 1e-5],
        'batch_size': [32, 64],
        'channels': [16, 32],
        'dropout': [0.0, 0.1, 0.3, 0.5],
        'epochs': [200],
        'patience': [10],
    }

    # 5) Run the search (mode='grid' or 'random').
    #    In grid mode, too many combinations are truncated to n_trials.
    best, df_summary, save_folder = param_search(
        X_train_val, y_train_val, X_test, y_test, num_classes,
        param_grid=param_grid,
        mode='grid',
        n_trials=8,
        val_size=0.2,
        device=DEVICE,
        save_root=SAVE_ROOT,
        random_seed=42,
    )

    print("Done. Best result:")
    print(best)
    if df_summary is not None:
        print(f"Trial summary saved to {os.path.join(save_folder, 'trial_summary.csv')}")


Loading data...
Loaded file idx=0: dev0.mat, shape (after transpose): (7014, 320)
Loaded file idx=1: dev1.mat, shape (after transpose): (6632, 320)
Loaded file idx=2: dev10.mat, shape (after transpose): (8935, 320)
Loaded file idx=3: dev11.mat, shape (after transpose): (7776, 320)
Loaded file idx=4: dev13.mat, shape (after transpose): (7410, 320)
Loaded file idx=5: dev14.mat, shape (after transpose): (6274, 320)
Loaded file idx=6: dev15.mat, shape (after transpose): (5706, 320)
Loaded file idx=7: dev16.mat, shape (after transpose): (12968, 320)
Loaded file idx=8: dev17.mat, shape (after transpose): (9508, 320)
Loaded file idx=9: dev18.mat, shape (after transpose): (3343, 320)
Loaded file idx=10: dev19.mat, shape (after transpose): (4756, 320)
Loaded file idx=11: dev2.mat, shape (after transpose): (6316, 320)
Loaded file idx=12: dev20.mat, shape (after transpose): (5592, 320)
Loaded file idx=13: dev3.mat, shape (after transpose): (5536, 320)
Loaded file idx=14: dev4.mat, shape (after tr



Preprocessed features shape: torch.Size([138904, 1, 160])
Train_val size: 111123, Test size: 27781
Param search: mode=grid, total_trials=8. Results base: ./search_results\2025-11-24_15-16-22_param_search_SNR20dB_fd266_classes_20

=== Trial 1/8 | cfg: {'lr': 0.001, 'weight_decay': 1e-05, 'batch_size': 32, 'channels': 16, 'dropout': 0.0, 'epochs': 200, 'patience': 10} ===
Epoch 1/200 | TrainAcc=65.69% | ValAcc=22.52% | TrainLoss=0.9816 | ValLoss=4.4320
Epoch 2/200 | TrainAcc=82.68% | ValAcc=70.44% | TrainLoss=0.4878 | ValLoss=1.0474
Epoch 3/200 | TrainAcc=85.21% | ValAcc=31.28% | TrainLoss=0.4079 | ValLoss=3.6358
Epoch 4/200 | TrainAcc=86.48% | ValAcc=87.68% | TrainLoss=0.3724 | ValLoss=0.3388
Epoch 5/200 | TrainAcc=87.65% | ValAcc=80.66% | TrainLoss=0.3397 | ValLoss=0.5197
Epoch 6/200 | TrainAcc=88.61% | ValAcc=75.29% | TrainLoss=0.3189 | ValLoss=0.6357
Epoch 7/200 | TrainAcc=89.08% | ValAcc=80.09% | TrainLoss=0.3003 | ValLoss=0.5618
Epoch 8/200 | TrainAcc=89.88% | ValAcc=19.47% | Train