In [1]:
# Standard library first, then the Colab-only helper module.
import os
from google.colab import drive

# Attach the user's Google Drive at /content/drive so that the project folder
# under it can be used as the working directory by the following cells.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Project folder on the mounted Google Drive. The notebook changes into this
# directory (training data is then looked up relative to it) and the trained
# model weights are saved here.
WORK_DIR = '/content/drive/MyDrive/MIMO_Project'

In [3]:
# Make WORK_DIR the current working directory; the data loader further down
# searches for its input files relative to the current directory.
try:
    os.chdir(WORK_DIR)
    current_dir = os.getcwd()
    print(f"✅ 当前工作目录已切换至: {current_dir}")
except OSError:
    # Any OSError lands here and is reported as a missing path. Execution
    # continues afterwards, so later cells would run in the previous
    # working directory.
    print(f"❌ 路径不存在: {WORK_DIR}，请检查路径拼写！")

✅ 当前工作目录已切换至: /content/drive/MyDrive/MIMO_Project


In [4]:
# ==========================================
# 步骤 1: 导入依赖与配置
# ==========================================
import sys
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import scipy.io as sio
import numpy as np
import glob

In [5]:
# Training hyperparameters
BATCH_SIZE = 64  # samples per mini-batch handed to the DataLoader
LR = 0.001  # learning rate passed to the Adam optimizer
EPOCHS = 1000  # number of full passes over the training set
HIDDEN_SIZE = 4096  # NOTE(review): not read by any code visible here; ChannelNet's layer widths are set independently of it

In [6]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"当前运行设备: {device}")

当前运行设备: cpu


In [7]:
# ==========================================
# 步骤 2: 定义模型
# ==========================================
class ChannelNet(nn.Module):
    """Fully connected regression network.

    Architecture: two ``Linear -> BatchNorm1d -> LeakyReLU(0.2)`` stages
    followed by a plain ``Linear`` output layer.

    Args:
        input_dim: number of features in each input sample.
        output_dim: number of values predicted for each sample.
        hidden_dims: widths of the two hidden stages, in order. The default
            ``(8192, 4096)`` reproduces the widths that were previously
            hard-coded, so existing checkpoints stay loadable.

    Raises:
        ValueError: if ``hidden_dims`` does not contain exactly two widths.

    Note:
        ``BatchNorm1d`` in training mode needs more than one sample per
        batch; callers must not feed single-sample batches while training.
    """

    def __init__(self, input_dim, output_dim, hidden_dims=(8192, 4096)):
        super().__init__()
        if len(hidden_dims) != 2:
            raise ValueError(
                f"hidden_dims must contain exactly two widths, got {hidden_dims!r}"
            )
        first_width, second_width = hidden_dims

        # Attribute names (layer1, layer2, output_layer) and creation order are
        # kept so state_dict keys and seeded initialisation are unchanged.
        self.layer1 = self._make_stage(input_dim, first_width)
        self.layer2 = self._make_stage(first_width, second_width)
        # Output layer: linear projection to the target dimension.
        self.output_layer = nn.Linear(second_width, output_dim)

    @staticmethod
    def _make_stage(in_features, out_features):
        """Build one Linear -> BatchNorm1d -> LeakyReLU(0.2) stage."""
        return nn.Sequential(
            nn.Linear(in_features, out_features),
            nn.BatchNorm1d(out_features),
            nn.LeakyReLU(0.2),
        )

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, output_dim)``."""
        x = self.layer1(x)
        x = self.layer2(x)
        return self.output_layer(x)

# ==========================================
# 步骤 3: 数据加载 (内存优化版)
# ==========================================
def load_new_format_data():
    """Load all ``TrainData_Batch_*.mat`` files from the current directory.

    Every file is expected to contain a ``Batch_Buffer`` variable that is
    indexed as ``Batch_Buffer[0, i]`` for ``i`` in ``range(shape[1])``; each
    element must expose the fields ``R_Real``, ``R_Imag``, ``P_Real`` and
    ``P_Imag``. For each element the flattened ``R_*`` fields are concatenated
    into one input vector and the flattened ``P_*`` fields into one target
    vector.

    Files are processed in sorted name order so the sample order is the same
    on every run. A file that lacks ``Batch_Buffer`` is skipped, and a file
    that raises while being read is reported and skipped (samples appended
    before the failure are kept).

    Returns:
        tuple: ``(X_tensor, Y_tensor)``, two float32 tensors of shape
        ``(num_samples, input_dim)`` and ``(num_samples, output_dim)``.
        Both stay on the CPU; the caller moves batches to the device.

    Raises:
        ValueError: if no file matches the pattern or no sample was loaded.
    """
    X_list = []
    Y_list = []

    # Look in the current working directory (the notebook chdir's to WORK_DIR).
    file_pattern = "TrainData_Batch_*.mat"
    # glob returns names in arbitrary order; sort for a reproducible dataset.
    files = sorted(glob.glob(file_pattern))

    if not files:
        raise ValueError(f"❌ 在 {os.getcwd()} 下未找到任何匹配文件: {file_pattern}")

    print(f"\n>>> 发现 {len(files)} 个数据文件，开始加载...")

    count = 0
    for filename in files:
        try:
            mat_data = sio.loadmat(filename)
            if 'Batch_Buffer' not in mat_data:
                # Report the skip instead of silently dropping the file.
                print(f"   [跳过] {os.path.basename(filename)} 中没有 'Batch_Buffer' 变量")
                continue

            batch_buffer = mat_data['Batch_Buffer']
            num_samples = batch_buffer.shape[1]

            for i in range(num_samples):
                sample = batch_buffer[0, i]

                # Flatten real and imaginary parts into one vector each.
                # Casting to float32 here (the dtype of the returned tensors)
                # avoids holding a float64 copy of the whole dataset.
                x_vec = np.concatenate(
                    (sample['R_Real'].ravel(), sample['R_Imag'].ravel())
                ).astype(np.float32, copy=False)
                y_vec = np.concatenate(
                    (sample['P_Real'].ravel(), sample['P_Imag'].ravel())
                ).astype(np.float32, copy=False)

                X_list.append(x_vec)
                Y_list.append(y_vec)
                count += 1

            print(f"   [已加载] {os.path.basename(filename)}")

        except Exception as e:
            # Best effort: one unreadable file must not abort the whole load.
            print(f"   [错误] 读取 {filename} 失败: {e}")

    if count == 0:
        raise ValueError("❌ 未成功加载任何样本数据！")

    X_all = np.array(X_list, dtype=np.float32)
    Y_all = np.array(Y_list, dtype=np.float32)

    print(f"\n✅ 数据加载完毕！")
    print(f"   总样本数: {X_all.shape[0]}")
    print(f"   输入维度: {X_all.shape[1]}")
    print(f"   输出维度: {Y_all.shape[1]}")

    # Keep the tensors on the CPU: moving the full dataset to the GPU here
    # could exhaust device memory. from_numpy reuses the float32 buffers
    # instead of copying them again.
    X_tensor = torch.from_numpy(X_all)
    Y_tensor = torch.from_numpy(Y_all)

    return X_tensor, Y_tensor

# ==========================================
# 步骤 4: 训练循环
# ==========================================
class _CombinedLoss(nn.Module):
    """MSE loss plus a weighted cosine-distance term.

    ``loss = MSE(pred, target) + alpha * (1 - mean(cos_sim(pred, target)))``
    where the cosine similarity is taken along ``dim=1``.
    """

    def __init__(self, alpha=0.1):
        super().__init__()
        self.mse = nn.MSELoss()
        self.cosine = nn.CosineSimilarity(dim=1)
        self.alpha = alpha

    def forward(self, pred, target):
        loss_mse = self.mse(pred, target)
        loss_cos = 1.0 - torch.mean(self.cosine(pred, target))
        return loss_mse + self.alpha * loss_cos


def train():
    """Train ``ChannelNet`` on the loaded dataset and save its weights.

    Steps: load the dataset as CPU tensors, wrap it in a shuffling
    ``DataLoader``, train with Adam and the combined MSE + cosine loss for
    ``EPOCHS`` epochs (printing the mean batch loss every 10 epochs), then
    save ``model.state_dict()`` into ``WORK_DIR``.

    The checkpoint file name is built from the model's actual last hidden
    width and from ``EPOCHS``, so it always describes the run that produced
    it. NOTE: code that loads the checkpoint by a hard-coded
    ``..._4096hidden_500epoch.pth`` name must be pointed at the new name.

    Raises:
        ValueError: if fewer than two samples are available (``BatchNorm1d``
            cannot be trained on single-sample batches), or propagated from
            ``load_new_format_data``.
    """
    # 1. Load the data (kept on the CPU).
    X_train, Y_train = load_new_format_data()

    num_samples = X_train.shape[0]
    if num_samples < 2:
        raise ValueError(
            f"❌ 训练至少需要 2 个样本 (BatchNorm1d 训练模式无法处理单样本批次)，当前样本数: {num_samples}"
        )

    # 2. Build the DataLoader.
    dataset = TensorDataset(X_train, Y_train)
    # Pinned host memory only helps (and is only available) with CUDA;
    # requesting it on a CPU-only runtime just triggers a warning.
    use_cuda = device.type == "cuda"
    # BatchNorm1d raises in training mode on a batch with a single sample, so
    # drop the final batch only when it would contain exactly one sample.
    drop_singleton = num_samples > BATCH_SIZE and num_samples % BATCH_SIZE == 1
    loader = DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        pin_memory=use_cuda,
        drop_last=drop_singleton,
    )

    input_dim = X_train.shape[1]
    output_dim = Y_train.shape[1]

    # 3. Create the model on the selected device.
    model = ChannelNet(input_dim, output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=LR)
    # Combined loss: MSE plus 0.1 * cosine distance.
    loss_fn = _CombinedLoss(alpha=0.1)

    print(f"\n=== 开始训练 DNN 模型 ===")
    print(f"模型参数量: {sum(p.numel() for p in model.parameters()) / 1e6:.2f} Million")
    model.train()

    for epoch in range(EPOCHS):
        total_loss = 0
        for batch_x, batch_y in loader:
            # Move one batch at a time to the device to bound device memory;
            # non_blocking lets the copy overlap compute when memory is pinned.
            batch_x = batch_x.to(device, non_blocking=use_cuda)
            batch_y = batch_y.to(device, non_blocking=use_cuda)

            optimizer.zero_grad()
            output = model(batch_x)

            loss = loss_fn(output, batch_y)

            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        if (epoch + 1) % 10 == 0:
            avg_loss = total_loss / len(loader)
            print(f"Epoch [{epoch+1}/{EPOCHS}], Loss: {avg_loss:.6f}")

    print("训练完成！")

    # Save the weights straight to the Drive folder (WORK_DIR). The name
    # reflects the real configuration instead of hard-coded numbers.
    hidden_width = model.output_layer.in_features
    save_name = f'DFDCA_DNN_Model_TrainBatch1to5_{hidden_width}hidden_{EPOCHS}epoch.pth'
    save_path = os.path.join(WORK_DIR, save_name)
    torch.save(model.state_dict(), save_path)
    print(f"✅ 模型已保存至云盘: {save_path}")

In [8]:
# Entry point: start the training run when this code executes as the main
# program (the guard keeps an import of this code from triggering training).
if __name__ == '__main__':
    train()


>>> 发现 5 个数据文件，开始加载...


KeyboardInterrupt: 