实验五：卷积神经网络实验<br>
实验内容：采用任意一种课程中介绍过的或者其它卷积神经网络模型（如Lenet-5、AlexNet等）用于解决某种媒体类型的模式识别问题。<br>
要求：<br>
卷积神经网络可以基于现有框架如TensorFlow、Pytorch或者Mindspore等构建，也可以自行设计实现。<br>
数据集可以使用手写体数字图像标准数据集，也可以自行构建。预测问题可以包括分类或者回归等。<br>
实验工作还需要对激活函数的选择、dropout等技巧的使用做实验分析。必要时上网查找有关参考文献。<br>
用不同数据量、不同超参数比较实验效果，并给出截图和分析。

# 环境配置

In [None]:
!nvidia-smi  # 查看GPU是否可用（可选）

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import numpy as np
from typing import Dict
import time

# Default to the CPU and switch to CUDA only when PyTorch reports it as usable.
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
print("Using device:", device)

# 模型定义

## LeNet5

In [None]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN: two conv + average-pool stages, then three linear layers.

    ``fc1`` is sized for a 16x6x6 feature map, which is what the conv/pool
    stack below produces for 32x32 inputs.

    Args:
        num_classes: Size of the output logit vector.
        in_channels: Number of channels of the input images.
        activation: Name of the non-linearity (case-insensitive): "relu",
            "leakyrelu", "elu", "selu" or "gelu". Unknown names fall back
            to ReLU.
        dropout: Drop probability applied to the flattened features before
            ``fc1``; only active while the module is in training mode.
        use_bn: When True, a ``BatchNorm2d`` layer is inserted after each
            convolution (before the activation). When False the slot holds
            ``nn.Identity`` so no extra parameters are created.
    """

    def __init__(self, num_classes=10, in_channels=1,
                 activation='relu', dropout=0.0, use_bn=False):
        super(LeNet5, self).__init__()

        self.num_classes = num_classes
        self.dropout_p = dropout
        self.use_bn = use_bn
        # A single activation module is shared by every layer; all supported
        # activations are stateless, so sharing is safe.
        self.activation = self._get_activation(activation)

        # padding=2 keeps the spatial size through conv1.
        self.conv1 = nn.Conv2d(in_channels, 6, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm2d(6) if use_bn else nn.Identity()
        self.pool1 = nn.AvgPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(16) if use_bn else nn.Identity()
        self.pool2 = nn.AvgPool2d(2, 2)

        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def _get_activation(self, name):
        """Map an activation name (case-insensitive) to a module; ReLU if unknown."""
        act_map = {
            "relu": nn.ReLU(),
            "leakyrelu": nn.LeakyReLU(0.1),
            "elu": nn.ELU(),
            "selu": nn.SELU(),
            "gelu": nn.GELU()
        }
        return act_map.get(name.lower(), nn.ReLU())

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.pool1(self.activation(self.bn1(self.conv1(x))))
        x = self.pool2(self.activation(self.bn2(self.conv2(x))))

        # Flatten everything except the batch dimension.
        x = x.view(x.size(0), -1)
        x = F.dropout(x, p=self.dropout_p, training=self.training)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.fc3(x)
        return x


## VGG11

In [None]:
class VGG11(nn.Module):
    """VGG11-style CNN with a small classifier head sized for 32x32 inputs.

    Five conv stages, each ending in a 2x2 max-pool, reduce a 32x32 image to
    a 1x1x512 feature map that feeds a three-layer classifier.

    Args:
        num_classes: Size of the output logit vector.
        in_channels: Number of channels of the input images.
        activation: Name of the non-linearity (case-insensitive): "relu",
            "leakyrelu", "elu", "selu" or "gelu". Unknown names fall back
            to ReLU.
        dropout: Drop probability of the two Dropout layers in the classifier.
        use_bn: When True every convolution is followed by ``BatchNorm2d``;
            when False that slot holds ``nn.Identity`` so the layer indices
            inside each ``nn.Sequential`` stay the same either way.

    NOTE: ``activation``, ``dropout`` and ``use_bn`` are honoured by this
    implementation. Pass ``use_bn=True, dropout=0.5`` to build the
    ReLU + BatchNorm + Dropout(0.5) variant.
    """

    def __init__(self, num_classes=10, in_channels=3,
                 activation='relu', dropout=0.0, use_bn=False):
        super(VGG11, self).__init__()
        # Stored first: the layer builders below read these attributes.
        self.activation_name = activation
        self.dropout_p = dropout
        self.use_bn = use_bn

        self.conv_layer1 = self._make_conv_1(in_channels, 64)
        self.conv_layer2 = self._make_conv_1(64, 128)
        self.conv_layer3 = self._make_conv_2(128, 256)
        self.conv_layer4 = self._make_conv_2(256, 512)
        self.conv_layer5 = self._make_conv_2(512, 512)
        self.classifier = nn.Sequential(
            nn.Linear(512, 64),    # input/output widths adapted to the 1x1x512 feature map
            self._make_activation(),
            nn.Dropout(p=dropout),
            nn.Linear(64, 64),
            self._make_activation(),
            nn.Dropout(p=dropout),
            nn.Linear(64, num_classes)
        )

    def _make_activation(self):
        """Return a fresh instance of the configured activation (ReLU if unknown)."""
        factories = {
            "relu": lambda: nn.ReLU(inplace=True),
            "leakyrelu": lambda: nn.LeakyReLU(0.1),
            "elu": nn.ELU,
            "selu": nn.SELU,
            "gelu": nn.GELU,
        }
        factory = factories.get(self.activation_name.lower(), factories["relu"])
        return factory()

    def _make_norm(self, channels):
        """Return BatchNorm2d for ``channels`` when enabled, else a no-op Identity."""
        if self.use_bn:
            return nn.BatchNorm2d(channels, affine=True)
        return nn.Identity()

    def _make_conv_1(self, in_channels, out_channels):
        """Build a stage with one 3x3 conv, optional norm, activation and 2x2 max-pool."""
        layer = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            self._make_norm(out_channels),
            self._make_activation(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        return layer

    def _make_conv_2(self, in_channels, out_channels):
        """Build a stage with two 3x3 convs (each with optional norm + activation) and a 2x2 max-pool."""
        layer = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            self._make_norm(out_channels),
            self._make_activation(),

            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            self._make_norm(out_channels),
            self._make_activation(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        return layer

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        # Spatial sizes below are for a 32x32 input.
        x = self.conv_layer1(x)    # -> 16x16, 64 channels
        x = self.conv_layer2(x)    # -> 8x8, 128 channels
        x = self.conv_layer3(x)    # -> 4x4, 256 channels
        x = self.conv_layer4(x)    # -> 2x2, 512 channels
        x = self.conv_layer5(x)    # -> 1x1, 512 channels
        x = x.view(x.size(0), -1)  # flatten to (batch, 512)
        x = self.classifier(x)     # -> (batch, num_classes)
        return x

## mobilenetv2

In [None]:
def get_mobilenetv2(num_classes=10, pretrained=False):
    """Build torchvision's MobileNetV2 with its final classifier layer resized.

    Args:
        num_classes: Output width of the replacement ``classifier[1]`` layer.
        pretrained: Forwarded unchanged to ``models.mobilenet_v2``.

    Returns:
        The MobileNetV2 model with a new ``nn.Linear`` at ``classifier[1]``.
    """
    net = models.mobilenet_v2(pretrained=pretrained)
    head_in_features = net.last_channel
    net.classifier[1] = nn.Linear(head_in_features, num_classes)
    return net


class MobileNetV2(nn.Module):
    """Wrapper around torchvision's MobileNetV2 with a replaced stem conv and head.

    Args:
        num_classes: Output width of the replacement ``classifier[1]`` layer.
        ispretrained: Forwarded as ``pretrained`` to ``models.mobilenet_v2``.
        in_channels: Number of input image channels for the replacement stem
            convolution. Defaults to 1 (grayscale); pass 3 for RGB data
            such as CIFAR10.

    The stem convolution and the final linear layer are always newly
    constructed modules, so they are not pretrained even when
    ``ispretrained=True``.
    """

    def __init__(self, num_classes=10, ispretrained=False, in_channels=1):
        super().__init__()
        self.base = models.mobilenet_v2(pretrained=ispretrained)
        # Swap the first conv so the network accepts `in_channels`-channel images.
        self.base.features[0][0] = nn.Conv2d(in_channels, 32, kernel_size=3, stride=2, padding=1, bias=False)
        self.base.classifier[1] = nn.Linear(self.base.last_channel, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped MobileNetV2 and return its output."""
        return self.base(x)


# 数据加载和增强

In [None]:
def getData(dataset_name="MNIST", batch_size=128, aug_level="none"):
    """Build train/test DataLoaders for MNIST or CIFAR10 at 32x32 resolution.

    Args:
        dataset_name: "MNIST" or "CIFAR10" (case-insensitive).
        batch_size: Batch size used for both loaders.
        aug_level: Training-set augmentation strength: "none", "basic" or
            "strong". The test set is never augmented.

    Returns:
        ``(train_loader, test_loader, in_channels)`` where ``in_channels`` is
        1 for MNIST and 3 for CIFAR10.

    Raises:
        ValueError: If ``dataset_name`` or ``aug_level`` is not supported.
    """
    # Validate up front so a typo fails with a clear message instead of an
    # UnboundLocalError further down.
    dataset_name = dataset_name.upper()
    if dataset_name not in ("MNIST", "CIFAR10"):
        raise ValueError(f"Unknown dataset name: {dataset_name}")
    if aug_level not in ("none", "basic", "strong"):
        raise ValueError(f"Unknown aug_level: {aug_level}")

    # Augmentation pipeline for the training split.
    if aug_level == "none":
        train_steps = [
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
        ]
    elif aug_level == "basic":
        train_steps = [
            transforms.Resize((32, 32)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            transforms.ColorJitter(brightness=0.2, contrast=0.2),
            transforms.ToTensor(),
        ]
    else:  # "strong"
        train_steps = [
            transforms.Resize((32, 32)),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(30),
            transforms.RandomCrop(32, padding=4),
            transforms.ToTensor(),
            # RandomErasing is placed after ToTensor, i.e. it runs on the tensor.
            transforms.RandomErasing(p=0.3),
        ]
    test_steps = [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ]

    # Dataset class and channel count.
    if dataset_name == "MNIST":
        dataset_cls, in_channels = datasets.MNIST, 1
    else:
        dataset_cls, in_channels = datasets.CIFAR10, 3

    # Per-channel normalisation with mean 0.5 and std 0.5, applied last.
    stats = (0.5,) * in_channels
    transform_train = transforms.Compose(train_steps + [transforms.Normalize(stats, stats)])
    transform_test = transforms.Compose(test_steps + [transforms.Normalize(stats, stats)])

    train_dataset = dataset_cls(root="./data", train=True, download=True, transform=transform_train)
    test_dataset = dataset_cls(root="./data", train=False, download=True, transform=transform_test)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    # Report before returning so the summary is actually printed.
    print(f"{dataset_name} loaded: {len(train_dataset)} training samples, {len(test_dataset)} test samples, aug_level = {aug_level}")

    return train_loader, test_loader, in_channels


# 模型加载

In [None]:
def get_model(name, num_classes=10, in_channels=3, ispretrained=False, activation='relu', dropout=0.0, use_bn=False):
    """Construct a network by (case-insensitive) name.

    "lenet5" and "vgg11" receive ``activation``, ``dropout`` and ``use_bn``.
    "resnet18" and "mobilenetv2" come from torchvision, receive
    ``ispretrained``, and get a new first convolution for ``in_channels``
    plus a new final linear layer for ``num_classes``.

    Raises:
        ValueError: If ``name`` is not one of the four supported models.
    """
    name = name.lower()

    if name in ("lenet5", "vgg11"):
        net_cls = LeNet5 if name == "lenet5" else VGG11
        return net_cls(num_classes=num_classes, in_channels=in_channels,
                       activation=activation, dropout=dropout, use_bn=use_bn)

    # resnet18 is not part of the experiments for now.
    if name == "resnet18":
        net = models.resnet18(pretrained=ispretrained)
        net.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
        net.fc = nn.Linear(net.fc.in_features, num_classes)
        return net

    if name == "mobilenetv2":
        net = models.mobilenet_v2(pretrained=ispretrained)
        net.features[0][0] = nn.Conv2d(in_channels, 32, kernel_size=3, stride=2, padding=1, bias=False)
        net.classifier[1] = nn.Linear(net.last_channel, num_classes)
        return net

    raise ValueError(f"Unknown model name: {name}")

# 核心训练流程

In [None]:
def train_one_epoch(model, device, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader``.

    Puts ``model`` in training mode and, for every batch, moves the data to
    ``device``, computes the loss, back-propagates and steps ``optimizer``.

    Returns:
        ``(mean_loss, accuracy)``: the average of the per-batch losses and
        the percentage (0-100) of samples whose arg-max prediction equals
        the target.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        predicted = logits.max(1)[1]  # index of the largest logit per sample
        n_seen += targets.size(0)
        n_correct += predicted.eq(targets).sum().item()

    mean_loss = loss_sum / len(loader)
    accuracy = 100. * n_correct / n_seen
    return mean_loss, accuracy


def test_one_epoch(model, device, loader, criterion):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            out = model(x)
            loss = criterion(out, y)
            total_loss += loss.item()
            _, pred = out.max(1)
            total += y.size(0)
            correct += pred.eq(y).sum().item()
    return total_loss / len(loader), 100. * correct / total

def run_training(model, train_loader, test_loader, optimizer, criterion, num_epochs=10):
    """Train and evaluate ``model`` for ``num_epochs`` epochs and collect metrics.

    Each epoch runs ``train_one_epoch`` followed by ``test_one_epoch`` on the
    module-level ``device`` and prints a one-line progress report.

    Args:
        model: Network to train; expected to already live on ``device``.
        train_loader: Iterable of training batches.
        test_loader: Iterable of evaluation batches.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss function called as ``criterion(output, target)``.
        num_epochs: Number of epochs to run; must be at least 1.

    Returns:
        Dict with per-epoch lists ``train_losses``, ``test_losses``,
        ``train_accs``, ``test_accs`` plus ``param_count`` (trainable
        parameters), ``epoch_time`` (mean wall-clock seconds per epoch,
        evaluation included) and ``final_acc`` (last epoch's test accuracy).

    Raises:
        ValueError: If ``num_epochs`` is smaller than 1.
    """
    # Without at least one epoch there is no final accuracy and the per-epoch
    # time would divide by zero, so reject that before doing any work.
    if num_epochs < 1:
        raise ValueError(f"num_epochs must be >= 1, got {num_epochs}")

    start_time = time.time()
    train_losses, test_losses, train_accs, test_accs = [], [], [], []

    for epoch in range(num_epochs):
        tr_loss, tr_acc = train_one_epoch(model, device, train_loader, optimizer, criterion)
        te_loss, te_acc = test_one_epoch(model, device, test_loader, criterion)

        train_losses.append(tr_loss)
        test_losses.append(te_loss)
        train_accs.append(tr_acc)
        test_accs.append(te_acc)

        print(f"Epoch {epoch+1}/{num_epochs}: TrainLoss={tr_loss:.4f}, TestAcc={te_acc:.2f}%")

    elapsed = (time.time() - start_time) / num_epochs
    params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    return {
        "train_losses": train_losses,
        "test_losses": test_losses,
        "train_accs": train_accs,
        "test_accs": test_accs,
        "param_count": params,
        "epoch_time": elapsed,
        "final_acc": test_accs[-1]
    }


# 批量运行实验

In [None]:
def run_experiment(config):
    """Run one full experiment (data, model, optimizer, training) from a config dict.

    Required keys: ``model``, ``dataset``, ``lr``, ``epochs``.
    Optional keys and their defaults: ``activation`` ("relu"), ``dropout``
    (0.0), ``use_bn`` (False), ``augment`` ("none"), ``optimizer`` ("Adam")
    and ``l2`` (0, used as weight decay).

    ``optimizer`` selects "SGD" or "RMSprop"; any other value uses Adam.

    Returns:
        The metrics dict produced by ``run_training``.
    """
    # Optional settings are read with defaults so that a config which omits
    # them (e.g. no "dropout"/"use_bn") does not fail with a KeyError.
    activation = config.get("activation", "relu")
    dropout = config.get("dropout", 0.0)
    use_bn = config.get("use_bn", False)
    augment = config.get("augment", "none")
    weight_decay = config.get("l2", 0)

    print(f"\n=== Running {config['model']} | {config['dataset']} | {activation} ===")

    # Dataset: augmentation level.
    train_loader, test_loader, in_ch = getData(config["dataset"], batch_size=128, aug_level=augment)

    # Model: activation, dropout, batch norm.
    model = get_model(name=config['model'], num_classes=10, in_channels=in_ch,
                      activation=activation,
                      dropout=dropout,
                      use_bn=use_bn).to(device)

    # Training: loss and optimizer (Adam is the fallback for unlisted names).
    criterion = nn.CrossEntropyLoss()
    optimizer_classes = {"SGD": optim.SGD, "RMSprop": optim.RMSprop}
    optimizer_cls = optimizer_classes.get(config.get("optimizer", "Adam"), optim.Adam)
    optimizer = optimizer_cls(model.parameters(), lr=config["lr"], weight_decay=weight_decay)

    result = run_training(model, train_loader, test_loader, optimizer, criterion, num_epochs=config["epochs"])
    return result

# 主执行代码

In [None]:
# Settings shared by every run; each experiment overrides only what it varies.
_BASE_CONFIG = {
    "activation": "relu", "dropout": 0.0, "use_bn": False,
    "dataset": "CIFAR10", "augment": "none",
    "optimizer": "Adam", "lr": 0.001, "epochs": 5,
}


def _make_config(model, **overrides):
    """Return a new, complete experiment config for `model` with `overrides` applied."""
    return {"model": model, **_BASE_CONFIG, **overrides}


# Experiment 1: model comparison (LeNet5 vs. VGG11) on MNIST and CIFAR10.
# Every entry carries the full key set, including "dropout" and "use_bn".
experiment1 = [
    _make_config(model, dataset=dataset)
    for dataset in ("MNIST", "CIFAR10")
    for model in ("lenet5", "vgg11")
]

# Experiment 2: activation-function comparison (VGG11 on CIFAR10).
experiment2 = [
    _make_config("vgg11", activation=activation)
    for activation in ("relu", "leakyrelu", "elu", "selu", "gelu")
]

# Experiment 3: regularisation comparison, dropout x batch norm (VGG11 on CIFAR10).
experiment3 = [
    _make_config("vgg11", dropout=dropout, use_bn=use_bn)
    for use_bn in (False, True)
    for dropout in (0.0, 0.3, 0.5)
]

# Experiment 4: optimizer comparison on CIFAR10.
experiment4 = [
    _make_config(model, optimizer=optimizer)
    for model in ("lenet5", "vgg11")
    for optimizer in ("Adam", "SGD", "RMSprop")
]

# Experiment 5: learning-rate comparison on CIFAR10 (separate ranges per model).
experiment5 = (
    [_make_config("lenet5", lr=lr) for lr in (0.003, 0.005, 0.008, 0.010)]
    + [_make_config("vgg11", lr=lr) for lr in (0.00003, 0.00005, 0.0001)]
)

# Experiment 6: data-augmentation comparison on CIFAR10.
experiment6 = [
    _make_config(model, augment=augment)
    for model in ("lenet5", "vgg11")
    for augment in ("strong", "basic", "none")
]

# Run every configuration of the selected experiment (experiment6 here);
# metrics are stored under "exp1", "exp2", ... in list order.
results = {}
for i, cfg in enumerate(experiment6, start=1):
    results[f"exp{i}"] = run_experiment(cfg)



In [None]:
# One figure per run: training loss and test loss over the epochs.
loss_series = (("train_losses", "Train Loss"), ("test_losses", "Test Loss"))
for key in results:
    history = results[key]
    plt.figure(figsize=(10,4))
    for series_key, series_label in loss_series:
        plt.plot(history[series_key], label=series_label)
    plt.title(f"{key} - Loss Curve")
    plt.legend()
    plt.show()

# Text summary: final test accuracy, trainable parameters (thousands) and
# mean seconds per epoch for every run.
print("\n=== 实验总结 ===")
for k in results:
    r = results[k]
    print(f"{k:6s} | Acc={r['final_acc']:.2f}% | Params={r['param_count']/1e3:.1f}K | EpochTime={r['epoch_time']:.2f}s")