In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm import tqdm, trange
import matplotlib.pyplot as plt
from typing import Any, Callable, List, Optional, Type, Union
from torch import Tensor

# 打补丁

In [None]:
def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """Build a bias-free 3x3 ``nn.Conv2d``.

    The padding is set equal to ``dilation``, so at ``stride=1`` the spatial
    size of the input is preserved.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.
        groups: Number of channel groups.
        dilation: Kernel dilation; also used as the padding amount.

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    conv_options = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_options)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Build a bias-free 1x1 (pointwise) ``nn.Conv2d``.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        stride: Convolution stride.

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    pointwise = nn.Conv2d(in_planes, out_planes, 1, stride, bias=False)
    return pointwise


In [None]:
class MyBasicBlock(nn.Module):
    """Two-convolution residual block with a scalable residual branch.

    The block computes ``relu(scale_factor * F(x) + shortcut(x))`` where ``F``
    is ``conv3x3 -> norm -> relu -> conv3x3 -> norm`` and ``shortcut`` is
    ``downsample`` (or the identity when ``downsample`` is ``None``).

    Class attributes:
        expansion: Set to 1. NOTE(review): presumably read by torchvision's
            ResNet builder as the output-channel multiplier — confirm.
        scale_factor: Default multiplier for the residual branch. The value
            present on the class when a block is constructed is copied onto
            that instance, so later changes to the class attribute do not
            affect blocks (or whole models) that already exist.
    """

    expansion: int = 1
    scale_factor: float = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Create the block.

        Args:
            inplanes: Number of input channels.
            planes: Number of output channels.
            stride: Stride of the first convolution.
            downsample: Optional module applied to the input to produce the
                shortcut tensor; ``None`` means the input is used as-is.
            groups: Accepted for signature compatibility; not used.
            base_width: Width control. The channel count between the two
                convolutions is ``int(planes * base_width / 64)``; the default
                of 64 gives ``planes``. Non-integer values are accepted.
            dilation: Must be 1.
            norm_layer: Factory for normalization layers; defaults to
                ``nn.BatchNorm2d``.

        Raises:
            NotImplementedError: If ``dilation > 1``.
            ValueError: If ``base_width`` is so small that the inner width
                would be zero channels.
        """
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        # Honour base_width instead of silently ignoring it: widen/narrow the
        # channels between the two convolutions (this mirrors the width rule of
        # torchvision's Bottleneck). Input and output channel counts of the
        # block are unchanged, so the shortcut addition still lines up.
        width = int(planes * (base_width / 64.0))
        if width < 1:
            raise ValueError(
                f"base_width={base_width} yields an inner width of {width} channels for planes={planes}"
            )

        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, width, stride)
        self.bn1 = norm_layer(width)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(width, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride
        # Snapshot the class-level default so that each block keeps the scale
        # it was built with, independent of later changes to the class.
        self.scale_factor = type(self).scale_factor

    def forward(self, x: Tensor) -> Tensor:
        """Apply the block to ``x`` and return the activated sum."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        # Scaled residual branch plus shortcut (a plain residual block uses scale 1).
        out = self.scale_factor * out + identity
        out = self.relu(out)

        return out

from torchvision.models import resnet
# Monkey patch: rebind the module-level name ``BasicBlock`` inside torchvision's
# resnet module to MyBasicBlock.
# NOTE(review): this presumably makes builders called through that module
# (e.g. resnet.resnet18 / resnet.resnet34) construct MyBasicBlock instead of the
# stock block — confirm against the installed torchvision version.
resnet.BasicBlock = MyBasicBlock

# 实验配置

In [None]:
# Basic configuration
BATCH_SIZE = 128
EPOCHS = 20
NUM_CLASSES = 100
SEED = 42

# Seed PyTorch's random number generator so that weight initialisation and
# data shuffling start from the same state on every Restart & Run All.
# (This improves repeatability; it does not by itself force deterministic
# GPU kernels.)
torch.manual_seed(SEED)

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Data preprocessing
# Per-channel mean / std handed to Normalize in both pipelines
# (presumably the CIFAR-100 training-set statistics — confirm).
NORM_MEAN = (0.5071, 0.4867, 0.4408)
NORM_STD = (0.2675, 0.2565, 0.2761)

# Training pipeline: random 32x32 crop after 4-pixel padding, random horizontal
# flip, conversion to tensor, then normalization.
_train_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform_train = transforms.Compose(_train_steps)

# Test pipeline: no augmentation, only tensor conversion and normalization.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
]
transform_test = transforms.Compose(_test_steps)

In [None]:
# download=True lets the notebook run on a machine that does not yet have the
# dataset under ./data; when the files are already present torchvision reuses
# them instead of downloading again.
train_set = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
test_set = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=transform_test)

# Only the training data is shuffled; the test order is kept fixed.
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)


# 实验函数

In [None]:
# Experiment driver
def _run_epoch(model, batches, criterion, device, optimizer=None):
    """Make one pass over ``batches``; train when ``optimizer`` is given, else only evaluate.

    Args:
        model: Module mapping a batch of inputs to class scores.
        batches: Iterable yielding ``(inputs, targets)`` tensor pairs.
        criterion: Loss returning the mean loss of a batch.
        device: Device the batches are moved to.
        optimizer: If not ``None``, the model is put in training mode and one
            optimizer step is taken per batch; otherwise the model is put in
            evaluation mode and no step is taken.

    Returns:
        ``(mean_loss, accuracy)``: loss averaged over all samples seen, and
        top-1 accuracy in percent.

    Raises:
        ValueError: If ``batches`` yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    total = 0
    for inputs, targets in batches:
        inputs, targets = inputs.to(device), targets.to(device)
        if is_training:
            optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        if is_training:
            loss.backward()
            optimizer.step()

        batch_size = targets.size(0)
        # The criterion yields a per-batch mean; weight it by the batch size so
        # the epoch value is a true per-sample mean (independent of how many
        # batches there are and of a smaller final batch).
        loss_sum += loss.item() * batch_size
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("the data loader produced no samples")
    return loss_sum / total, 100.0 * correct / total


def run_experiment(model, experiment_name, epochs=None, train_data=None, test_data=None, device=None, lr=0.001):
    """Train ``model`` with Adam + cosine-annealed learning rate and record per-epoch metrics.

    Args:
        model: The network to train; it is moved to ``device``.
        experiment_name: Label used for the progress bar and the summary line.
        epochs: Number of epochs; defaults to the notebook-level ``EPOCHS``.
        train_data: Iterable of ``(inputs, targets)`` training batches;
            defaults to the notebook-level ``train_loader``.
        test_data: Iterable of ``(inputs, targets)`` test batches; defaults to
            the notebook-level ``test_loader``.
        device: Device to run on; defaults to the notebook-level ``DEVICE``.
        lr: Initial Adam learning rate.

    Returns:
        ``(train_acc_list, train_loss_list, test_acc_list, test_loss_list)``,
        each with one entry per epoch. Accuracies are percentages; losses are
        mean cross-entropy per sample.
    """
    epochs = EPOCHS if epochs is None else epochs
    train_data = train_loader if train_data is None else train_data
    test_data = test_loader if test_data is None else test_data
    device = DEVICE if device is None else device

    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # max(..., 1) only matters for the degenerate epochs == 0 case.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=max(epochs, 1))

    train_acc_list = []
    train_loss_list = []
    test_acc_list = []
    test_loss_list = []
    for _ in trange(epochs, desc=experiment_name):
        train_loss, train_acc = _run_epoch(model, train_data, criterion, device, optimizer)
        scheduler.step()

        # Test phase
        with torch.no_grad():
            test_loss, test_acc = _run_epoch(model, test_data, criterion, device)

        train_acc_list.append(train_acc)
        train_loss_list.append(train_loss)
        test_acc_list.append(test_acc)
        test_loss_list.append(test_loss)

    # One summary line after the last epoch; skipped when no epoch ran so that
    # an empty run does not fail on undefined metrics.
    if train_acc_list:
        print(f"{experiment_name} | Epoch {len(train_acc_list)}/{epochs} | Train Acc: {train_acc_list[-1]:.2f}% | Test Acc: {test_acc_list[-1]:.2f}%")

    return train_acc_list, train_loss_list, test_acc_list, test_loss_list

In [None]:
def plot_experiment_results(experiments, results):
    """Plot the metric histories of several experiments on a 2x2 grid.

    Args:
        experiments: Sequence of experiment labels.
        results: Sequence parallel to ``experiments``; ``results[j]`` must be
            indexable as ``(train_acc, train_loss, test_acc, test_loss)``,
            each a per-epoch sequence of numbers.
    """
    # (panel title, y-axis label) in the same order as the metric tuples.
    panels = [
        ('Train Acc', 'Accuracy (%)'),
        ('Train Loss', 'Loss'),
        ('Test Acc', 'Accuracy (%)'),
        ('Test Loss', 'Loss'),
    ]
    fig, axes = plt.subplots(2, 2)
    # axes.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1].
    for metric_idx, (panel, (title, ylabel)) in enumerate(zip(axes.flat, panels)):
        panel.set_title(title)
        panel.set_xlabel('Epoch')
        panel.set_ylabel(ylabel)
        for exp_idx, experiment in enumerate(experiments):
            panel.plot(results[exp_idx][metric_idx], label=experiment)
        # Build the legend once per panel, and only if something was drawn.
        if panel.lines:
            panel.legend()
    fig.tight_layout()  # let matplotlib adjust the spacing between panels
    plt.show()
    

# 实验定义

In [None]:
from torchvision.models.resnet import resnet18, resnet34, resnet50

In [None]:
# Experiment 1: depth comparison — one freshly built model per (label, builder) pair.
depth_experiments = [
    (label, build(num_classes=NUM_CLASSES))
    for label, build in (
        ('ResNet-18', resnet18),
        ('ResNet-34', resnet34),
        ('ResNet-50', resnet50),
    )
]

In [None]:
# Experiment 4: depth comparison again, plus 'ResNet-50p' — a ResNet-50 whose
# stem is modified after construction (see below).
depth_experiments2 = [
    (label, build(num_classes=NUM_CLASSES))
    for label, build in (
        ('ResNet-18', resnet18),
        ('ResNet-34', resnet34),
        ('ResNet-50', resnet50),
        ('ResNet-50p', resnet50),
    )
]
# Modify the last entry's stem: replace the first convolution with a 3x3,
# stride-1 convolution and turn the max-pool into a no-op.
resnet50p_model = depth_experiments2[-1][1]
resnet50p_model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
resnet50p_model.maxpool = nn.Identity()

In [None]:
# Experiment 2: width comparison — ResNet-34 built with width_per_group set to
# 0.5x, 1x and 2x of 64.
# NOTE(review): whether width_per_group actually changes the channel counts
# depends on the block class honouring its base_width argument — confirm.
width_experiments = [
    (label, resnet34(num_classes=NUM_CLASSES, width_per_group=64*multiplier))
    for label, multiplier in (
        ('Width-0.5x', 0.5),
        ('Width-1x', 1),
        ('Width-2x', 2),
    )
]

In [None]:
from torchvision.models.resnet import ResNet
def resnet34(scale: float = 1.0, **kwargs: Any) -> ResNet:
    """Build a ResNet-34 whose ``MyBasicBlock`` modules use ``scale`` as residual-branch multiplier.

    The scale is written to ``scale_factor`` (the attribute ``MyBasicBlock.forward``
    reads) on every block *instance* of the returned model, so models built
    with different scales stay independent of each other and no class-level
    state is modified.

    Args:
        scale: Multiplier applied to the residual branch of each block.
            Defaults to 1.0 so that calls written for the plain torchvision
            builder (keyword arguments only) keep working with this wrapper.
        **kwargs: Forwarded unchanged to ``resnet.resnet34``.

    Returns:
        The constructed model.
    """
    model = resnet.resnet34(**kwargs)
    for module in model.modules():
        if isinstance(module, MyBasicBlock):
            module.scale_factor = scale
    return model


In [None]:
# Experiment 3: residual connection comparison — one ResNet-34 per scale value,
# each built through the local resnet34(scale, ...) wrapper.
residual_experiments = [
    (label, resnet34(scale, num_classes=NUM_CLASSES))
    for label, scale in (
        ('scale-2.0x', 2.0),
        ('scale-1.0x', 1.0),
        ('scale-0.5x', 0.5),
        ('scale-0.0x', 0.0),
    )
]

In [None]:
def run_experiments(experiment_list):
    """Run every ``(name, model)`` experiment in order, then plot all results together.

    Args:
        experiment_list: Sequence of ``(name, model)`` pairs.
    """
    histories = [run_experiment(net, label) for label, net in experiment_list]
    labels = [entry[0] for entry in experiment_list]
    plot_experiment_results(labels, histories)

# 运行实验（根据需要选择执行）

In [None]:
# Train every model in depth_experiments and plot their curves together.
run_experiments(depth_experiments)

In [None]:
# Train every model in width_experiments and plot their curves together.
run_experiments(width_experiments)

In [None]:
# Train every model in residual_experiments and plot their curves together.
run_experiments(residual_experiments)

In [None]:
# Train every model in depth_experiments2 (includes 'ResNet-50p') and plot their curves together.
run_experiments(depth_experiments2)