In [1]:
import torch
import torch.nn as nn
import torchvision
from task2_cifar import train_valid_data, init_weight, ResidualBlock, run

# Use the current accelerator's device type when one is available; otherwise fall back to the CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"

# Loaded once here and handed to every run(...) call in the cells below.
dataset = train_valid_data()

| n*[Conv+Pool]+MLP | 参数量  | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :---------------: | :-----: | :------: | :----------: | :--------: |
|       一层        | 8402890 | 1.16it/s |     136      |   71.21%   |
|       两层        | 4227210 | 1.15it/s |     129      |   77.28%   |
|       三层        | 2204170 | 1.02it/s |     103      |   81.07%   |
|       四层        | 1451274 | 1.07s/it |      45      |   79.72%   |

In [2]:
# One [Conv + Pool] stage followed by the MLP head
class Model(nn.Module):
    """CNN with a single conv/pool stage and a two-layer fully connected head.

    The flattened feature size (32 * 16 * 16) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: (3, 32, 32) -> (32, 16, 16)
        features = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        # Classifier head applied to the flattened feature map
        head = [
            nn.Flatten(),
            nn.Linear(in_features=32 * 16 * 16, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-modules stay addressable as net.0 ... net.9
        self.net = nn.Sequential(*features, *head)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the network, apply init_weight to every sub-module, then move it to the target device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

 15%|█▍        | 119/800 [01:44<09:56,  1.14it/s, accuracy=71.67%, best_accuracy=71.91%, lr=1e-05, test_loss=1.45e-02, train_loss=4.81e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 80
最佳准确率：71.91%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=8402890





In [None]:
# Two [Conv + Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with two conv/pool stages and a two-layer fully connected head.

    The flattened feature size (64 * 8 * 8) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Channel width entering/leaving each stage; every stage halves H and W:
        # (3, 32, 32) -> (32, 16, 16) -> (64, 8, 8)
        widths = (3, 32, 64)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
        # Classifier head applied to the flattened feature map
        layers += [
            nn.Flatten(),
            nn.Linear(in_features=64 * 8 * 8, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-module indices match the layer order above
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the two-stage network, apply init_weight to every sub-module, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

  3%|▎         | 21/800 [00:17<11:19,  1.15it/s, accuracy=74.37%, best_accuracy=76.13%, lr=5e-03, test_loss=1.09e-02, train_loss=3.55e-04]

In [None]:
# Three [Conv + Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with three conv/pool stages and a two-layer fully connected head.

    The flattened feature size (128 * 4 * 4) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Channel width entering/leaving each stage; every stage halves H and W:
        # (3, 32, 32) -> (32, 16, 16) -> (64, 8, 8) -> (128, 4, 4)
        widths = (3, 32, 64, 128)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
        # Classifier head applied to the flattened feature map
        layers += [
            nn.Flatten(),
            nn.Linear(in_features=128 * 4 * 4, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-module indices match the layer order above
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the three-stage network, apply init_weight to every sub-module, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

In [None]:
# Four [Conv + Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with four conv/pool stages and a two-layer fully connected head.

    The flattened feature size (256 * 2 * 2) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Channel width entering/leaving each stage; every stage halves H and W:
        # (3, 32, 32) -> (32, 16, 16) -> (64, 8, 8) -> (128, 4, 4) -> (256, 2, 2)
        widths = (3, 32, 64, 128, 256)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
        # Classifier head applied to the flattened feature map
        layers += [
            nn.Flatten(),
            nn.Linear(in_features=256 * 2 * 2, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-module indices match the layer order above
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the four-stage network, apply init_weight to every sub-module, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
# NOTE(review): this cell trains with 1e-3 while the shallower variants use 1e-2 -- confirm this
# is intentional, since it affects how comparable the rows of the results table are.
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-3,  # learning rate
    128,  # batch size
    dataset,
    device,
)

| n*[2Conv+Pool]+MLP | 参数量  | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :----------------: | :-----: | :------: | :----------: | :--------: |
|        一层        | 8412202 | 1.09s/it |      85      |   73.91%   |
|        两层        | 4273578 | 1.14s/it |     117      |   81.97%   |
|        三层        | 2398378 | 1.29s/it |     130      |   84.99%   |
|        四层        | 2236074 | 1.53s/it |      39      |   83.29%   |

In [None]:
# One [2 x Conv + Pool] stage followed by the MLP head
class Model(nn.Module):
    """CNN with one double-convolution stage and a two-layer fully connected head.

    The flattened feature size (32 * 16 * 16) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: (3, 32, 32) -> (32, 16, 16)
        features = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        # Classifier head applied to the flattened feature map
        head = [
            nn.Flatten(),
            nn.Linear(in_features=32 * 16 * 16, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-modules stay addressable as net.0 ... net.12
        self.net = nn.Sequential(*features, *head)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the one-stage double-conv network, apply init_weight, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

In [None]:
# Two [2 x Conv + Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with two double-convolution stages and a two-layer fully connected head.

    The flattened feature size (64 * 8 * 8) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Channel width entering/leaving each stage; every stage halves H and W:
        # (3, 32, 32) -> (32, 16, 16) -> (64, 8, 8)
        widths = (3, 32, 64)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                # First conv changes the channel count, second keeps it.
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
        # Classifier head applied to the flattened feature map
        layers += [
            nn.Flatten(),
            nn.Linear(in_features=64 * 8 * 8, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-module indices match the layer order above
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the two-stage double-conv network, apply init_weight, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

In [None]:
# Three [2 x Conv + Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with three double-convolution stages and a two-layer fully connected head.

    The flattened feature size (128 * 4 * 4) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Channel width entering/leaving each stage; every stage halves H and W:
        # (3, 32, 32) -> (32, 16, 16) -> (64, 8, 8) -> (128, 4, 4)
        widths = (3, 32, 64, 128)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                # First conv changes the channel count, second keeps it.
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
        # Classifier head applied to the flattened feature map
        layers += [
            nn.Flatten(),
            nn.Linear(in_features=128 * 4 * 4, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-module indices match the layer order above
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the three-stage double-conv network, apply init_weight, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

In [None]:
# Four [2 x Conv + Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with four double-convolution stages and a two-layer fully connected head.

    The flattened feature size (256 * 2 * 2) fixes the expected input at
    3 x 32 x 32; the final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Channel width entering/leaving each stage; every stage halves H and W:
        # (3, 32, 32) -> (32, 16, 16) -> (64, 8, 8) -> (128, 4, 4) -> (256, 2, 2)
        widths = (3, 32, 64, 128, 256)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers += [
                # First conv changes the channel count, second keeps it.
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.Conv2d(c_out, c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),
            ]
        # Classifier head applied to the flattened feature map
        layers += [
            nn.Flatten(),
            nn.Linear(in_features=256 * 2 * 2, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-module indices match the layer order above
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the four-stage double-conv network, apply init_weight, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
# NOTE(review): this cell trains with 1e-3 while the shallower variants use 1e-2 -- confirm this
# is intentional, since it affects how comparable the rows of the results table are.
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-3,  # learning rate
    128,  # batch size
    dataset,
    device,
)

| n*[2Conv+Pool]+MLP | 卷积核 | 参数量  | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :----------------: | :----: | :-----: | :------: | :----------: | :--------: |
|        一层        |   3    | 8412202 | 1.09s/it |      85      |   73.91%   |
|        一层        |   5    | 8430122 | 1.37s/it |      28      |   63.35%   |
|        一层        |   7    | 8457002 | 1.66s/it |      35      |   60.62%   |

In [None]:
# One [2 x Conv + Pool] stage with 5x5 kernels, followed by the MLP head
class Model(nn.Module):
    """Single double-convolution stage using 5x5 kernels, plus a two-layer head.

    Padding 2 keeps the spatial size through each 5x5 convolution, so the
    flattened feature size stays 32 * 16 * 16 for a 3 x 32 x 32 input.
    The final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: (3, 32, 32) -> (32, 16, 16)
        features = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=5, padding=2),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        # Classifier head applied to the flattened feature map
        head = [
            nn.Flatten(),
            nn.Linear(in_features=32 * 16 * 16, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-modules stay addressable as net.0 ... net.12
        self.net = nn.Sequential(*features, *head)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the 5x5-kernel network, apply init_weight to every sub-module, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

In [None]:
# One [2 x Conv + Pool] stage with 7x7 kernels, followed by the MLP head
class Model(nn.Module):
    """Single double-convolution stage using 7x7 kernels, plus a two-layer head.

    Padding 3 keeps the spatial size through each 7x7 convolution, so the
    flattened feature size stays 32 * 16 * 16 for a 3 x 32 x 32 input.
    The final linear layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: (3, 32, 32) -> (32, 16, 16)
        features = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=7, padding=3),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=7, padding=3),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        ]
        # Classifier head applied to the flattened feature map
        head = [
            nn.Flatten(),
            nn.Linear(in_features=32 * 16 * 16, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-modules stay addressable as net.0 ... net.12
        self.net = nn.Sequential(*features, *head)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the 7x7-kernel network, apply init_weight to every sub-module, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

| n*[2Conv+Pool]+MLP | 池化方式 | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :----------------: | :------: | :------: | :----------: | :--------: |
|        一层        |   最大   | 1.09s/it |      85      |   73.91%   |
|        一层        |   平均   | 1.11s/it |      61      |   72.83%   |


In [None]:
# One [2 x Conv + Pool] stage using average pooling, followed by the MLP head
class Model(nn.Module):
    """Single double-convolution stage with average pooling, plus a two-layer head.

    Layer-for-layer the same stack as the 3x3 max-pool variant except that the
    down-sampling layer is ``nn.AvgPool2d``. The flattened feature size
    (32 * 16 * 16) fixes the expected input at 3 x 32 x 32; the final linear
    layer emits 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: (3, 32, 32) -> (32, 16, 16)
        features = [
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(num_features=32),
            nn.ReLU(),
            nn.AvgPool2d(kernel_size=2),
        ]
        # Classifier head applied to the flattened feature map
        head = [
            nn.Flatten(),
            nn.Linear(in_features=32 * 16 * 16, out_features=1024),
            nn.BatchNorm1d(num_features=1024),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=1024, out_features=10),
        ]
        # One flat Sequential, so sub-modules stay addressable as net.0 ... net.12
        self.net = nn.Sequential(*features, *head)

    def forward(self, x):
        """Run the input batch ``x`` through the whole stack and return its output."""
        logits = self.net(x)
        return logits


# Build the average-pooling network, apply init_weight to every sub-module, then move it to the device.
model = (
    Model()
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

In [None]:
# ResNet-34 baseline built without pretrained weights (weights=None).
model = torchvision.models.resnet34(weights=None)

# Swap the stem for a 3x3, padding-1 convolution and turn the stem max-pool into a no-op.
# NOTE(review): presumably done to avoid early down-sampling on 32x32 inputs -- confirm.
model.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
model.maxpool = nn.Identity()

# Replace the classifier with a 10-way linear layer on the same input width.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=10)

model = (
    model
    .apply(init_weight)
    .to(device)
)
# Argument roles follow the summary that run() prints (loss function / learning rate / batch size).
run(
    model,
    nn.CrossEntropyLoss(),  # loss function
    1e-2,  # learning rate
    128,  # batch size
    dataset,
    device,
)

  5%|▌         | 41/800 [10:22<3:12:00, 15.18s/it, accuracy=81.99%, best_accuracy=82.46%, lr=1e-03, test_loss=1.07e-02, train_loss=1.14e-05]


KeyboardInterrupt: 