In [None]:
import torch
import torch.nn as nn
from task2_cifar import train_valid_data, init_weight, run

# Pick the active accelerator backend when one is available; otherwise fall back to the CPU.
if torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cpu"

# Object returned by the project helper train_valid_data()
# (presumably the train/validation split -- confirm in task2_cifar).
dataset = train_valid_data()

| n*[Conv+Pool]+MLP | 参数量  | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :---------------: | :-----: | :------: | :----------: | :--------: |
|       一层        | 8402890 | 1.16it/s |     136      |   71.21%   |
|       两层        | 4227210 | 1.15it/s |     129      |   77.28%   |
|       三层        | 2204170 | 1.02it/s |     103      |   81.07%   |
|       四层        | 1451274 | 1.07s/it |      45      |   79.72%   |

In [2]:
# One [Conv+Pool] stage followed by the MLP head
class Model(nn.Module):
    """CNN with a single Conv-BN-ReLU-MaxPool stage and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # (3,32,32) -> (32,16,16)
        features = [
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        # Flattened (32,16,16) feature map -> 1024 hidden units -> 10 outputs
        classifier = [
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        ]
        self.net = nn.Sequential(*features, *classifier)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 15%|█▍        | 119/800 [01:44<09:56,  1.14it/s, accuracy=71.67%, best_accuracy=71.91%, lr=1e-05, test_loss=1.45e-02, train_loss=4.81e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 80
最佳准确率：71.91%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=8402890





In [3]:
# Two [Conv+Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with two Conv-BN-ReLU-MaxPool stages and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        layers = []
        in_ch = 3
        # Feature maps: (3,32,32) -> (32,16,16) -> (64,8,8)
        for out_ch in (32, 64):
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
            in_ch = out_ch
        # Classifier on the flattened (64,8,8) feature map
        layers += [
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 13%|█▎        | 102/800 [01:31<10:24,  1.12it/s, accuracy=77.71%, best_accuracy=77.82%, lr=4e-05, test_loss=1.20e-02, train_loss=1.96e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 63
最佳准确率：77.82%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=4227210





In [4]:
# Three [Conv+Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with three Conv-BN-ReLU-MaxPool stages and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # Channel widths of consecutive stages; each stage halves the spatial size:
        # (3,32,32) -> (32,16,16) -> (64,8,8) -> (128,4,4)
        widths = (3, 32, 64, 128)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Conv2d(c_in, c_out, 3, padding=1))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2))
        # Classifier on the flattened (128,4,4) feature map
        layers.append(nn.Flatten())
        layers.append(nn.Linear(128 * 4 * 4, 1024))
        layers.append(nn.BatchNorm1d(1024))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(0.5))
        layers.append(nn.Linear(1024, 10))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 24%|██▍       | 192/800 [03:04<09:44,  1.04it/s, accuracy=80.96%, best_accuracy=81.11%, lr=2e-07, test_loss=1.13e-02, train_loss=1.37e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 153
最佳准确率：81.11%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=2204170





In [5]:
# Four [Conv+Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with four Conv-BN-ReLU-MaxPool stages and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()

        def stage(c_in, c_out):
            # One Conv-BN-ReLU unit followed by 2x2 max pooling.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]

        self.net = nn.Sequential(
            *stage(3, 32),  # (3,32,32) -> (32,16,16)
            *stage(32, 64),  # (32,16,16) -> (64,8,8)
            *stage(64, 128),  # (64,8,8) -> (128,4,4)
            *stage(128, 256),  # (128,4,4) -> (256,2,2)
            # Classifier on the flattened (256,2,2) feature map
            nn.Flatten(),
            nn.Linear(256 * 2 * 2, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        )

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, batch size 128.
# NOTE: the learning rate for this run is 1e-3.
run(model, nn.CrossEntropyLoss(), 1e-3, 128, dataset, device)

 24%|██▎       | 189/800 [03:27<11:10,  1.10s/it, accuracy=78.82%, best_accuracy=78.99%, lr=2e-08, test_loss=1.24e-02, train_loss=5.62e-07]

模型连续40次无提升，提前终止训练
最佳迭代次数: 150
最佳准确率：78.99%
损失函数=CrossEntropyLoss() 学习率=0.001 批大小=128
模型参数=1451274





| n*[2Conv+Pool]+MLP | 参数量  | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :----------------: | :-----: | :------: | :----------: | :--------: |
|        一层        | 8412202 | 1.09s/it |      85      |   73.91%   |
|        两层        | 4273578 | 1.14s/it |     117      |   81.97%   |
|        三层        | 2398378 | 1.29s/it |     130      |   84.99%   |
|        四层        | 2236074 | 1.53s/it |      39      |   83.29%   |

In [6]:
# One [2Conv+Pool] stage followed by the MLP head
class Model(nn.Module):
    """CNN with one (Conv-BN-ReLU)x2 + MaxPool stage and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # (3,32,32) -> (32,16,16): two 3x3 convolutions, then 2x2 max pooling
        features = [
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        # Flattened (32,16,16) feature map -> 1024 hidden units -> 10 outputs
        classifier = [
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        ]
        self.net = nn.Sequential(*features, *classifier)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 23%|██▎       | 184/800 [03:18<11:05,  1.08s/it, accuracy=75.06%, best_accuracy=75.17%, lr=2e-07, test_loss=1.36e-02, train_loss=1.32e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 145
最佳准确率：75.17%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=8412202





In [7]:
# Two [2Conv+Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with two (Conv-BN-ReLU)x2 + MaxPool stages and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        layers = []
        in_ch = 3
        # Feature maps: (3,32,32) -> (32,16,16) -> (64,8,8)
        for out_ch in (32, 64):
            layers += [
                nn.Conv2d(in_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.Conv2d(out_ch, out_ch, 3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]
            in_ch = out_ch
        # Classifier on the flattened (64,8,8) feature map
        layers += [
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 23%|██▎       | 186/800 [03:34<11:48,  1.15s/it, accuracy=82.42%, best_accuracy=82.58%, lr=2e-07, test_loss=9.87e-03, train_loss=8.54e-06]

模型连续40次无提升，提前终止训练
最佳迭代次数: 147
最佳准确率：82.58%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=4273578





In [8]:
# Three [2Conv+Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with three (Conv-BN-ReLU)x2 + MaxPool stages and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # Channel widths of consecutive stages; each stage halves the spatial size:
        # (3,32,32) -> (32,16,16) -> (64,8,8) -> (128,4,4)
        widths = (3, 32, 64, 128)
        layers = []
        for c_in, c_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Conv2d(c_in, c_out, 3, padding=1))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.ReLU())
            layers.append(nn.Conv2d(c_out, c_out, 3, padding=1))
            layers.append(nn.BatchNorm2d(c_out))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(2))
        # Classifier on the flattened (128,4,4) feature map
        layers.append(nn.Flatten())
        layers.append(nn.Linear(128 * 4 * 4, 1024))
        layers.append(nn.BatchNorm1d(1024))
        layers.append(nn.ReLU())
        layers.append(nn.Dropout(0.5))
        layers.append(nn.Linear(1024, 10))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 17%|█▋        | 138/800 [02:59<14:19,  1.30s/it, accuracy=84.61%, best_accuracy=84.72%, lr=2e-06, test_loss=1.04e-02, train_loss=4.33e-06]

模型连续40次无提升，提前终止训练
最佳迭代次数: 99
最佳准确率：84.72%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=2398378





In [9]:
# Four [2Conv+Pool] stages followed by the MLP head
class Model(nn.Module):
    """CNN with four (Conv-BN-ReLU)x2 + MaxPool stages and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()

        def stage(c_in, c_out):
            # Two Conv-BN-ReLU units followed by 2x2 max pooling.
            return [
                nn.Conv2d(c_in, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.Conv2d(c_out, c_out, 3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2),
            ]

        self.net = nn.Sequential(
            *stage(3, 32),  # (3,32,32) -> (32,16,16)
            *stage(32, 64),  # (32,16,16) -> (64,8,8)
            *stage(64, 128),  # (64,8,8) -> (128,4,4)
            *stage(128, 256),  # (128,4,4) -> (256,2,2)
            # Classifier on the flattened (256,2,2) feature map
            nn.Flatten(),
            nn.Linear(256 * 2 * 2, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        )

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, batch size 128.
# NOTE: the learning rate for this run is 1e-3.
run(model, nn.CrossEntropyLoss(), 1e-3, 128, dataset, device)

  9%|▉         | 71/800 [01:49<18:42,  1.54s/it, accuracy=83.29%, best_accuracy=83.33%, lr=3e-05, test_loss=9.91e-03, train_loss=3.55e-07]

模型连续40次无提升，提前终止训练
最佳迭代次数: 32
最佳准确率：83.33%
损失函数=CrossEntropyLoss() 学习率=0.001 批大小=128
模型参数=2236074





| n*[2Conv+Pool]+MLP | 卷积核 | 参数量  | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :----------------: | :----: | :-----: | :------: | :----------: | :--------: |
|        一层        |   3    | 8412202 | 1.09s/it |      85      |   73.91%   |
|        一层        |   5    | 8430122 | 1.37s/it |      28      |   63.35%   |
|        一层        |   7    | 8457002 | 1.66s/it |      35      |   60.62%   |

In [10]:
# One [2Conv+Pool] stage with kernel size 5, followed by the MLP head
class Model(nn.Module):
    """CNN with one (Conv-BN-ReLU)x2 + MaxPool stage using 5x5 kernels, plus an MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        kernel = 5
        pad = kernel // 2  # padding of 2 keeps the 32x32 spatial size through each convolution
        # (3,32,32) -> (32,16,16)
        features = [
            nn.Conv2d(3, 32, kernel, padding=pad),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel, padding=pad),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        # Flattened (32,16,16) feature map -> 1024 hidden units -> 10 outputs
        classifier = [
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        ]
        self.net = nn.Sequential(*features, *classifier)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 20%|█▉        | 156/800 [03:30<14:29,  1.35s/it, accuracy=75.09%, best_accuracy=75.30%, lr=1e-06, test_loss=1.34e-02, train_loss=2.07e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 117
最佳准确率：75.30%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=8430122





In [None]:
# One [2Conv+Pool] stage with kernel size 7, followed by the MLP head
class Model(nn.Module):
    """CNN with one (Conv-BN-ReLU)x2 + MaxPool stage using 7x7 kernels, plus an MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        kernel = 7
        pad = kernel // 2  # padding of 3 keeps the 32x32 spatial size through each convolution
        # (3,32,32) -> (32,16,16)
        features = [
            nn.Conv2d(3, 32, kernel, padding=pad),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel, padding=pad),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2),
        ]
        # Flattened (32,16,16) feature map -> 1024 hidden units -> 10 outputs
        classifier = [
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        ]
        self.net = nn.Sequential(*features, *classifier)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 19%|█▉        | 150/800 [04:04<17:40,  1.63s/it, accuracy=74.56%, best_accuracy=74.62%, lr=1e-06, test_loss=1.37e-02, train_loss=2.08e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 111
最佳准确率：74.62%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=8457002





| n*[2Conv+Pool]+MLP | 池化方式 | 迭代用时 | 最佳迭代次数 | 最佳准确率 |
| :----------------: | :------: | :------: | :----------: | :--------: |
|        一层        |   最大   | 1.09s/it |      85      |   73.91%   |
|        一层        |   平均   | 1.11s/it |      61      |   72.83%   |


In [12]:
# One [2Conv+Pool] stage using average pooling, followed by the MLP head
class Model(nn.Module):
    """CNN with one (Conv-BN-ReLU)x2 + AvgPool stage and a two-layer MLP classifier.

    Layer sizes are chosen for (3, 32, 32) inputs; the final layer has 10 outputs.
    """

    def __init__(self):
        super().__init__()
        # (3,32,32) -> (32,16,16): two 3x3 convolutions, then 2x2 average pooling
        features = [
            nn.Conv2d(3, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.AvgPool2d(2),
        ]
        # Flattened (32,16,16) feature map -> 1024 hidden units -> 10 outputs
        classifier = [
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 1024),
            nn.BatchNorm1d(1024),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 10),
        ]
        self.net = nn.Sequential(*features, *classifier)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        out = self.net(x)
        return out


# Build the network, apply the project's init_weight to it via Module.apply,
# then move it to the selected device.
model = Model()
model.apply(init_weight)
model = model.to(device)
# Settings match the logged run: CrossEntropyLoss, learning rate 1e-2, batch size 128.
run(model, nn.CrossEntropyLoss(), 1e-2, 128, dataset, device)

 15%|█▌        | 120/800 [02:10<12:20,  1.09s/it, accuracy=74.08%, best_accuracy=74.29%, lr=1e-05, test_loss=1.36e-02, train_loss=1.85e-05]

模型连续40次无提升，提前终止训练
最佳迭代次数: 81
最佳准确率：74.29%
损失函数=CrossEntropyLoss() 学习率=0.01 批大小=128
模型参数=8412202



