# AlexNet

![AlexNet architecture](img/alexnet.png)

![AlexNet architecture (alternate figure)](img/alexnet.webp)

Reference: [The W3H of AlexNet, VGGNet, ResNet and Inception](https://towardsdatascience.com/the-w3h-of-alexnet-vggnet-resnet-and-inception-7baaaecccc96)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers followed by three linear layers.

    The first linear layer is fixed at ``256 * 5 * 5`` input features, which is
    what the convolutional stack produces for 224x224 images (see the size
    trace in ``__init__``). Other input resolutions that do not end in a 5x5
    feature map will fail inside ``forward`` with a shape mismatch.

    Args:
        num_classes: Width of the final linear layer. Defaults to 10 because
            this notebook trains on Fashion-MNIST; the paper uses 1000.
        in_channels: Number of channels of the input images. Defaults to 3.
    """

    def __init__(self, num_classes: int = 10, in_channels: int = 3):
        super().__init__()

        # Spatial size per layer uses floor((n - kernel + 2 * padding) / stride) + 1.
        # For a 224x224 input: 224 -> 54 -> 26 -> 26 -> 12 -> 12 -> 12 -> 12 -> 5.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=96, kernel_size=11, stride=4),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),

            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, padding=1),
            nn.ReLU(),

            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        self.fc = nn.Sequential(
            # Why 256*5*5 and not 256*6*6? The diagram below slides a 3-wide
            # window with stride 2 over 13 positions, which yields 6 outputs:
            #
            # 1234567890123
            # ---         |
            #   ---       |
            #     ---     |
            #       ---   |
            #         --- |
            #           ---
            #
            # A 13-wide map in front of the last pool corresponds to a 227x227
            # input. With a 224x224 input that map is only 12 wide, the sixth
            # window no longer fits, and the pool emits 5x5 -> 256*5*5 features.
            # Related discussion: https://zhuanlan.zhihu.com/p/180554948
            nn.Linear(256 * 5 * 5, 4096),
            nn.ReLU(),
            # Dropout randomly zeroes activations during training to reduce
            # overfitting and improve generalization.
            # https://blog.csdn.net/leviopku/article/details/120786990
            nn.Dropout(p=0.5),
            # NOTE(review): only the first hidden linear layer is followed by
            # dropout, whereas the AlexNet paper regularizes both hidden linear
            # layers. Left as is so the layer indices (and therefore saved
            # state_dict keys) do not change.
            nn.Linear(4096, 4096),
            nn.ReLU(),
            # Fashion-MNIST has 10 classes, hence the default of 10 instead of
            # the 1000 classes used in the paper.
            nn.Linear(4096, num_classes),
        )

    def forward(self, X):
        """Compute class scores for a batch of images.

        Args:
            X: Tensor of shape ``(N, in_channels, H, W)``. ``H`` and ``W`` must
                reduce to a 5x5 feature map (e.g. 224x224).

        Returns:
            Tensor of shape ``(N, num_classes)`` holding the raw output of the
            last linear layer (no softmax is applied).
        """
        feature = self.conv(X)
        # flatten(start_dim=1) keeps the batch axis and merges the rest. Unlike
        # view(N, -1) it also works for an empty batch (where -1 cannot be
        # inferred) and for non-contiguous activations (it copies if needed).
        return self.fc(torch.flatten(feature, start_dim=1))
    
# Build the network with its default constructor arguments and print the
# module tree (layer types and their hyper-parameters) as a quick visual check.
net = AlexNet()
print(net)

AlexNet(
  (conv): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU()
    (10): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU()
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=6400, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU()
    (5): Linear(in_features=4096, out_featur