In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

# Alexnet

## alexnet in paper

input image size: 227\*227\*3<br>
- 第1层conv: <br>
(3,96,kernel_size=(11,11), stride=4, pad=0),outsize=(227-11+2\*0)/4+1=55<br>
参数数量为$3\times 11 \times 11 \times 96 + 96 = 34944\approx 34.9k$
- 第2层maxpool:<br>
(3,3,stride=2,pad=0),outsize=(55-3)/2+1=27
- 第3层conv: <br>
(96,256,kernel_size=(5,5), stride=1,pad=2),outsize=(27-5+2\*2)/1+1=27<br>
参数数量为$96\times 5 \times 5 \times 256 + 256 = 614656\approx 614.7k$
- 第4层maxpool:<br>
(3,3,stride=2),outsize=(27-3)/2+1=13
- 第5层conv:<br>
(256,384,kernel_size=(3,3), stride=1, pad=1),outsize=(13-3+2\*1)/1+1=13<br>
参数数量为$256\times 3 \times 3 \times 384 + 384 = 885120\approx 885.1k$
- 第6层conv:<br>
(384,256,kernel_size=(3,3), stride=1, pad=1),outsize=(13-3+2\*1)/1+1=13<br>
参数数量为$384\times 3 \times 3 \times 256 + 256 = 884992\approx 885.0k$
- 第7层maxpool:<br>
(3,3,stride=2),outsize=(13-3)/2+1=6<br>
reshape to vector,outsize=6\*6\*256=9216
- 第8层fc:<br>
(9216,4096),outsize=4096<br>
参数数量为$9216\times 4096 + 4096=37752832=37.7million$<br>
- 第9层relu:<br>
- 第10层fc:<br>
(4096,4096),outsize=4096<br>
参数数量为$4096\times 4096 + 4096=16781312=16.7million$<br>
- 第11层relu:<br>
- 第12层fc:<br>
(4096,1000),outsize=1000<br>
参数数量为$4096\times 1000 + 1000=4097000=4million$

最后softmax

总共参数数量为：约 61 million（上述各层参数之和为 61050856）

In [62]:
class Alexnet(nn.Module):
    """AlexNet-style convolutional classifier for 3-channel images.

    The network is a convolutional feature extractor (``features``) followed by
    a three-layer fully connected head (``classifier``). The first linear layer
    expects ``256 * 6 * 6`` flattened features, which is what ``features``
    produces for a ``(N, 3, 227, 227)`` input; other spatial sizes will not
    match that layer.

    NOTE(review): this stack has four convolutional layers, whereas the
    torchvision AlexNet has five. Confirm the omission is intentional before
    comparing parameter counts or loading external weights.

    Args:
        num_classes: Size of the final linear layer's output. Defaults to 1000.
        dropout: Drop probability used by both ``nn.Dropout`` layers in the
            classifier. Defaults to 0.5, the ``nn.Dropout`` default.
    """

    def __init__(self, num_classes=1000, dropout=0.5):
        super(Alexnet, self).__init__()
        # Layer order and indices are kept exactly as before so that
        # state_dict keys (features.0, features.3, ...) remain stable.
        self.features = nn.Sequential(
            # 227 -> 55 (11x11 kernel, stride 4, no padding), then pool -> 27
            nn.Conv2d(3, 96, 11, stride=4, padding=0),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),

            # 27 -> 27 (5x5 kernel, padding 2), then pool -> 13
            nn.Conv2d(96, 256, 5, stride=1, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),

            # Two size-preserving 3x3 convolutions, then pool 13 -> 6
            nn.Conv2d(256, 384, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=dropout),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        """Return raw class scores of shape ``(N, num_classes)`` for a batch ``x``.

        No softmax is applied here; the output is the last linear layer's result.
        """
        x = self.features(x)
        # flatten() falls back to a copy when the activations are not
        # contiguous (e.g. channels_last inputs), where .view() would raise.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [63]:
# Instantiate the model defined above; the bare name on the last line makes the
# notebook display the module's layer-by-layer repr.
alexnet = Alexnet()
alexnet

Alexnet(
  (features): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p=0.5)
    (4): Linear(in_features=4096, out_features=4096, bias=True)
    (5): ReLU(inplace)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)

In [64]:
# Random stand-in for a single 3-channel 227x227 image, laid out as
# (batch, channels, height, width) for nn.Conv2d.
img = torch.randn(1, 3, 227, 227)
# Show the three channel values of the top-left pixel.
img[0, :, 0, 0]

tensor([-1.1145,  0.0334,  1.0321])

In [65]:
# Run only the convolutional feature extractor to check that a 227x227 input
# yields the 256 x 6 x 6 feature map the first linear layer expects.
out = alexnet.features(img)
out.shape

torch.Size([1, 256, 6, 6])

In [66]:
# Full forward pass (features + classifier); the result has one score per class.
out = alexnet(img)
out.shape

torch.Size([1, 1000])

## alexnet in pytorch

In [39]:
# Build torchvision's reference AlexNet (called with no arguments) for
# comparison with the hand-written model; `models` is imported with the other
# dependencies in the notebook's first cell.
net = models.alexnet()
net

AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
    (2): ReLU(inplace)
    (3): Dropout(p

In [45]:
# Random stand-in for a single 3-channel 224x224 image, laid out as
# (batch, channels, height, width).
img = torch.randn(1, 3, 224, 224)
# Show the three channel values of the top-left pixel.
img[0, :, 0, 0]

tensor([-0.5358,  0.8940, -0.9849])

In [46]:
# Forward the 224x224 input through torchvision's AlexNet; its repr above lists
# an AdaptiveAvgPool2d((6, 6)) stage ahead of the classifier.
out = net(img)
out.shape

torch.Size([1, 1000])