In [12]:
from torch import nn

# Convolutional feature extractor of an AlexNet-style network.
# Shape comments assume a single-channel 224x224 input.
net = nn.Sequential(
    # Stage 1: large receptive field, aggressive down-sampling.
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1),   # -> 96 x 54 x 54
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),                   # -> 96 x 26 x 26
    # Stage 2: "same" 5x5 convolution followed by pooling.
    nn.Conv2d(96, 256, kernel_size=5, padding=2),            # -> 256 x 26 x 26
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),                   # -> 256 x 12 x 12
    # Stage 3: three stacked "same" 3x3 convolutions.
    nn.Conv2d(256, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),                   # -> 256 x 5 x 5
    # Collapse everything but the batch dimension: 256 * 5 * 5 = 6400 features.
    nn.Flatten(),
)

In [13]:
import torch
# Dummy batch: one single-channel 224x224 image with values drawn uniformly from [0, 1).
x = torch.rand((1, 1, 224, 224))

In [14]:
# Sanity check: size of the flattened feature vector the network produces for `x`.
net(x).shape

torch.Size([1, 6400])

In [18]:
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The network is a single ``nn.Sequential`` stored in ``self.net``: five
    convolutional layers (with ReLU and max-pooling), a flatten step, and three
    fully connected layers with dropout between them. All ``nn.Linear`` and
    ``nn.Conv2d`` layers are re-initialized with Xavier-uniform weights.

    The first fully connected layer expects 6400 input features
    (256 channels x 5 x 5), which is what the convolutional stack yields for
    224x224 inputs.

    Args:
        in_channels: Number of channels of the input images. Defaults to 1.
        num_classes: Number of output logits. Defaults to 10.
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=96, kernel_size=11, stride=4, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(96, 256, kernel_size=5, padding=2), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(256, 384, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(384, 384, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Flatten(),
            nn.Linear(6400, 4096), nn.ReLU(), nn.Dropout(),
            nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(),
            nn.Linear(4096, num_classes))

        # `apply` visits every sub-module; the initializer ignores the ones
        # that are not Linear/Conv2d.
        self.net.apply(self.init_xavier_uniform)

    def forward(self, X):
        """Run ``X`` through the whole network and return the logits."""
        return self.net(X)

    @staticmethod
    def init_xavier_uniform(layer):
        """Initialize a ``Linear``/``Conv2d`` layer in place.

        Weights are drawn from the Xavier-uniform distribution and the bias,
        when the layer has one, is set to the constant 1e-4. Any other module
        type is left untouched.
        """
        if isinstance(layer, (nn.Linear, nn.Conv2d)):
            nn.init.xavier_uniform_(layer.weight)
            # Layers created with bias=False have `bias is None`.
            if layer.bias is not None:
                nn.init.constant_(layer.bias, 0.0001)

    def layer_summary(self, X_shape: tuple):
        """Print the output shape of every layer for a random input.

        Args:
            X_shape: Full shape of the dummy input, including the batch
                dimension, e.g. ``(1, 1, 224, 224)``.
        """
        # Create the dummy input where the weights live so the summary also
        # works after the model has been moved to another device.
        device = next(self.parameters()).device
        X = torch.rand(*X_shape, device=device)
        # Only shapes are of interest, so skip building the autograd graph.
        with torch.no_grad():
            for layer in self.net:
                X = layer(X)
                print(layer.__class__.__name__, "output shape: ", X.shape)

In [19]:
# Build the AlexNet classifier; its constructor applies init_xavier_uniform to every layer.
# NOTE(review): the tensor displayed under this cell cannot come from a bare assignment —
# it looks like stale output from an earlier version of the cell; re-run to confirm.
model = AlexNet()


tensor([[-0.0233, -0.0387, -0.0694, -0.0738,  0.0384, -0.0301, -0.0287,  0.0401,
         -0.0055,  0.0152]], grad_fn=<AddmmBackward0>)

In [20]:
# Stand-alone nn.Sequential version of the full classifier, left with PyTorch's
# default parameter initialization. Note that this rebinds the name `net`.
net = nn.Sequential(
    # --- convolutional feature extractor ---
    nn.Conv2d(1, 96, kernel_size=11, stride=4, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(96, 256, kernel_size=5, padding=2),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Conv2d(256, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 384, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv2d(384, 256, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2),
    nn.Flatten(),
    # --- fully connected classifier head ---
    nn.Linear(6400, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, 10),
)

In [23]:
# Forward pass of the plain nn.Sequential model (no custom initialization applied) on `x`.
# NOTE(review): the model is never switched to eval(), so the Dropout layers are presumably
# active and the printed values will differ between runs — confirm if exact values matter.
net(x)

tensor([[ 0.0007, -0.0140,  0.0054,  0.0057, -0.0039, -0.0107, -0.0064,  0.0188,
          0.0102, -0.0033]], grad_fn=<AddmmBackward0>)

In [24]:
# Forward pass of the Xavier-initialized AlexNet instance on the same input `x`.
# NOTE(review): as with `net`, eval() is never called, so Dropout is presumably active here too.
model(x)

tensor([[ 0.0336, -0.0143, -0.0195, -0.0578, -0.0207,  0.0328, -0.0014,  0.0649,
         -0.0345, -0.0443]], grad_fn=<AddmmBackward0>)

In [27]:
# Apply only the first layer of the model (the 11x11, stride-4 Conv2d) to `x`,
# i.e. the raw feature maps before the ReLU.
model.net[0](x)

tensor([[[[ 0.0549,  0.0577,  0.0405,  ...,  0.0178, -0.0257,  0.0711],
          [-0.0078, -0.0047,  0.0020,  ..., -0.0608,  0.0714, -0.0081],
          [ 0.0438,  0.0321,  0.0372,  ...,  0.0461,  0.0436,  0.0351],
          ...,
          [-0.0222,  0.0543,  0.0836,  ..., -0.0121,  0.0322,  0.0747],
          [-0.0051, -0.0361,  0.0893,  ...,  0.1227,  0.1030,  0.0811],
          [ 0.0463,  0.0011,  0.0254,  ...,  0.0138,  0.0866,  0.0090]],

         [[-0.1126, -0.1300, -0.0585,  ..., -0.0973, -0.0884, -0.0572],
          [-0.0590, -0.0912, -0.0507,  ..., -0.0614, -0.0354, -0.0829],
          [-0.0940, -0.0944, -0.1065,  ..., -0.0173, -0.0398, -0.1003],
          ...,
          [-0.1315, -0.1466, -0.0793,  ..., -0.1010, -0.0771, -0.0887],
          [-0.0811, -0.0633, -0.1034,  ..., -0.1236, -0.0589, -0.0866],
          [-0.1783, -0.0023, -0.0791,  ..., -0.0373, -0.2028, -0.0729]],

         [[ 0.0239,  0.0431, -0.0408,  ..., -0.0225, -0.0364, -0.0490],
          [ 0.0234, -0.0408, -

In [29]:
# Inspect the bias of the first Conv2d layer. Parameters are attributes of the
# module itself (net[0]); calling the layer on `x` returns a plain Tensor, which
# has no `.bias`, so the layer must be indexed without being applied.
net[0].bias