In [2]:
from torch import nn
import torch

class LeNetOrig(nn.Module):
    """this architecture is based on [LeCun et al., 1998b], keep all the original network"""

    def __init__(self, in_channels, num_classes):
        super(LeNetOrig, self).__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.net = nn.Sequential(
            nn.Conv2d(in_channels= self.in_channels , out_channels=6, kernel_size=5, padding=2), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(16*36, 120), nn.Sigmoid(),
            nn.Linear(120, 84), nn.Sigmoid(),
            nn.Linear(84, self.num_classes)
        )

    def forward(self, X):
        X = self.net(X)
        return X

    def layer_summary(self, Input_shape):
        Rand_Input = torch.rand(*Input_shape)
        for layer in self.net:
            Rand_Input = layer(Rand_Input)
            print(layer.__class__.__name__, "output shape: \t", Rand_Input.shape)


In [154]:
net = nn.Sequential(
            nn.Conv2d(in_channels= 1 , out_channels=6, kernel_size=5, padding=2), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten())
# To calculate the output of Flatten layer, we don't care about batch size

In [172]:
model = LeNetOrig(1,10)

In [173]:
X = torch.rand(5, 1, 32, 32)
X.shape

torch.Size([5, 1, 32, 32])

In [174]:
model.forward(X)

tensor([[-0.0475, -0.1430, -0.4903, -0.2810, -0.4066,  0.4294,  0.1157,  0.1564,
          0.1111, -0.1128],
        [-0.0475, -0.1429, -0.4903, -0.2810, -0.4066,  0.4294,  0.1157,  0.1564,
          0.1111, -0.1128],
        [-0.0475, -0.1429, -0.4903, -0.2809, -0.4066,  0.4294,  0.1157,  0.1564,
          0.1111, -0.1128],
        [-0.0475, -0.1429, -0.4903, -0.2809, -0.4066,  0.4294,  0.1157,  0.1564,
          0.1111, -0.1128],
        [-0.0475, -0.1429, -0.4903, -0.2809, -0.4066,  0.4294,  0.1158,  0.1564,
          0.1111, -0.1129]], grad_fn=<AddmmBackward0>)

In [176]:
model.layer_summary((1,1,28, 28))

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x400 and 576x120)

In [137]:
model.net

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): Sigmoid()
  (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (4): Sigmoid()
  (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (6): Flatten(start_dim=1, end_dim=-1)
  (7): Linear(in_features=400, out_features=120, bias=True)
  (8): Sigmoid()
  (9): Linear(in_features=120, out_features=84, bias=True)
  (10): Sigmoid()
  (11): Linear(in_features=84, out_features=10, bias=True)
)

In [164]:
net(X).shape

torch.Size([5, 576])

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [179]:
net(X).to(device)

tensor([[0.5081, 0.5107, 0.5075,  ..., 0.4857, 0.4869, 0.4857],
        [0.5079, 0.5059, 0.5071,  ..., 0.4859, 0.4879, 0.4882],
        [0.5082, 0.5065, 0.5087,  ..., 0.4866, 0.4869, 0.4863],
        [0.5056, 0.5068, 0.5056,  ..., 0.4886, 0.4866, 0.4858],
        [0.5061, 0.5075, 0.5102,  ..., 0.4874, 0.4860, 0.4890]],
       grad_fn=<ReshapeAliasBackward0>)

In [4]:
device

device(type='cpu')

In [6]:
torch.cuda.is_available()

False