In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class CNN(nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()

        self.conv1 = nn.Conv2d(1,6,5)
        self.conv2 = nn.Conv2d(6,16,5)


        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84,10)


    def forward(self, input):

        c1 = F.relu(self.conv1(input))
        
        s2 = F.max_pool2d(c1, (2,2))

        c3 = F.relu(self.conv2(s2))

        s4 = F.max_pool2d(c3, 2)

        s4 = torch.flatten(s4, 1)

        f5 = F.relu(self.fc1(s4))

        f6 = F.relu(self.fc2(f5))

        output = self.fc3(f6)

        return output
    

net = CNN()
print(net)

CNN(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
params = list(net.parameters())
print(len(params))
print(params[0].size())  # c

10
torch.Size([6, 1, 5, 5])


In [4]:
input = torch.randn(1000, 1, 32, 32)
target = torch.randint(0, 10, (1000,))
print(input, target)

tensor([[[[ 1.8304,  2.1741,  1.2894,  ..., -0.0283,  0.0301, -0.6318],
          [ 1.5042, -0.9755,  0.0124,  ..., -0.7749, -0.4634,  1.4272],
          [ 1.0852,  1.8906,  1.0847,  ...,  1.2250,  0.7021, -0.5109],
          ...,
          [ 0.3150,  1.0009, -0.7436,  ..., -0.2444,  0.3981,  1.4002],
          [-0.9972, -1.9709,  0.2885,  ..., -0.1777,  1.7906, -1.9297],
          [ 0.5686, -1.2948,  0.6613,  ..., -0.7192, -0.5541,  0.3755]]],


        [[[ 0.4138,  0.1743,  0.7633,  ..., -0.2714,  0.6033,  0.0261],
          [-1.5431,  0.4041, -0.4057,  ...,  0.6670, -0.3052, -0.8656],
          [ 1.5907,  0.9108, -1.5594,  ..., -1.9808, -0.4612,  2.1691],
          ...,
          [ 0.4705,  0.1105,  0.4917,  ..., -0.9505, -1.4356, -2.7950],
          [ 0.1087, -0.5404, -0.7334,  ...,  1.3305, -1.4843,  0.1007],
          [ 1.0316,  0.0487,  0.0299,  ...,  0.9173,  0.8617,  1.5371]]],


        [[[-0.0633, -0.3099, -1.1111,  ...,  0.4349, -1.1570,  0.7946],
          [-0.1998, -0.582

In [5]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()

# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
  # Does the update

for i in range(500):
    optimizer.zero_grad()   # zero the gradient buffers
    output = net(input)

    loss = criterion(output, target)
    print(loss)
    loss.backward()
    optimizer.step()  


tensor(2.3072, grad_fn=<NllLossBackward0>)
tensor(2.3071, grad_fn=<NllLossBackward0>)
tensor(2.3069, grad_fn=<NllLossBackward0>)
tensor(2.3068, grad_fn=<NllLossBackward0>)
tensor(2.3067, grad_fn=<NllLossBackward0>)
tensor(2.3066, grad_fn=<NllLossBackward0>)
tensor(2.3064, grad_fn=<NllLossBackward0>)
tensor(2.3063, grad_fn=<NllLossBackward0>)
tensor(2.3062, grad_fn=<NllLossBackward0>)
tensor(2.3061, grad_fn=<NllLossBackward0>)
tensor(2.3060, grad_fn=<NllLossBackward0>)
tensor(2.3059, grad_fn=<NllLossBackward0>)
tensor(2.3058, grad_fn=<NllLossBackward0>)
tensor(2.3057, grad_fn=<NllLossBackward0>)
tensor(2.3056, grad_fn=<NllLossBackward0>)
tensor(2.3055, grad_fn=<NllLossBackward0>)
tensor(2.3054, grad_fn=<NllLossBackward0>)
tensor(2.3053, grad_fn=<NllLossBackward0>)
tensor(2.3052, grad_fn=<NllLossBackward0>)
tensor(2.3051, grad_fn=<NllLossBackward0>)
tensor(2.3050, grad_fn=<NllLossBackward0>)
tensor(2.3049, grad_fn=<NllLossBackward0>)
tensor(2.3048, grad_fn=<NllLossBackward0>)
tensor(2.30