# Predicting on a single FashionMNIST image with forward propagation (untrained network)
## input : 1 × 28 × 28
## output: 1 × 10

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

torch.set_printoptions(linewidth=120)

In [2]:
# FashionMNIST training split, stored under ./data/FashionMNIST.
# ToTensor turns each image into a tensor (1 × 28 × 28 for this dataset).
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

In [3]:
class Network(nn.Module):
    """Small CNN that maps 1 × 28 × 28 images to 10 class scores.

    Architecture: two 5 × 5 convolutions, each followed by ReLU and 2 × 2 max
    pooling, then three fully connected layers (192 -> 120 -> 60 -> 10).
    ``forward`` returns raw scores (logits); apply ``F.softmax(pred, dim=1)``
    to turn them into probabilities.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)

        # After conv/pool twice a 28 × 28 image is 12 channels of 4 × 4.
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)

    def forward(self, t):
        """Run one forward pass.

        Args:
            t: image tensor of shape (batch, 1, 28, 28). A single unbatched
                image of shape (1, 28, 28) is treated as a batch of one.

        Returns:
            Tensor of shape (batch, 10) holding unnormalised class scores.

        Raises:
            RuntimeError: if the input's spatial size does not reduce to
                12 * 4 * 4 features per sample (i.e. it is not 28 × 28).
        """
        # (1) input layer: identity, but promote an unbatched image to a batch of one
        if t.dim() == 3:
            t = t.unsqueeze(0)

        # (2) hidden conv layer: 28 × 28 -> 24 × 24 -> 12 × 12
        t = F.relu(self.conv1(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (3) hidden conv layer: 12 × 12 -> 8 × 8 -> 4 × 4
        t = F.relu(self.conv2(t))
        t = F.max_pool2d(t, kernel_size=2, stride=2)

        # (4) hidden linear layer
        # Flatten per sample and keep the batch dimension fixed. reshape(-1, 192)
        # would silently spill extra features into the batch dimension for
        # wrongly sized inputs; here a mismatch fails loudly in fc1 instead.
        t = t.flatten(start_dim=1)
        t = F.relu(self.fc1(t))

        # (5) hidden linear layer
        t = F.relu(self.fc2(t))

        # (6) output layer: raw scores, softmax is left to the caller / loss
        t = self.out(t)

        return t

In [4]:
# Disable gradient tracking globally; the cells below only run forward passes.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x22f39a75a90>

In [5]:
network = Network()

In [6]:
sample = next(iter(train_set)) # the first image in training set

In [7]:
image, label = sample
image.shape # a single image with label 9 (Ankle boot) that will be passed through the network

torch.Size([1, 28, 28])

In [8]:
label

9

In [9]:
image.unsqueeze(0).shape # this gives us a batch of size 1

torch.Size([1, 1, 28, 28])

In [10]:
pred = network(image.unsqueeze(0)) # image shape needs to be (batch_size × in_channels × H × W )

In [11]:
pred.shape

torch.Size([1, 10])

In [12]:
pred

tensor([[-0.0022, -0.0183,  0.0559,  0.0380,  0.0372,  0.0746,  0.0706, -0.1070, -0.0628, -0.0647]])

In [13]:
label

9

In [14]:
pred.argmax(dim = 1) # the prediction in this case is incorrect

tensor([5])

In [15]:
F.softmax(pred,dim=1)

tensor([[0.0994, 0.0978, 0.1053, 0.1035, 0.1034, 0.1073, 0.1069, 0.0895, 0.0935, 0.0934]])

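Network weights are randomly initialized. Each time we create a new instance of our network, the weights within the network will be different, so the predictions we get will differ from one network instance to the next. Because this network has not been trained, the softmax probabilities above are all close to 0.1 and the predicted class is essentially arbitrary.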

In [16]:
F.softmax(pred, dim=1).sum()

tensor(1.)

In [17]:
print(network)

Network(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 12, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=192, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=60, bias=True)
  (out): Linear(in_features=60, out_features=10, bias=True)
)


In [18]:
network.conv1.weight

Parameter containing:
tensor([[[[ 0.0223,  0.1937,  0.0145, -0.1463, -0.1362],
          [-0.0176, -0.1004, -0.1849,  0.0675, -0.1715],
          [ 0.1360,  0.0861,  0.1588, -0.1981, -0.1969],
          [-0.1239,  0.0620,  0.0521,  0.0004,  0.0939],
          [ 0.0705, -0.0191, -0.0849, -0.0874,  0.0426]]],


        [[[-0.1045, -0.1695, -0.1428,  0.0501, -0.1895],
          [-0.0589,  0.0844,  0.1490,  0.1911,  0.1579],
          [-0.0212,  0.0917, -0.0421, -0.0755, -0.1491],
          [-0.0199,  0.1599, -0.1593, -0.1041, -0.0893],
          [-0.1800,  0.1123,  0.0095,  0.1188, -0.0050]]],


        [[[-0.0440,  0.1306, -0.1850,  0.0351,  0.1271],
          [-0.1005, -0.1520,  0.0990, -0.0930,  0.0393],
          [ 0.1182, -0.0668, -0.0547, -0.0179,  0.0718],
          [-0.0218,  0.1389, -0.0575, -0.0886,  0.1490],
          [ 0.1632,  0.1838,  0.1496, -0.0337, -0.1269]]],


        [[[ 0.0151, -0.1776, -0.0718, -0.1233, -0.0684],
          [ 0.1111,  0.0321, -0.0381,  0.1587,  0.0638

In [19]:
network.fc1.weight

Parameter containing:
tensor([[-0.0018,  0.0078,  0.0129,  ...,  0.0271,  0.0717,  0.0343],
        [ 0.0601, -0.0669,  0.0465,  ..., -0.0249,  0.0662, -0.0495],
        [-0.0681,  0.0158, -0.0437,  ..., -0.0292, -0.0489, -0.0326],
        ...,
        [ 0.0178,  0.0468, -0.0143,  ...,  0.0460,  0.0661, -0.0047],
        [-0.0482,  0.0383, -0.0506,  ...,  0.0604, -0.0186, -0.0660],
        [-0.0244, -0.0491,  0.0379,  ..., -0.0438,  0.0332,  0.0628]], requires_grad=True)

In [20]:
network.fc1.weight.shape


torch.Size([120, 192])

In [21]:
# Print the shape of every parameter tensor (each layer's weight, then its bias).
for para in network.parameters():
    print(para.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([12, 6, 5, 5])
torch.Size([12])
torch.Size([120, 192])
torch.Size([120])
torch.Size([60, 120])
torch.Size([60])
torch.Size([10, 60])
torch.Size([10])


In [22]:
# Same listing as network.parameters(), but with each tensor's "<layer>.<weight|bias>" name.
for name, para in network.named_parameters():
    print(name, '\t\t', para.shape)

conv1.weight 		 torch.Size([6, 1, 5, 5])
conv1.bias 		 torch.Size([6])
conv2.weight 		 torch.Size([12, 6, 5, 5])
conv2.bias 		 torch.Size([12])
fc1.weight 		 torch.Size([120, 192])
fc1.bias 		 torch.Size([120])
fc2.weight 		 torch.Size([60, 120])
fc2.bias 		 torch.Size([60])
out.weight 		 torch.Size([10, 60])
out.bias 		 torch.Size([10])


In [23]:
in_features = torch.tensor([1,2,3,4], dtype =torch.float32)

In [24]:
weight_matrix = torch.tensor([
    [1,2,3,4],
    [2,3,4,5],
    [3,4,5,6]
], dtype = torch.float32)

In [25]:
# (3 × 4 matrix) · (4-element vector) -> 3-element vector: 4 input features mapped to 3 outputs.
weight_matrix.matmul(in_features)

tensor([30., 40., 50.])

In [26]:
fc = nn.Linear(in_features=4, out_features=3, bias = False)

In [27]:
# Replace the layer's randomly initialized weight with weight_matrix so that
# fc(in_features) computes the same product as weight_matrix.matmul(in_features).
fc.weight = nn.Parameter(weight_matrix)

In [28]:
fc(in_features)

tensor([30., 40., 50.])