In [2]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6,15,5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)),(2,2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [4]:
net = Net()
net

Net(
  (conv1): Conv2d (1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d (6, 15, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120)
  (fc2): Linear(in_features=120, out_features=84)
  (fc3): Linear(in_features=84, out_features=10)
)

In [5]:
param = list(net.parameters())
#print(param)
print(len(param))
print(param[0].size())

10
torch.Size([6, 1, 5, 5])


In [6]:
input = Variable(torch.randn(1,1,32,32))
input

Variable containing:
(0 ,0 ,.,.) = 
 -0.6942 -1.5305 -0.0919  ...   1.1585 -1.1838 -1.5291
  1.0865  0.4666 -1.0670  ...   1.9313 -0.5371  0.6627
  0.1947  0.6698  0.4105  ...  -1.5660  0.7879  1.1434
           ...             ⋱             ...          
  0.9158  1.0929  0.1171  ...  -0.0980 -0.6740 -1.2711
  1.3006 -1.3908 -0.4397  ...  -0.8409  0.2024  0.6765
  0.9412 -0.7685  1.8606  ...  -0.3987  0.6987 -0.9139
[torch.FloatTensor of size 1x1x32x32]

In [7]:
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


net = Net()
print(net)


Net(
  (conv1): Conv2d (1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d (6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120)
  (fc2): Linear(in_features=120, out_features=84)
  (fc3): Linear(in_features=84, out_features=10)
)


In [8]:
params = list(net.parameters())
print(len(params))
print(params[0].size()) 

10
torch.Size([6, 1, 5, 5])


In [9]:
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)


Variable containing:
 0.0202 -0.0006  0.0502  0.0801  0.1233 -0.0306  0.0187  0.0380 -0.0985  0.0769
[torch.FloatTensor of size 1x10]



In [10]:
net

Net(
  (conv1): Conv2d (1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d (6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120)
  (fc2): Linear(in_features=120, out_features=84)
  (fc3): Linear(in_features=84, out_features=10)
)

In [11]:
net.zero_grad()
out.backward(torch.randn(1,10))

## Loss Function

In [12]:
output = net(input)
target = Variable(torch.arange(1,11))
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

Variable containing:
 38.2563
[torch.FloatTensor of size 1]



In [14]:
print(loss.grad_fn)

<MseLossBackward object at 0x7f1d59303cc0>


In [15]:
print(loss.grad_fn.next_functions[0][0])

<AddmmBackward object at 0x7f1d6c156c18>


In [16]:
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<ExpandBackward object at 0x7f1d6c103748>


In [17]:
net.zero_grad()
print(net.conv1.bias.grad)

Variable containing:
 0
 0
 0
 0
 0
 0
[torch.FloatTensor of size 6]



In [18]:
loss.backward()

In [19]:
print(net.conv1.bias.grad)

Variable containing:
1.00000e-02 *
 -6.3835
 -4.2272
 -2.7133
 -1.9252
  2.2236
  2.2399
[torch.FloatTensor of size 6]



## Update the weights

#### weight = weight - learning_rate * gradient

In [21]:
list(net.parameters())

[Parameter containing:
 (0 ,0 ,.,.) = 
   0.0925  0.0922 -0.0192 -0.0864 -0.0189
  -0.0247 -0.1674  0.0085  0.0797  0.1546
  -0.1720 -0.0154  0.1709 -0.0915  0.1874
   0.0447 -0.1285 -0.0339 -0.1926  0.0246
  -0.1246  0.1604 -0.0717  0.1736  0.0851
 
 (1 ,0 ,.,.) = 
  -0.0327  0.0834 -0.1257 -0.0210 -0.1381
  -0.0698 -0.0799 -0.0607 -0.0886 -0.0157
  -0.1498 -0.1427 -0.1823  0.0107 -0.0035
   0.1780  0.0916  0.1774  0.1672  0.1471
  -0.1979  0.0728  0.0179  0.0420  0.1083
 
 (2 ,0 ,.,.) = 
  -0.1373 -0.0740 -0.1030  0.0666  0.0156
  -0.1180  0.1460  0.0693 -0.0391  0.1883
  -0.0161 -0.1292  0.0277 -0.1514  0.0012
  -0.1018  0.0023  0.0763  0.0961  0.0301
   0.1791  0.1040 -0.0032  0.1868 -0.0663
 
 (3 ,0 ,.,.) = 
  -0.1180  0.0819 -0.1447 -0.1249  0.1745
  -0.1220 -0.0927  0.0440  0.0152 -0.0046
  -0.1224  0.1593  0.0883  0.1939 -0.0638
  -0.1402 -0.1120 -0.1136  0.1462 -0.1696
  -0.0071 -0.0384 -0.0625  0.0835 -0.0112
 
 (4 ,0 ,.,.) = 
  -0.1981 -0.1976 -0.1026  0.1756 -0.1226
  -0.01

In [22]:
learning_rate = 1e-1
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)

In [23]:
import torch.optim as optim
# create optimizer
optimizer = optim.SGD(net.parameters(), lr= learning_rate)

# in training loop
optimizer.zero_grad()
output = net(input)
loss = criterion(output,target)
loss.backward()
optimizer.step() ## does the update