In [6]:
import torch
from torch.autograd import Variable
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
#Part 1: Start of PyTorch
#Initialize a matrix
x1 = torch.Tensor(5, 3)
print(x1)

x2 = torch.rand(5, 3)
print(x2)

#Different ways of addition
y1 = x1 + x2
y2 = torch.add(x1, x2)

#Resizing
x = torch.rand(5, 3)
y = x.view(15)
z = x.view(-1, 3)

#Convert a torch tensor to numpy array
a = torch.ones(5)
b = a.numpy()
print(b)

#Convert a numpy array to a torch tensor
a = np.ones(5)
b = torch.from_numpy(a)


 0.0000e+00  2.0000e+00 -2.7242e-22
 3.6902e+19  5.6052e-45  0.0000e+00
 0.0000e+00  0.0000e+00  2.1450e-28
 1.4013e-45  1.1445e-28  1.4013e-45
 5.6052e-45  0.0000e+00  0.0000e+00
[torch.FloatTensor of size 5x3]


 0.8738  0.9731  0.1336
 0.2734  0.9482  0.6494
 0.8611  0.0948  0.1456
 0.8436  0.9301  0.1027
 0.9056  0.0169  0.5691
[torch.FloatTensor of size 5x3]

[ 1.  1.  1.  1.  1.]


In [3]:
#Part 2: Autograd: automatic differentiation
x = Variable(torch.ones(2, 2), requires_grad=True)
print(x)
y = x*x
print('The gradient of y on x: ', y.grad_fn)

z = y * y * 3
out = z.mean()

print(z, out)

x = torch.randn(3)
x = Variable(x, requires_grad=True)

y = x * 2
while y.data.norm() < 1000:
    y = y * 2

print(y)

gradients = torch.FloatTensor([0.1, 1.0, 0.0001])
y.backward(gradients)

print(x.grad)

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]

The gradient of y on x:  <MulBackward1 object at 0x112eaa1d0>
Variable containing:
 3  3
 3  3
[torch.FloatTensor of size 2x2]
 Variable containing:
 3
[torch.FloatTensor of size 1]

Variable containing:
 -959.0991
-1477.7640
  452.4305
[torch.FloatTensor of size 3]

Variable containing:
  102.4000
 1024.0000
    0.1024
[torch.FloatTensor of size 3]



In [8]:
#Part 3: Neural Networks
#Neural Networks can be constructed using the torch.nn package
#nn.Module contains layers / forward(input) -> returns output
#Gradient Descent: weight = weight - learning_rate * gradient

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # and affine operation: y = Wx + b
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        #Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        #If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
        
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [10]:
net = Net()
print(net)

Net(
  (conv1): Conv2d (1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d (6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120)
  (fc2): Linear(in_features=120, out_features=84)
  (fc3): Linear(in_features=84, out_features=10)
)


In [11]:
params = list(net.parameters())
print(len(params))
print(params[0].size())

10
torch.Size([6, 1, 5, 5])


In [15]:
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
print(out)

Variable containing:
-0.0759 -0.0791 -0.0303 -0.0103  0.1322 -0.0821  0.0713  0.1030 -0.0002  0.1637
[torch.FloatTensor of size 1x10]



In [14]:
net.zero_grad()
out.backward(torch.randn(1, 10))

RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed. Specify retain_graph=True when calling backward the first time.

In [None]:
#Parameters have been learned 
torch.Tensor
autograd.Variable
nn.Module
nn.Parameter
autograd.Function

In [None]:
#Loss and Backpropagation
output = net(input)
target = Variable(torch.arange(1, 11))
#Define the loss function
criterion = nn.MSELoss()

#Define the loss function
loss = criterion(output, target)

net.zero_grad()     # zeroes the gradient buffers of all parameters

#Backpropagation
#You only need to use this only code for the back propagation
loss.backward()

#Update the weights
#Define the learning rate
#Manual Gradient descent, but we also can use the optimizer
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate) #This is the gradient descent algorithm

#Using optimizer 
optimizer = optim.Adam(net.parameters(), lr=0.01)

#In your training loop
criterion = nn.MSELoss()
loss = criterion(output, target)
net.zero_grad()
loss.backward()
optimizer.step() #update the gradient