In [1]:
from __future__ import print_function
import torch

In [2]:
x = torch.Tensor(5, 3)  # construct a 5x3 matrix, uninitialized

In [3]:
x


 0.0000e+00  0.0000e+00  3.3418e-37
 0.0000e+00  3.3658e-37  0.0000e+00
 6.1938e-38  0.0000e+00  6.1938e-38
 0.0000e+00  2.7339e+20  1.8939e+34
 1.8966e+28  7.3977e+31  1.7860e+25
[torch.FloatTensor of size 5x3]

In [4]:
x = torch.rand(5, 3)  # construct a randomly initialized matrix

In [5]:
x


 0.1207  0.2637  0.5643
 0.4842  0.7861  0.8789
 0.6946  0.8326  0.1511
 0.5046  0.3508  0.9930
 0.9675  0.1005  0.5702
[torch.FloatTensor of size 5x3]

In [6]:
x.size()

torch.Size([5, 3])

In [7]:
y = torch.rand(5, 3)

In [8]:
y


 0.4007  0.4604  0.5863
 0.6113  0.9807  0.4373
 0.5460  0.4784  0.7155
 0.0684  0.5380  0.0970
 0.2770  0.0806  0.3854
[torch.FloatTensor of size 5x3]

In [9]:
# addition: syntax 1
x + y


 0.5214  0.7242  1.1506
 1.0955  1.7668  1.3162
 1.2406  1.3110  0.8665
 0.5729  0.8888  1.0899
 1.2445  0.1811  0.9556
[torch.FloatTensor of size 5x3]

In [10]:
# addition: syntax 2
torch.add(x, y)


 0.5214  0.7242  1.1506
 1.0955  1.7668  1.3162
 1.2406  1.3110  0.8665
 0.5729  0.8888  1.0899
 1.2445  0.1811  0.9556
[torch.FloatTensor of size 5x3]

In [11]:
# addition: giving an output tensor
# `result` is allocated with the same 5x3 shape as x and y but left
# uninitialized; torch.add then fills it with x + y through `out=`.
result = torch.Tensor(5, 3)
torch.add(x, y, out=result)


 0.5214  0.7242  1.1506
 1.0955  1.7668  1.3162
 1.2406  1.3110  0.8665
 0.5729  0.8888  1.0899
 1.2445  0.1811  0.9556
[torch.FloatTensor of size 5x3]

In [12]:
result


 0.5214  0.7242  1.1506
 1.0955  1.7668  1.3162
 1.2406  1.3110  0.8665
 0.5729  0.8888  1.0899
 1.2445  0.1811  0.9556
[torch.FloatTensor of size 5x3]

In [13]:
# addition: in-place
y.add_(x) # adds x to y


 0.5214  0.7242  1.1506
 1.0955  1.7668  1.3162
 1.2406  1.3110  0.8665
 0.5729  0.8888  1.0899
 1.2445  0.1811  0.9556
[torch.FloatTensor of size 5x3]

In [14]:
y


 0.5214  0.7242  1.1506
 1.0955  1.7668  1.3162
 1.2406  1.3110  0.8665
 0.5729  0.8888  1.0899
 1.2445  0.1811  0.9556
[torch.FloatTensor of size 5x3]

In [15]:
# standard numpy-like indexing with all bells and whistles
x[:,1]


 0.2637
 0.7861
 0.8326
 0.3508
 0.1005
[torch.FloatTensor of size 5]

In [16]:
a = torch.ones(5)
a


 1
 1
 1
 1
 1
[torch.FloatTensor of size 5]

In [17]:
b = a.numpy()
b

array([ 1.,  1.,  1.,  1.,  1.], dtype=float32)

In [18]:
# Add 1 to the torch tensor `a` in place. `b` was obtained from a.numpy() and
# is never reassigned, yet it prints the updated values as well, so the two
# evidently refer to the same underlying data.
a.add_(1)
print(a)
print(b) # see how the numpy array changed in value


 2
 2
 2
 2
 2
[torch.FloatTensor of size 5]

[ 2.  2.  2.  2.  2.]


In [19]:
# The reverse direction: build a torch tensor from a numpy array.
import numpy as np
a = np.ones(5)  # numpy's default float64, hence the DoubleTensor in the output
b = torch.from_numpy(a)
np.add(a, 1, out=a)  # writes the result back into `a` itself
print(a)
print(b) # see how changing the np array changed the torch Tensor automatically

[ 2.  2.  2.  2.  2.]

 2
 2
 2
 2
 2
[torch.DoubleTensor of size 5]



In [20]:
torch.cuda.is_available()

True

In [22]:
# let us run this cell only if CUDA is available
if torch.cuda.is_available():
    # rebind x and y to the tensors returned by .cuda()
    x = x.cuda()
    y = y.cuda()
    # The sum is printed as a torch.cuda.FloatTensor. Note that y already holds
    # the result of the earlier in-place y.add_(x), so the values differ from
    # the first addition cells.
    print(x + y)


 0.6421  0.9879  1.7149
 1.5796  2.5529  2.1951
 1.9352  2.1436  1.0176
 1.0775  1.2397  2.0829
 2.2120  0.2816  1.5259
[torch.cuda.FloatTensor of size 5x3 (GPU 0)]



In [23]:
from torch.autograd import Variable

In [24]:
x = Variable(torch.ones(2, 2), requires_grad = True)
x

Variable containing:
 1  1
 1  1
[torch.FloatTensor of size 2x2]

In [25]:
y = x + 2
y

Variable containing:
 3  3
 3  3
[torch.FloatTensor of size 2x2]

In [26]:
x.creator

In [27]:
y.creator

<torch.autograd._functions.basic_ops.AddConstant at 0x2ddb370>

In [28]:
z = y * y * 3
z

Variable containing:
 27  27
 27  27
[torch.FloatTensor of size 2x2]

In [29]:
out = z.mean()
out

Variable containing:
 27
[torch.FloatTensor of size 1]

In [30]:
# let's backprop now
out.backward()

# out.backward() is equivalent to doing out.backward(torch.Tensor([1.0]))

In [31]:
# print gradients d(out)/dx
x.grad

Variable containing:
 4.5000  4.5000
 4.5000  4.5000
[torch.FloatTensor of size 2x2]

In [38]:
x = torch.randn(3)
x = Variable(x, requires_grad = True)

In [39]:
# Keep doubling y until the 2-norm of its data is no longer below 1000.
# The number of iterations depends on the random x drawn above.
y = x * 2
while y.data.norm() < 1000:
    y = y * 2
    # printed after the doubling, so the last value shown is the first one >= 1000
    print(y.data.norm())

3.77532780548
7.55065561097
15.1013112219
30.2026224439
60.4052448878
120.810489776
241.620979551
483.241959102
966.483918204
1932.96783641


In [40]:
y

Variable containing:
  611.1737
   39.9906
 1833.3663
[torch.FloatTensor of size 3]

In [41]:
y.data.norm()

1932.9678364082624

In [42]:
# y has 3 elements rather than a single value, so backward() is given one
# gradient entry per element of y.
# In the recorded run x.grad comes out as gradients * 2048: y is x * 2 followed
# by ten further doublings (one per printed norm), i.e. x * 2**11.
gradients = torch.FloatTensor([0.1, 1.0, 0.0001])
y.backward(gradients)

In [43]:
x.grad

Variable containing:
  204.8000
 2048.0000
    0.2048
[torch.FloatTensor of size 3]

In [44]:
import torch.optim as optim

In [45]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network: two conv/pool stages, then three linear layers.

    fc1 expects 16*5*5 flattened features, which is what two unpadded 5x5
    convolutions and two 2x2 poolings leave of a 32x32 single-channel image
    (32 -> 28 -> 14 -> 10 -> 5, with 16 channels).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutions take (input channels, output channels, square kernel size):
        # 1 input image channel -> 6 channels, then 6 -> 16, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # Affine operations, y = Wx + b, narrowing 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run x through both conv stages and the linear layers; return fc3's output."""
        # Stage 1: convolution -> ReLU -> max pooling over an explicit (2, 2) window.
        stage1 = F.relu(self.conv1(x))
        stage1 = F.max_pool2d(stage1, (2, 2))
        # Stage 2: same pattern. For a square window a single number is enough.
        stage2 = F.relu(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, 2)
        # Collapse every dimension except the batch dimension into one.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of x except the first (batch) one."""
        total = 1
        for extent in x.size()[1:]:
            total = total * extent
        return total

net = Net()
net

Net (
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear (400 -> 120)
  (fc2): Linear (120 -> 84)
  (fc3): Linear (84 -> 10)
)

In [46]:
# Collect everything net.parameters() yields. The recorded count of 10 is
# consistent with one weight and one bias for each of the 5 layers of Net.
params = list(net.parameters())
print(len(params))
print(params[0].size()) # conv1's .weight

10
torch.Size([6, 1, 5, 5])


In [47]:
print(params[0])

Parameter containing:
(0 ,0 ,.,.) = 
  0.0641 -0.0030  0.0368 -0.0858  0.1985
  0.1514 -0.0624  0.0266  0.0019  0.1382
  0.1897 -0.0267 -0.0002  0.0691 -0.0069
 -0.0959 -0.0108  0.0769  0.1721  0.1047
  0.1119 -0.1052  0.1763 -0.1112  0.1803

(1 ,0 ,.,.) = 
  0.0032 -0.1171 -0.1687 -0.1049 -0.1757
 -0.1715  0.0929  0.1186  0.0709 -0.0158
  0.1471 -0.0748  0.0843  0.1835  0.0935
 -0.0955 -0.0988 -0.1913  0.0143  0.1816
  0.0323 -0.0259  0.1772  0.1642 -0.0305

(2 ,0 ,.,.) = 
 -0.0718  0.1691 -0.1307 -0.1722  0.0305
 -0.0942 -0.0045 -0.1891 -0.0869  0.1893
  0.1228 -0.0970 -0.0613 -0.1811  0.1153
  0.1835 -0.0075  0.1810 -0.1538 -0.1004
 -0.0931  0.1416 -0.1271 -0.1791  0.0464

(3 ,0 ,.,.) = 
 -0.0781  0.0110 -0.0412 -0.1653  0.1553
  0.0666 -0.0733 -0.1901  0.1989  0.1906
  0.0422  0.1438 -0.1773  0.1229 -0.0179
 -0.0402 -0.1672 -0.1811  0.0343  0.0005
  0.1761  0.0653 -0.1588 -0.0897 -0.0556

(4 ,0 ,.,.) = 
 -0.0080 -0.0226 -0.1861 -0.0688  0.0980
  0.1660 -0.1172  0.0175  0.1395  0.04

In [48]:
# A random batch of one single-channel 32x32 image (conv1 takes 1 input
# channel); the network maps it to a 1x10 output.
# NOTE(review): `input` shadows the Python builtin of the same name. It is
# left as is because later cells read this variable.
input = Variable(torch.randn(1, 1, 32, 32))
out = net(input)
out

Variable containing:
-0.0753  0.1570 -0.0989 -0.0335 -0.0813  0.0043 -0.1256 -0.0522  0.0074 -0.0028
[torch.FloatTensor of size 1x10]

In [49]:
net.zero_grad() # zeroes the gradient buffers of all parameters
out.backward(torch.randn(1, 10)) # backprops with random gradients

In [50]:
input.grad

Variable containing:
(0 ,0 ,.,.) = 
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
     ...       ⋱       ...    
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
   0   0   0  ...    0   0   0
[torch.FloatTensor of size 1x1x32x32]

In [51]:
# Compute a mean-squared-error loss between the network output and a dummy target.
output = net(input)
target = Variable(torch.range(1, 10))  # a dummy target, for example
# torch.range(1, 10) is 1-D with 10 elements while output is 1x10; reshape the
# target so both arguments of the loss have the same shape. The element values,
# and therefore the loss, are unchanged.
target = target.view(1, -1)
criterion = nn.MSELoss()
loss = criterion(output, target)
loss

Variable containing:
 38.8728
[torch.FloatTensor of size 1]

In [52]:
loss.creator

<torch.nn._functions.thnn.auto.MSELoss at 0x3e5c1e0>

In [53]:
# For illustration, let us follow a few steps backward
# Each step takes the first entry of the current function's previous_functions.
print(loss.creator) # MSELoss
print(loss.creator.previous_functions[0][0]) # Linear
print(loss.creator.previous_functions[0][0].previous_functions[0][0]) # ReLU (shown as a Threshold object in the output)

<torch.nn._functions.thnn.auto.MSELoss object at 0x3e5c1e0>
<torch.nn._functions.linear.Linear object at 0x3e5c118>
<torch.nn._functions.thnn.auto.Threshold object at 0x3e5c050>


In [54]:
# now we shall call loss.backward(), and have a look at conv1's bias gradients before and after the backward.
net.zero_grad() # zeroes the gradient buffers of all parameters
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)
loss.backward()
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
Variable containing:
 0
 0
 0
 0
 0
 0
[torch.FloatTensor of size 6]

conv1.bias.grad after backward
Variable containing:
1.00000e-02 *
 -8.7845
 -5.4330
 -8.5184
 -7.1208
  4.6122
  6.7173
[torch.FloatTensor of size 6]

