In [42]:
from __future__ import print_function
import torch

# Build a tensor straight from Python data.
x = torch.tensor([5.5, 3])
print(x)

# new_* methods take a size; dtype is overridden explicitly here.
x = x.new_ones((5, 3), dtype=torch.double)
print(x)

# *_like factories take their shape from the argument; dtype is overridden.
x = torch.randn_like(x, dtype=torch.float)
print(x)

print(x.shape)

# Three spellings of the same element-wise addition.
y = torch.rand((5, 3))
print(x + y)

print(torch.add(x, y))

result = torch.empty((5, 3))
torch.add(x, y, out=result)  # writes the sum into a preallocated tensor
print(result)

# In-place variant: y <- y + x
y.add_(x)
print(y)

# Any operation that mutates a tensor in-place is post-fixed with an _.
# For example: x.copy_(y), x.t_(), will change x.

# NumPy-style indexing: column 1 of every row.
print(x[:, 1])

# Resizing:
# To resize/reshape a tensor, use Tensor.view:

x = torch.randn((4, 4))
y = x.view(16)
z = x.view(-1, 8)  # -1 means "infer this dimension from the others"
print(x.shape, y.shape, z.shape)

# A one-element tensor converts to a plain Python number with .item()

x = torch.randn(1)
print(x)
print(x.item())

# Torch tensors and NumPy arrays convert to each other cheaply: for a CPU
# tensor both objects share the same underlying memory, so changing one
# changes the other.

# Torch Tensor -> NumPy array
a = torch.ones(5)
print(a)
b = a.numpy()
print(b)

a.add_(1)  # in-place on the tensor; b shows the new values too
print(a)
print(b)

# NumPy array -> Torch Tensor
import numpy as np

a = np.ones(5)
b = torch.from_numpy(a)
a += 1  # in-place on the array; b shows the new values too
print(a)
print(b)

# Move tensors on and off the GPU with .to(); skipped when CUDA is absent.
if not torch.cuda.is_available():
    print("cuda is not available")
else:
    device = torch.device("cuda")  # a CUDA device object
    y = torch.ones_like(x, device=device)  # created directly on the GPU
    x = x.to(device)  # a plain string works as well: .to("cuda")
    z = x + y
    print(z)
    # .to() can change device and dtype in one call
    print(z.to("cpu", torch.double))

# Autograd basics
# torch.Tensor is the central class of the package. Setting its
# .requires_grad attribute to True makes it track every operation applied
# to it. Calling .backward() once the computation is finished computes all
# gradients automatically and accumulates them into the .grad attribute.
#
# To stop tracking, call .detach(): the result is cut off from the
# computation history and future computation on it is not tracked.

x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([5.5000, 3.0000])
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
tensor([[-2.0078,  2.0668, -0.2910],
        [-0.1582,  0.3994, -0.5833],
        [ 0.4121, -0.3071,  0.9658],
        [ 0.1539, -0.2395,  0.9813],
        [-0.0451, -1.8515, -0.1592]])
torch.Size([5, 3])
tensor([[-1.2633,  3.0122,  0.4877],
        [ 0.0959,  0.9970,  0.1438],
        [ 1.1954,  0.5600,  1.2855],
        [ 0.1863, -0.0060,  1.1196],
        [ 0.9342, -1.0621,  0.6784]])
tensor([[-1.2633,  3.0122,  0.4877],
        [ 0.0959,  0.9970,  0.1438],
        [ 1.1954,  0.5600,  1.2855],
        [ 0.1863, -0.0060,  1.1196],
        [ 0.9342, -1.0621,  0.6784]])
tensor([[-1.2633,  3.0122,  0.4877],
        [ 0.0959,  0.9970,  0.1438],
        [ 1.1954,  0.5600,  1.2855],
        [ 0.1863, -0.0060,  1.1196],
        [ 0.9342, -1.0621,  0.6784]])
tensor([[-1.2633,  3.0122,  0.4877],
        [ 0.0959,  0.9970,  0.1438],
      

In [43]:
# x requires grad, so the result of an operation on it carries a grad_fn
# (AddBackward0 in the recorded output).
y = x + 2
print(y)



tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [44]:
# grad_fn references the function object that produced y.
print(y.grad_fn)



<AddBackward0 object at 0x12821e650>


In [45]:
# Build further on y, then reduce to the scalar `out` so that backward()
# can later be called on it without an explicit gradient argument.
z = y * y * 3
out = z.mean()

print(z, out)



tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [46]:
# requires_grad defaults to False; .requires_grad_() switches it in place.
a = torch.randn((2, 2))
a = (a * 3) / (a - 1)
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
# Operations performed after the flag is set are recorded, so b has a grad_fn.
b = torch.sum(a * a)
print(b.grad_fn)

False
True
<SumBackward0 object at 0x12821ed50>


In [47]:
# out is a scalar (the mean of z), so backward() needs no gradient argument.
out.backward()

In [48]:
# d(out)/dx with out = mean(3 * (x + 2)^2) over 4 entries: each entry is
# 1.5 * (x + 2), i.e. 4.5 for x = 1.
print(x.grad)



tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [49]:
x = torch.randn(3, requires_grad=True)

y = x * 2
# Keep doubling until the norm reaches 1000. The norm is only a loop
# condition, so it is computed on a detached tensor: .detach() is the
# supported replacement for the legacy .data attribute and keeps the check
# out of the autograd graph.
while y.detach().norm() < 1000:
    y = y * 2

print(y)



tensor([ -286.4637, -1059.5522, -1275.2039], grad_fn=<MulBackward0>)


In [50]:
# y is not a scalar, so backward() must be given a tensor of matching shape;
# autograd then computes the vector-Jacobian product of v with dy/dx.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)

print(x.grad)


tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])


In [51]:
# Results computed from a tracked tensor are tracked as well ...
print(x.requires_grad)
print(x.pow(2).requires_grad)

# ... unless the computation runs inside a no_grad block.
with torch.no_grad():
    print(x.pow(2).requires_grad)



True
True
False


In [52]:
# .detach() yields a tensor with the same values that does not require grad.
print(x.requires_grad)
y = x.detach()
print(y.requires_grad)
print(torch.all(torch.eq(x, y)))



True
False
tensor(True)


In [53]:
### Neural networks
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small LeNet-style CNN for single-channel 32x32 images.

    Two 3x3 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers that produce 10 output scores per sample.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        # 16 channels * 6 * 6 positions: a 32x32 input shrinks to
        # 30 -> 15 -> 13 -> 6 through the conv/pool stack.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return (batch, 10) scores for a (batch, 1, 32, 32) input."""
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the size is a square you can only specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension. Unlike
        # view(-1, n), this also works for an empty batch, where the -1
        # dimension cannot be inferred.
        x = x.flatten(1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample (all dims but the first).

        No longer used by forward(); kept so existing callers keep working.
        """
        return x.size()[1:].numel()


# Instantiate the network; printing a module lists its registered layers.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [54]:
# parameters() yields the learnable tensors: a weight and a bias for each of
# the five layers, hence the 10 entries reported below.
params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight


10
torch.Size([6, 1, 3, 3])


In [55]:
# Run one random 1x1x32x32 input through the network.
# NOTE(review): `input` shadows the Python builtin; later cells reuse this
# name, so a rename has to be applied across the whole notebook.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)



tensor([[ 0.0790,  0.1140, -0.0736, -0.1083,  0.0638,  0.0681, -0.0500, -0.0523,
         -0.1034, -0.0570]], grad_fn=<AddmmBackward>)


In [56]:
# Clear the parameters' gradient buffers, then backpropagate a random
# gradient of out's shape (out is not a scalar, so one must be supplied).
net.zero_grad()
out.backward(torch.randn(1, 10))



In [57]:
# Score the network output against a dummy target with mean-squared error.
criterion = nn.MSELoss()

output = net(input)
# random stand-in target, reshaped to the same shape as output
target = torch.randn(10).view(1, -1)

loss = criterion(output, target)
print(loss)

tensor(0.9440, grad_fn=<MseLossBackward>)


In [58]:
# Walk backwards through the graph that produced loss. The trailing labels
# name the node types shown in the recorded output.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (Addmm)
# NOTE: entry [0][0] of the Addmm node is an AccumulateGrad node (a leaf
# parameter, presumably fc3's bias), not the ReLU that precedes fc3.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # AccumulateGrad


<MseLossBackward object at 0x1286dedd0>
<AddmmBackward object at 0x1286de1d0>
<AccumulateGrad object at 0x1286dedd0>


In [59]:
# Show that backward() fills the gradient buffers that zero_grad() cleared.
net.zero_grad()  # reset the gradient buffers of every parameter

print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')

loss.backward()

print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0138,  0.0064,  0.0131, -0.0263, -0.0071,  0.0132])


In [61]:
# Plain SGD step: weight = weight - learning_rate * gradient
learning_rate = 0.01
# Update under no_grad rather than through the legacy .data attribute: the
# step is not recorded by autograd, yet the in-place change stays visible to it.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:  # parameter took no part in the last backward()
            continue
        f.sub_(f.grad * learning_rate)



In [62]:
import torch.optim as optim

# create your optimizer: SGD over all of net's parameters, using the same
# 0.01 step size as the manual update
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop (the order of these five calls matters):
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)  # forward pass
loss = criterion(output, target)  # compare the prediction with the target
loss.backward()  # populate .grad on every parameter
optimizer.step()    # Does the update

