In [1]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [9]:
# Seed PyTorch's global random number generator so that the torch.randn()
# calls in the cells below produce the same values on every top-to-bottom run.
torch.manual_seed(1)

<torch._C.Generator at 0x116e10ad0>

In [2]:
# Creating Tensors
# ~~~~~~~~~~~~~~~~
#
# Tensors can be created from Python lists with the torch.tensor()
# function.
#

# A flat Python list of floats turns into a 1-D tensor (a vector) when it is
# handed to torch.tensor().
V_data = [1.0, 2.0, 3.0]
V = torch.tensor(V_data)
print(V)

tensor([1., 2., 3.])


In [3]:
# Creates a matrix: a nested list (one inner list per row) becomes a 2-D tensor.
# Every entry is written as a float literal so the element type of the data
# is consistent and does not rely on int -> float promotion.
M_data = [[1., 2., 3.], [4., 5., 6.]]
M = torch.tensor(M_data)
print(M)

tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [4]:
# Two 2x2 matrices nested in an outer list give a 3-D tensor of size 2x2x2.
T_data = [
    [[1.0, 2.0], [3.0, 4.0]],
    [[5.0, 6.0], [7.0, 8.0]],
]
T = torch.tensor(T_data)
print(T)

tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])


In [5]:
# A single integer index into the vector V yields a scalar, i.e. a
# 0-dimensional tensor ...
first_entry = V[0]
print(first_entry)
# ... and .item() unwraps that scalar into a plain Python number.
print(first_entry.item())

tensor(1.)
1.0


In [6]:
# A single integer index into the matrix M selects one row, which is a vector.
first_row = M[0]
print(first_row)

tensor([1., 2., 3.])


In [7]:
# A single integer index into the 3-D tensor T selects one 2-D slice, which
# is a matrix.
first_slice = T[0]
print(first_slice)

tensor([[1., 2.],
        [3., 4.]])


In [8]:
# You can also create tensors of other data types. To create a tensor of integer types, try
# torch.tensor([[1, 2], [3, 4]]) (where all elements in the list are integers).
# You can also specify a data type by passing in ``dtype=torch.data_type``.
# Check the documentation for more data types, but
# Float and Long will be the most common.

# You can create a tensor with random data and the supplied dimensionality
# with torch.randn()
#

# torch.randn() takes the sizes either as one tuple or as separate arguments;
# they are passed separately here, giving a 3x4x5 tensor of random values.
x = torch.randn(3, 4, 5)
print(x)

tensor([[[ 0.1852,  0.3221, -1.4649,  0.1153,  1.1154],
         [-2.3996, -0.2961, -0.4604,  0.1779,  0.7929],
         [-0.1433,  0.6640,  1.2030,  1.2682,  0.6134],
         [-0.8820,  1.0452,  0.6023,  0.1903,  0.1112]],

        [[ 0.7686,  0.1050,  1.2035, -0.4381, -1.2662],
         [ 1.5489, -0.0979,  1.8058,  0.8630, -0.9722],
         [ 1.4394, -0.5560, -0.2039,  0.1190,  0.2423],
         [ 0.5120,  0.7333,  1.4497,  0.4509,  0.9503]],

        [[-0.1641, -0.4650, -0.3298, -1.0040, -0.9307],
         [-0.7649,  0.6117, -0.3254,  1.9492,  0.7256],
         [-0.2675,  0.5285,  0.2760,  0.8485, -0.0779],
         [-0.1363, -0.1086, -1.1616,  0.5524,  0.1481]]])


In [9]:
# Operations with Tensors
# ~~~~~~~~~~~~~~~~~~~~~~~
#
# You can operate on tensors in the ways you would expect.

# Element-wise addition of two vectors. torch.add(x, y) is the functional
# spelling of x + y.
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([4.0, 5.0, 6.0])
z = torch.add(x, y)
print(z)

tensor([5., 7., 9.])


In [10]:
# See the documentation (https://pytorch.org/docs/stable/torch.html) for a
# complete list of the massive number of operations available to you. They
# expand beyond just mathematical operations.
#
# One helpful operation that we will make use of later is concatenation.


# torch.cat() joins a sequence of tensors. When no axis is given it joins
# along the first axis, i.e. the rows of y_1 are appended below those of x_1.
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 = torch.cat((x_1, y_1))
print(z_1)

tensor([[-0.5800, -0.2400,  1.1887,  0.5137, -0.2237],
        [-2.4069,  0.3051, -1.7837, -1.6008,  0.2391],
        [ 0.4831, -1.7941,  1.0521,  0.4921,  0.3959],
        [-0.8886, -0.1213, -0.3444,  1.1246, -0.8451],
        [ 1.6779,  0.2403, -0.8946,  0.9642,  0.0042]])


In [11]:
# Operands for a column-wise concatenation: both have 2 rows, but they have
# 3 and 5 columns respectively.
x_2, y_2 = torch.randn(2, 3), torch.randn(2, 5)

In [12]:
# The second argument of torch.cat() names the axis to join along; axis 1
# places the columns of y_2 to the right of those of x_2.
z_2 = torch.cat((x_2, y_2), dim=1)
print(z_2)

# Tensors whose shapes are not compatible make torch raise an error.
# Uncomment the next line to see it:
# torch.cat([x_1, x_2])

tensor([[ 1.0060,  0.7094,  0.7479, -1.0113,  1.1192,  0.3528, -0.3043, -0.3187],
        [-1.7720,  0.0824, -1.2363, -0.8834,  0.7879,  0.5873, -1.9456, -0.4122]])


In [13]:
# Reshaping Tensors
# ~~~~~~~~~~~~~~~~~
#
# Use the .view() method to reshape a tensor. This method receives heavy
# use, because many neural network components expect their inputs to have
# a certain shape. Often you will need to reshape before passing your data
# to the component.
#

x = torch.randn(2, 3, 4)
print(x)
# Collapse the last two axes (3 * 4 = 12 values) so that the result has
# 2 rows and 12 columns.
x_rows = x.view(2, 12)
print(x_rows)

tensor([[[ 0.0972,  0.5737,  1.1018, -0.3262],
         [ 1.4643,  0.9163,  0.4538, -1.5773],
         [ 0.1156, -0.0415,  0.2913,  0.2278]],

        [[-0.3871, -0.4712, -0.2860, -0.2035],
         [-0.9943,  1.7137,  0.9367, -1.7576],
         [-0.3883, -0.0700, -0.2071,  1.2493]]])
tensor([[ 0.0972,  0.5737,  1.1018, -0.3262,  1.4643,  0.9163,  0.4538, -1.5773,
          0.1156, -0.0415,  0.2913,  0.2278],
        [-0.3871, -0.4712, -0.2860, -0.2035, -0.9943,  1.7137,  0.9367, -1.7576,
         -0.3883, -0.0700, -0.2071,  1.2493]])


In [14]:
# A size of -1 tells view() to infer that dimension from the remaining ones,
# so this produces the same 2x12 result as writing the 12 out explicitly.
x_inferred = x.view(2, -1)
print(x_inferred)

tensor([[ 0.0972,  0.5737,  1.1018, -0.3262,  1.4643,  0.9163,  0.4538, -1.5773,
          0.1156, -0.0415,  0.2913,  0.2278],
        [-0.3871, -0.4712, -0.2860, -0.2035, -0.9943,  1.7137,  0.9367, -1.7576,
         -0.3883, -0.0700, -0.2071,  1.2493]])


In [15]:
# Computation Graphs and Automatic Differentiation
# ================================================
#
# The concept of a computation graph is essential to efficient deep
# learning programming, because it allows you to not have to write the
# back propagation gradients yourself. A computation graph is simply a
# specification of how your data is combined to give you the output. Since
# the graph totally specifies what parameters were involved with which
# operations, it contains enough information to compute derivatives. This
# probably sounds vague, so let's see what is going on using the
# fundamental flag ``requires_grad``.
#
# First, think from a programmer's perspective. What is stored in the
# torch.Tensor objects we were creating above? Obviously the data and the
# shape, and maybe a few other things. But when we added two tensors
# together, we got an output tensor. All this output tensor knows is its
# data and shape. It has no idea that it was the sum of two other tensors
# (it could have been read in from a file, it could be the result of some
# other operation, etc.)
#
# If ``requires_grad=True``, the Tensor object keeps track of how it was
# created. Let's see it in action.
#

# Tensor factory methods have a ``requires_grad`` flag.
# Every entry is written as a float literal, consistent with the other
# tensors in this notebook, so the floating-point element type does not rely
# on int -> float promotion.
x = torch.tensor([1., 2., 3.], requires_grad=True)

In [16]:
# With requires_grad=True, you can still do all the operations you previously
# could. (All entries of y are float literals, matching x.)
y = torch.tensor([4., 5., 6.], requires_grad=True)
# The result of the addition now carries a grad_fn, visible in the printout.
z = x + y
print(z)

tensor([5., 7., 9.], grad_fn=<AddBackward0>)


In [17]:
# In addition to its data, z records the operation that produced it.
producer = z.grad_fn
print(producer)

<AddBackward0 object at 0x11aa14d10>


In [18]:
# So Tensors know what created them. z knows that it wasn't read in from
# a file, it wasn't the result of a multiplication or exponential or
# whatever. And if you keep following z.grad_fn, you will find yourself at
# x and y.
#
# But how does that help us compute a gradient?
#

# Let's reduce z to a single scalar by adding up all of its entries.
s = z.sum()
print(s)
# The scalar remembers that it came from a sum.
sum_fn = s.grad_fn
print(sum_fn)

tensor(21., grad_fn=<SumBackward0>)
<SumBackward0 object at 0x11aa14ed0>


In [19]:
# So now, what is the derivative of this sum with respect to the first
# component of x? In math, we want
#
# .. math::
#
#    \frac{\partial s}{\partial x_0}
#
#
#
# Well, s knows that it was created as a sum of the tensor z. z knows
# that it was the sum x + y. So
#
# .. math::  s = \overbrace{x_0 + y_0}^\text{$z_0$} + \overbrace{x_1 + y_1}^\text{$z_1$} + \overbrace{x_2 + y_2}^\text{$z_2$}
#
# And so s contains enough information to determine that the derivative
# we want is 1!
#
# Of course this glosses over the challenge of how to actually compute
# that derivative. The point here is that s is carrying along enough
# information that it is possible to compute it. In reality, the
# developers of PyTorch program the sum() and + operations to know how to
# compute their gradients, and run the back propagation algorithm. An
# in-depth discussion of that algorithm is beyond the scope of this
# tutorial.
#


######################################################################
# Let's have PyTorch compute the gradient, and see that we were right:
# (note if you run this block multiple times, the gradient will increment.
# That is because PyTorch *accumulates* the gradient into the .grad
# property, since for many models this is very convenient.)
#

# Calling .backward() on a tensor runs backprop, starting from that tensor.
# Afterwards the gradient of s with respect to x is available as x.grad;
# since s = sum(x + y), every component of that gradient is 1.
s.backward()
print(x.grad)

tensor([1., 1., 1.])


In [20]:
# Understanding what is going on in the block below is crucial for being a
# successful programmer in deep learning.
#

x = torch.randn((2, 2))
y = torch.randn((2, 2))
# Tensors you create yourself do not track gradients unless you ask for it:
# ``requires_grad`` is False by default.
print(x.requires_grad, y.requires_grad)

False False


In [21]:
z = torch.add(x, y)
# Neither input tracks gradients, so z has no grad_fn and you cannot
# backprop through it.
print(z.grad_fn)

None


In [22]:
# ``.requires_grad_( ... )`` flips the ``requires_grad`` flag of an existing
# Tensor in place, so the result does not have to be assigned back to the
# name. The flag argument defaults to ``True`` when it is omitted.
x.requires_grad_()
y.requires_grad_()
# Now that its inputs track gradients, z records how it was computed and
# therefore holds enough information to compute gradients, as we saw above.
z = x + y
print(z.grad_fn)

<AddBackward0 object at 0x11aa24690>


In [23]:
# ``requires_grad`` propagates: the output of an operation requires grad as
# soon as any one of its inputs does.
z_tracks_grad = z.requires_grad
print(z_tracks_grad)

True


In [24]:
# Now z has the computation history that relates itself to x and y.
# Can we just take its values, and **detach** them from that history?
# .detach() does exactly that; the result is stored under a new name, so z
# itself is not reassigned.
new_z = z.detach()

In [25]:
# ... does new_z carry the information needed to backprop to x and y?
# NO!

detached_fn = new_z.grad_fn
print(detached_fn)

# And how could it? ``z.detach()`` returns a tensor that shares the same
# storage as ``z`` but has forgotten the computation history, so it knows
# nothing about how it was computed.
# In essence, we have broken the Tensor away from its past history.

None


In [26]:
# You can also stop autograd from tracking history on Tensors
# that have ``requires_grad=True`` by wrapping the code block in
# ``with torch.no_grad():``
# Outside of any special context, a result computed from a tensor that
# requires grad requires grad as well.
squared = x ** 2
print(x.requires_grad)
print(squared.requires_grad)

True
True


In [27]:
# Inside ``torch.no_grad()`` autograd does not track operations, so the
# result of x ** 2 does not require grad even though x itself does.
# (The body is indented with 4 spaces rather than a tab, per PEP 8.)
with torch.no_grad():
    print((x ** 2).requires_grad)

False
