In [0]:
%matplotlib inline


Introduction to PyTorch
***********************

Introduction to Torch's tensor library
======================================

All of deep learning is computations on tensors, which are
generalizations of a matrix that can be indexed in more than 2
dimensions. We will see exactly what this means in-depth later. First,
let's look at what we can do with tensors.



In [66]:
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Seed PyTorch's random number generator so the random tensors created
# below are reproducible when the notebook is run top to bottom.
torch.manual_seed(1)

<torch._C.Generator at 0x7f0a6a90ced0>

## Creating Tensors


### Tensors can be created from Python lists with the torch.tensor() function.


![alt text](https://www.kdnuggets.com/wp-content/uploads/tensor-examples.jpg)


### A note on terminology:
 * "tensor"  refers to any torch.Tensor object.  
 * Matrices and vectors are special cases of torch.Tensors.
 * For three-dimensional tensors, we explicitly use the term "3D tensor".


In [67]:
# Build a 1-D tensor (a vector) from a plain Python list with torch.tensor().
V_data = [1.0, 2.0, 3.0]
V = torch.tensor(V_data)

V

tensor([1., 2., 3.])

In [68]:
# Build a 2x3 matrix from a nested Python list (one inner list per row).
# Every entry is written as a float so the source data has a single,
# consistent element type.
M_data = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
M = torch.tensor(M_data)
M

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [69]:
# A 3D tensor of size 2x2x2: two 2x2 matrices stacked together.
T_data = [
    [[1.0, 2.0], [3.0, 4.0]],
    [[5.0, 6.0], [7.0, 8.0]],
]
T = torch.tensor(T_data)
T

tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]])

## Indexing PyTorch tensors

In [70]:
# Indexing the vector V with a single integer yields a scalar
# (a 0-dimensional tensor), not a plain Python number.
V[0]

tensor(1.)

In [71]:
# .item() converts that one-element tensor into a plain Python number
V[0].item()

1.0

In [72]:
# Indexing the matrix M with a single integer selects its first row, a vector
M[0]

tensor([1., 2., 3.])

In [0]:
# .item() only works on a tensor holding exactly one element. M[0] holds three,
# so the call below is left commented out; uncomment it to see the error.
# M[0].item()

In [74]:
# Indexing the 3D tensor T with a single integer selects its first 2x2 matrix
T[0]

tensor([[1., 2.],
        [3., 4.]])

### Tensors of other data types are possible
### The default is Float

![alt text](https://allenlu2007.files.wordpress.com/2018/11/newimage.png?w=598&h=254)




In [0]:
# IPython help syntax: show the built-in documentation for torch.tensor
? torch.tensor

In [76]:
# Report the tensor's type as a string; M was built from floats
M.type()

'torch.FloatTensor'

### create a tensor with random data and the supplied dimensionality with torch.randn()




In [77]:
# Draw a 3x4x5 tensor of random values; the sizes are passed as
# separate arguments, one per dimension.
x = torch.randn(3, 4, 5)

x

tensor([[[-1.5256, -0.7502, -0.6540, -1.6095, -0.1002],
         [-0.6092, -0.9798, -1.6091, -0.7121,  0.3037],
         [-0.7773, -0.2515, -0.2223,  1.6871,  0.2284],
         [ 0.4676, -0.6970, -1.1608,  0.6995,  0.1991]],

        [[ 0.8657,  0.2444, -0.6629,  0.8073,  1.1017],
         [-0.1759, -2.2456, -1.4465,  0.0612, -0.6177],
         [-0.7981, -0.1316,  1.8793, -0.0721,  0.1578],
         [-0.7735,  0.1991,  0.0457,  0.1530, -0.4757]],

        [[-0.1110,  0.2927, -0.1578, -0.0288,  0.4533],
         [ 1.1422,  0.2486, -1.7754, -0.0255, -1.0233],
         [-0.5962, -1.0055,  0.4285,  1.4761, -1.7869],
         [ 1.6103, -0.7040, -0.1853, -0.9962, -0.8313]]])

# Operations with Tensors


### Addition with `+`



In [78]:
# Two tensors of the same shape are added element by element with +
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([4.0, 5.0, 6.0])
z = x + y

z

tensor([5., 7., 9.])

## Concatenation


In [79]:
# torch.cat joins along the first axis unless told otherwise, so the
# rows of y_1 end up stacked underneath the rows of x_1.
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
print(x_1, '\n')
print(y_1, '\n')
z_1 = torch.cat((x_1, y_1))

z_1

tensor([[-0.8029,  0.2366,  0.2857,  0.6898, -0.6331],
        [ 0.8795, -0.6842,  0.4533,  0.2912, -0.8317]]) 

tensor([[-0.5525,  0.6355, -0.3968, -0.6571, -1.6428],
        [ 0.9803, -0.0421, -0.8206,  0.3133, -1.1352],
        [ 0.3773, -0.2824, -2.5667, -1.4303,  0.5009]]) 



tensor([[-0.8029,  0.2366,  0.2857,  0.6898, -0.6331],
        [ 0.8795, -0.6842,  0.4533,  0.2912, -0.8317],
        [-0.5525,  0.6355, -0.3968, -0.6571, -1.6428],
        [ 0.9803, -0.0421, -0.8206,  0.3133, -1.1352],
        [ 0.3773, -0.2824, -2.5667, -1.4303,  0.5009]])

In [80]:
# Concatenate columns instead: both inputs have 2 rows, so they can be
# placed side by side.
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
print(x_2, '\n')
print(y_2, '\n')
# dim=1 names the second axis (columns) as the one to concatenate along
z_2 = torch.cat((x_2, y_2), dim=1)

z_2

tensor([[ 0.5438, -0.4057,  1.1341],
        [-1.1115,  0.3501, -0.7703]]) 

tensor([[-0.1473,  0.6272,  1.0935,  0.0939,  1.2381],
        [-1.3459,  0.5119, -0.6933, -0.1668, -0.9999]]) 



tensor([[ 0.5438, -0.4057,  1.1341, -0.1473,  0.6272,  1.0935,  0.0939,  1.2381],
        [-1.1115,  0.3501, -0.7703, -1.3459,  0.5119, -0.6933, -0.1668, -0.9999]])

## What if the tensors are not compatible?

In [0]:
# x_1 is 2x5 and x_2 is 2x3, so their column counts differ. Stacking them
# along the first axis is expected to fail; uncomment the line to see the error.
#torch.cat([x_1, x_2])

## Reshaping Tensors


### We can use the .view() method to reshape a tensor.



```

This method receives heavy use, because many neural network components expect their inputs to have a certain shape. Often you will need to reshape before passing your data  to the component.

```




In [82]:
# A random 2x3x2 tensor (12 values in total) to practise reshaping on
x = torch.randn(2, 3, 2)

x

tensor([[[-1.6476,  0.8098],
         [ 0.0554,  1.1340],
         [-0.5326,  0.6592]],

        [[-1.5964, -0.3769],
         [-3.1020, -0.0995],
         [-0.7213,  1.2708]]])

In [83]:
x.view(2, 6)  # Reshape to 2 rows, 6 columns

tensor([[-1.6476,  0.8098,  0.0554,  1.1340, -0.5326,  0.6592],
        [-1.5964, -0.3769, -3.1020, -0.0995, -0.7213,  1.2708]])

In [84]:
# Same as above. If one of the dimensions is -1, its size is inferred from
# the other sizes: 12 values split into 2 rows leaves 6 columns.
x.view(2, -1)

tensor([[-1.6476,  0.8098,  0.0554,  1.1340, -0.5326,  0.6592],
        [-1.5964, -0.3769, -3.1020, -0.0995, -0.7213,  1.2708]])

Computation Graphs and Automatic Differentiation
================================================

* A computation graph is simply a specification of how your data is combined to give you the output.
* It allows automatic calculation of gradients during backpropagation.


## What is stored in the torch.Tensor objects ??
* data 
* shape
* other.. 


### If ``requires_grad=True``, the Tensor object keeps track of how it was created. Let's see it in action.




In [85]:
# Factory functions such as torch.tensor() accept a ``requires_grad`` flag
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = torch.tensor([4.0, 5.0, 6.0], requires_grad=True)

# Tracking gradients does not restrict which operations you can perform
z = x + y
z    # this time z carries a grad_fn as well

tensor([5., 7., 9.], grad_fn=<AddBackward0>)

In [86]:
# grad_fn records which operation produced z (here, an addition)
z.grad_fn

<AddBackward0 at 0x7f0a1ed9ab38>

### Tensors know what created them.

z knows that it wasn't read in from a file, it wasn't the result of a multiplication or exponential or
whatever. And if you keep following z.grad_fn, you will find yourself at
x and y.

### But how does that help us compute a gradient?




In [87]:
# Let's sum up all the entries in z
s = z.sum()
print(s)

# s was produced by a sum, and its grad_fn reflects that
s.grad_fn

tensor(21., grad_fn=<SumBackward0>)


<SumBackward0 at 0x7f0a1ed950f0>

So now, what is the derivative of this sum with respect to the first
component of x? In math, we want

\begin{align}\frac{\partial s}{\partial x_0}\end{align}



Well, s knows that it was created as a sum of the tensor z. z knows that it was the sum x + y. So

\begin{align}s = \overbrace{x_0 + y_0}^\text{$z_0$} + \overbrace{x_1 + y_1}^\text{$z_1$} + \overbrace{x_2 + y_2}^\text{$z_2$}\end{align}

And so s contains enough information to determine that the derivative we want is 1!



## opt.zero_grad(), loss.backward(), opt.step()


### zero_grad 
clears old gradients from the last step (otherwise you'd just accumulate the gradients from all loss.backward() calls).

### loss.backward() 
computes the derivative of the loss w.r.t. the parameters (or anything requiring gradients) using backpropagation.

### opt.step()
causes the optimizer to take a step based on the gradients of the parameters.



In [93]:
# Calling .backward() on a tensor runs backprop, starting from it.
s.backward()

# Each x_i enters s with coefficient 1, so as long as x is still the
# requires_grad tensor that z and s were built from, this shows a gradient of ones.
# NOTE(review): a printed None suggests x had been rebound to a different tensor
# before this cell ran; re-run the notebook top to bottom to confirm.
print(x.grad)

tensor([1., 1., 1.])


In [94]:
# Create two fresh 2x2 random tensors without asking for gradient tracking
x = torch.randn(2, 2)
y = torch.randn(2, 2)
# By default, user created Tensors have ``requires_grad=False``
print(x.requires_grad, y.requires_grad)
z = x + y
# Neither input tracks gradients, so z records no history and
# you can't backprop through it
print(z.grad_fn)

False False
None


In [95]:
# ``.requires_grad_( ... )`` flips an existing Tensor's ``requires_grad``
# flag in-place (to ``True`` when called without an argument), so the
# names x and y keep pointing at the same tensors.
x.requires_grad_()
y.requires_grad_()
# Recompute z: now that its inputs track gradients, it records how it was made
z = x + y
print(z.grad_fn)

<AddBackward0 object at 0x7f0a1ed95fd0>


In [96]:
# If any input to an operation has ``requires_grad=True``, so will the output
print(z.requires_grad)

# Now z has the computation history that relates it to x and y.
# Can we just take its values, and **detach** it from its history?
new_z = z.detach()

# ... does new_z have the information needed to backprop to x and y?
# NO!
print(new_z.grad_fn)
# And how could it? ``z.detach()`` returns a tensor that shares the same storage
# as ``z``, but with the computation history forgotten. It doesn't know anything
# about how it was computed.
# In essence, we have broken the Tensor away from its past history

True
None


You can also stop autograd from tracking history on Tensors
with ``.requires_grad=True`` by wrapping the code block in
``with torch.no_grad():``



In [97]:
# x tracks gradients, and so does anything computed from it ...
print(x.requires_grad)
print((x ** 2).requires_grad)

# ... unless the computation happens inside a torch.no_grad() block
with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False
