In [1]:
'''
This notebook explores PyTorch tensors, the autograd function, and NumPy interfaces,
from their instantiation to their properties.
'''
import torch
from torch.autograd import Variable
import numpy as np

# Seed the global RNG so that Restart & Run All regenerates the same tensor.
# NOTE: outputs saved before the seed was introduced will differ until the notebook is re-run.
torch.manual_seed(0)

# Random 3 x 3 tensor; requires_grad=True asks autograd to track operations on it
x = torch.randn(3, 3, requires_grad=True)
print("Random 2-D tensor")
print(x)

# Element-wise: add 7 to every value, then multiply that total by 5
y = (x + 7) * 5
print("\nRandom 2-D shifted by 7, multiplied by 5")
print(y)

Random 2-D tensor
tensor([[-0.2878,  1.2184,  1.5993],
        [-0.2567,  0.2560, -0.8569],
        [-1.4788,  1.0990, -0.7223]], requires_grad=True)

Random 2-D shifted by 7, multiplied by 5
tensor([[33.5611, 41.0922, 42.9966],
        [33.7165, 36.2799, 30.7157],
        [27.6059, 40.4950, 31.3885]], grad_fn=<MulBackward0>)


In [2]:
'''
Deriving the gradient function by hand:
    y = (x + 7) * 5  =  5x + 35    <-- forward computation
    dy/dx = 5                      <-- gradient function
'''
# 3 x 3 float tensor holding the values 1 through 9
v = torch.tensor(
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    dtype=torch.float32,
)

# Invoke the backward node (grad_fn) attached to y directly on the data of v
y.grad_fn(v)

(tensor([[ 5., 10., 15.],
         [20., 25., 30.],
         [35., 40., 45.]]), None)

In [3]:
# Triple every entry of y, then reduce all of z to a single value (its mean)
z = torch.mul(y, 3)
out = torch.mean(z)

# Each call prints a heading line followed by the tensor on the next line
print("Multiplied by 3", z, sep="\n")
print("\nMean of values", out, sep="\n")

Multiplied by 3
tensor([[100.6832, 123.2766, 128.9898],
        [101.1495, 108.8398,  92.1470],
        [ 82.8177, 121.4851,  94.1654]], grad_fn=<MulBackward0>)

Mean of values
tensor(105.9505, grad_fn=<MeanBackward0>)


In [6]:
# Walk backwards through the autograd graph starting at out's grad_fn,
# always following the first input of each node, and print one node per
# line with one extra space of indentation per step.
grad = out.grad_fn
indent = 1
while grad is not None:  # guard: the followed entry (or out.grad_fn itself) may be None
    print(" " * indent, "-->", grad)
    if not grad.next_functions:
        break  # no further inputs: end of this chain
    grad = grad.next_functions[0][0]
    indent += 1

  --> <MeanBackward0 object at 0x00000197601AAEC8>
   --> <MulBackward0 object at 0x00000197601AAFC8>
    --> <MulBackward0 object at 0x0000019760163748>
     --> <AddBackward0 object at 0x0000019760163388>
      --> <AccumulateGrad object at 0x0000019760163748>


In [7]:
print(out.grad_fn) # MeanBackward0: divides the incoming gradient by 9 (the element count)

# One-element 1-D tensor holding 10. (shape (1,), not zero-dimensional).
# The deprecated Variable wrapper is not needed; a plain tensor is passed directly.
val1 = torch.tensor([10.])
out.grad_fn(val1) # Broadcasting: the single value of val1 is expanded to a 3x3 tensor

<MeanBackward0 object at 0x000001976014AD08>


tensor([[1.1111, 1.1111, 1.1111],
        [1.1111, 1.1111, 1.1111],
        [1.1111, 1.1111, 1.1111]])

In [8]:
# A 3x3 float tensor to feed through out's backward node
val2 = torch.tensor(
    [[3, 6, 9], [1, 2, 3], [2, 4, 8]],
    dtype=torch.float32,
)

# Displayed result: val2 passed through out.grad_fn
out.grad_fn(val2)

tensor([[0.3333, 0.6667, 1.0000],
        [0.1111, 0.2222, 0.3333],
        [0.2222, 0.4444, 0.8889]])

In [9]:
# Feed a tensor with fewer dimensions than the 3x3 target through out.grad_fn
val3 = torch.tensor([1.5, 3.33, 9.1]) # 1-D tensor of 3 values; it is broadcast as a row, repeated for each of the 3 rows
out.grad_fn(val3)

tensor([[0.1667, 0.3700, 1.0111],
        [0.1667, 0.3700, 1.0111],
        [0.1667, 0.3700, 1.0111]])

In [10]:
# Broadcasting will not work here: pytorch doesn't know how to scale a 2x2 into a 3x3
val4 = torch.tensor([[1., 2.],
                     [3., 4.]])
try:
    out.grad_fn(val4)
except RuntimeError as err:
    # Only the expected size-mismatch error is caught and shown; anything else
    # (e.g. a NameError from a missing earlier cell) is allowed to surface.
    print(err)

The expanded size of the tensor (3) must match the existing size (2) at non-singleton dimension 1.  Target sizes: [3, 3].  Tensor sizes: [2, 2]


In [11]:
print("Original tensor\n", val2, '\n')

# Apply out's gradient function, then multiply back by the number of elements
# to undo the division performed by MeanBackward0.
size = val2.numel()  # element count taken from the tensor itself (9 for a 3x3)
print("MeanBackward then rescaled manually\n", out.grad_fn(val2) * size)

Original tensor
 tensor([[3., 6., 9.],
        [1., 2., 3.],
        [2., 4., 8.]]) 

MeanBackward then rescaled manually
 tensor([[3., 6., 9.],
        [1., 2., 3.],
        [2., 4., 8.]])
