In [1]:
import torch
from torch.autograd import Variable

In [2]:
x = Variable(torch.Tensor([1,2,3,4]), requires_grad=True) # requires_grad: need autograd

In [3]:
print(x.data, x.shape, x.grad_fn, x.grad) 
# Variable.grad_fn: the creator function of the var; x comes from Tensor, so grad_fn is None

tensor([1., 2., 3., 4.]) torch.Size([4]) None None


In [4]:
y = Variable(torch.randn(2,4), True) # Variable is Tensor that from which computation graph can be created
# torch.randn: random number from normal dist;
# torch.rand: random number from uniform dist;
# other methods: torch.ones, torch.zeros, torch.eye

In [5]:
print(y.data, y.shape, y.grad_fn, y.grad) 

tensor([[-0.2925,  0.0732, -0.9787, -1.2007],
        [-0.5189,  0.6909,  2.7205,  1.0611]]) torch.Size([2, 4]) None None


In [6]:
print(y.view(-1,1)) # Tensor.view: flatten the tensor and make new one by the dim args given (like numpy)
print(y.view(1,8))
print(y.view(4,-1))

tensor([[-0.2925],
        [ 0.0732],
        [-0.9787],
        [-1.2007],
        [-0.5189],
        [ 0.6909],
        [ 2.7205],
        [ 1.0611]], grad_fn=<ViewBackward0>)
tensor([[-0.2925,  0.0732, -0.9787, -1.2007, -0.5189,  0.6909,  2.7205,  1.0611]],
       grad_fn=<ViewBackward0>)
tensor([[-0.2925,  0.0732],
        [-0.9787, -1.2007],
        [-0.5189,  0.6909],
        [ 2.7205,  1.0611]], grad_fn=<ViewBackward0>)


In [7]:
import numpy as np

In [8]:
w = np.ndarray((2,1))
w = torch.from_numpy(w)  # torch.from_numpy: make a torch.Tensor object from numpy array

In [9]:
print(w.data, type(w))

tensor([[2.0583e-312],
        [2.3342e-312]], dtype=torch.float64) <class 'torch.Tensor'>


In [10]:
if torch.cuda.is_available(): # if cuda gpu is avaiable,
    w = w.cuda()  # Tensor.cuda: migrate tensor to cuda gpu

In [11]:
w = w.cpu() # migrate w to cpu

In [12]:
print(w.data, w.shape, w.grad_fn, w.grad) 

tensor([[2.0583e-312],
        [2.3342e-312]], dtype=torch.float64) torch.Size([2, 1]) None None


In [13]:
wx = w * x
# the other way to do the same thing is: wx=torch.multiply(w,x)
# for inplace computation w = w * x is same as: w.multiply_(x)

In [23]:
print(wx.data, wx.shape, wx.grad_fn, wx.grad) 
# wx is computed by w multiplies x, so the grad_fn is 'MulBackward'

tensor([[2.0583e-312, 4.1167e-312, 6.1750e-312, 8.2333e-312],
        [2.3342e-312, 4.6684e-312, 7.0026e-312, 9.3368e-312]],
       dtype=torch.float64) torch.Size([2, 4]) <MulBackward0 object at 0x105d8bd00> None


In [15]:
y_wx = torch.add(y, wx) # the same as: y_wx = y + wx

In [16]:
print(y_wx.data, y_wx.shape, y_wx.grad_fn, y_wx.grad)
# y_wx id computed by adding y and wx, so the grad_fn is 'AddBackward'

tensor([[-0.2925,  0.0732, -0.9787, -1.2007],
        [-0.5189,  0.6909,  2.7205,  1.0611]], dtype=torch.float64) torch.Size([2, 4]) <AddBackward0 object at 0x14d63a7f0> None


In [17]:
try:
    y_wx.backward() # try to compute the grad of y_wx
except Exception as e:
    print(e)
# the y_wx is tensor, not scalar(1*1 tensor)

grad can be implicitly created only for scalar outputs


In [18]:
y_wx.backward(torch.ones_like(y_wx), retain_graph=True)
# 'retain_graph': retain the computation graph of y_wx to backward through the graph a second time
# torch.ones_like: make a all-one tensor the same shape as the input arg

In [19]:
print(x.grad, y.grad) # get grad of x and y, i.e. d(y_wx)/dx, d(y_wx)/dy 

tensor([0., 0., 0., 0.]) tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.]])


In [20]:
print(w.grad)  # w is not set as 'requires_grad'

None


In [21]:
s_y_wx = y_wx.sum() # the other way of making y_wx a scalar is summing up the y_wx
s_y_wx.backward() # the computation graph of y_wx is retained; else, an exception will be raised

In [22]:
print(x.grad, y.grad) 

tensor([0., 0., 0., 0.]) tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.]])
