In [1]:
%matplotlib inline


PyTorch: Defining new autograd functions
----------------------------------------

A fully-connected ReLU network with one hidden layer and no biases, trained to
predict y from x by minimizing squared Euclidean distance.

This implementation computes the forward pass using operations on PyTorch
Variables, and uses PyTorch autograd to compute gradients.

In this implementation we define our own custom autograd function to compute
the ReLU nonlinearity.  
在这个实现里我们使用自定义的自动求梯度函数来实现ReLU函数。



In [2]:
import torch
from torch.autograd import Variable


class MyReLU(torch.autograd.Function):
    """
    Custom autograd function computing ReLU, i.e. max(input, 0) element-wise.

    Custom autograd functions are written by subclassing
    torch.autograd.Function and implementing static forward and backward
    methods that operate on Tensors. The preferred way to invoke the function
    is MyReLU.apply(tensor).
    """

    @staticmethod
    def forward(ctx, input):
        """
        Compute the forward pass.

        Receives a Tensor containing the input and returns a Tensor containing
        the output. ctx is a context object used to stash information for the
        backward pass; arbitrary Tensors can be cached on it with
        ctx.save_for_backward.
        """
        # The input is needed in backward() to know where the ReLU was inactive.
        ctx.save_for_backward(input)
        return input.clamp(min=0)

    @staticmethod
    def backward(ctx, grad_output):
        """
        Compute the backward pass.

        Receives a Tensor holding the gradient of the loss with respect to the
        output (dL/doutput, propagated back from the later part of the graph)
        and returns the gradient of the loss with respect to the input, i.e.
        (dL/doutput) * (doutput/dinput).
        """
        input, = ctx.saved_tensors
        # Clone so the incoming gradient is not modified in place.
        grad_input = grad_output.clone()
        # ReLU has zero slope wherever the input was negative.
        grad_input[input < 0] = 0
        return grad_input

    def __call__(self, *args):
        """
        Compatibility shim for call sites that instantiate the function and
        call the instance (relu = MyReLU(); relu(x)); forwards to apply().
        New code should call MyReLU.apply(x) directly.
        """
        return type(self).apply(*args)


# Tensor type used for all data and weights: run on the GPU when one is
# available, otherwise fall back to the CPU so the cell works on any machine.
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = 64, 1000, 100, 10

# Create random Tensors to hold input and outputs. They are plain data, so no
# gradients are tracked for them.
x = torch.randn(N, D_in).type(dtype)
y = torch.randn(N, D_out).type(dtype)

# Create random Tensors for weights. requires_grad_() is applied after the type
# conversion so that the converted tensors themselves are the leaf tensors that
# accumulate .grad.
w1 = torch.randn(D_in, H).type(dtype).requires_grad_()
w2 = torch.randn(H, D_out).type(dtype).requires_grad_()

# Custom autograd functions are invoked through Function.apply; the callable
# does not change between iterations, so bind it once outside the loop.
relu = MyReLU.apply

learning_rate = 1e-6
for t in range(500):
    # Forward pass: compute predicted y; the ReLU is computed with our custom
    # autograd operation.
    y_pred = relu(x.mm(w1)).mm(w2)

    # Compute and print loss. loss is a 0-dim tensor; item() extracts the
    # Python number it holds.
    loss = (y_pred - y).pow(2).sum()
    print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # The weight update itself must not be recorded by autograd.
    with torch.no_grad():
        # Update weights using gradient descent
        w1 -= learning_rate * w1.grad
        w2 -= learning_rate * w2.grad

        # Manually zero the gradients after updating weights; backward()
        # accumulates into .grad rather than overwriting it.
        w1.grad.zero_()
        w2.grad.zero_()

0 28735342.0
1 25246106.0
2 27532936.0
3 31889838.0
4 33569268.0
5 29356610.0
6 19958198.0
7 10998290.0
8 5355120.0
9 2658540.5
10 1478403.375
11 955759.625
12 698090.1875
13 551156.625
14 454357.59375
15 383542.5625
16 328383.375
17 283668.375
18 246613.234375
19 215509.34375
20 189178.484375
21 166770.90625
22 147557.671875
23 130992.8828125
24 116646.1484375
25 104190.0625
26 93331.625
27 83818.6875
28 75473.5
29 68111.4296875
30 61598.8984375
31 55818.98046875
32 50676.921875
33 46098.1875
34 42014.83984375
35 38355.72265625
36 35068.59765625
37 32109.373046875
38 29443.935546875
39 27039.138671875
40 24862.625
41 22893.318359375
42 21106.31640625
43 19481.04296875
44 18000.00390625
45 16649.380859375
46 15415.1494140625
47 14285.85546875
48 13251.541015625
49 12302.4248046875
50 11430.7529296875
51 10629.4423828125
52 9892.138671875
53 9215.3759765625
54 8590.830078125
55 8014.16845703125
56 7481.2080078125
57 6988.34130859375
58 6532.08447265625
59 6109.15625
60 5717.05517578125


420 0.0005435514030978084
421 0.0005301890196278691
422 0.0005146274925209582
423 0.0005001294193789363
424 0.000486225908389315
425 0.00047359461314044893
426 0.0004609685856848955
427 0.00044915496255271137
428 0.0004376848228275776
429 0.00042648735688999295
430 0.00041561017860658467
431 0.0004047971742693335
432 0.0003946475626435131
433 0.0003849180357065052
434 0.00037521147169172764
435 0.0003658151545096189
436 0.00035692405072040856
437 0.00034788562334142625
438 0.00033956323750317097
439 0.0003313202760182321
440 0.0003237763012293726
441 0.0003159889602102339
442 0.0003081181494053453
443 0.0003007858758792281
444 0.0002939070691354573
445 0.00028729953919537365
446 0.0002804831601679325
447 0.00027427496388554573
448 0.0002672542759682983
449 0.0002615601406432688
450 0.0002556566323619336
451 0.00025069070397876203
452 0.00024463451700285077
453 0.00023971364134922624
454 0.00023463134129997343
455 0.00022954642190597951
456 0.00022464692301582545
457 0.00022003029880579