# Derivatives of composition of functions with PyTorch

Imports, and a helper used to capture the gradients of intermediate tensors during backpropagation.

In [1]:
import torch
import numpy as np
from math import exp

In [2]:
def set_grad(var):
    """Build a hook that stores the gradient it receives on ``var``.

    Args:
        var: Object whose ``grad`` attribute the hook overwrites.

    Returns:
        A one-argument callable. When called with a gradient it assigns that
        gradient to ``var.grad`` and returns ``None``.
    """
    def _store_gradient(incoming):
        # Plain attribute assignment: the latest gradient replaces whatever
        # was stored before rather than being accumulated.
        setattr(var, "grad", incoming)

    return _store_gradient

Set values of data and variables

In [3]:
# Input features (the fixed data).
x1, x2 = 1.0, 2.0
# Values of the weights and bias at which the derivatives are evaluated.
w1, w2 = 2.0, 1.0
b = 1.0
# Value of y used in the loss formula (kept as an int).
y = 1

In [4]:
# X is the given, fixed data: no gradient is requested for it.
X = torch.tensor([x1, x2])
# W and B are the variables with respect to which the derivatives are computed,
# so gradient tracking is switched on for both.
W = torch.tensor([w1, w2]).requires_grad_(True)
B = torch.tensor([b]).requires_grad_(True)

Let us define $z = f(w_1, w_2, b) = w_1 x_1 + w_2 x_2 + b$. We have $\frac{\partial f}{\partial w_i} = x_i$ and $\frac{\partial f}{\partial b} = 1$.

In [5]:
# z = w1*x1 + w2*x2 + b; adding the one-element tensor B makes z a one-element tensor.
z = torch.dot(W, X) + B

In [6]:
# z is an intermediate result, not one of the tensors created with requires_grad=True.
# The hook built by set_grad copies the gradient flowing into z onto z.grad
# so that it can be inspected after the backward pass.
z.register_hook(set_grad(z))

<torch.utils.hooks.RemovableHandle at 0x10e981550>

Let us define $\sigma = g(z) = \frac{1}{1 + e^{-z}} = g(f(w_1, w_2, b)) = (g \circ f) (w_1, w_2, b)$. We have $g'(z) = \frac{e^{-z}}{(1 + e^{-z})^2}$.

In [7]:
# Logistic sigmoid of z, written out explicitly to mirror the formula for g.
sigma = 1.0 / (1.0 + (-z).exp())

In [8]:
# Same trick as for z: store the gradient that reaches sigma on sigma.grad.
sigma.register_hook(set_grad(sigma))

<torch.utils.hooks.RemovableHandle at 0x10e9812b0>

Let us define $L = h(\sigma) = - (y \log(\sigma) + (1 - y) \log(1 - \sigma)) = h(g(z)) = h(g(f(w_1, w_2, b))) = (h \circ g \circ f) (w_1, w_2, b)$. We have $h'(\sigma) = - (\frac{y}{\sigma} - \frac{1 - y}{1 - \sigma})$.

In [9]:
# L = -(y*log(sigma) + (1 - y)*log(1 - sigma)), evaluated with tensor methods.
L = -(y * sigma.log() + (1 - y) * (1 - sigma).log())

In [10]:
# Backpropagate from L. This fills W.grad and B.grad and fires the hooks
# registered on sigma and z, which store their gradients on sigma.grad and z.grad.
L.backward()

We have $\frac{\partial L}{\partial \sigma} = - (\frac{y}{\sigma} - \frac{1 - y}{1 - \sigma})$. Let us compute the result with PyTorch and using the exact mathematical formula.

In [11]:
# Left: dL/dsigma as captured by the hook on sigma. Right: closed form h'(sigma).
print(
    sigma.grad,
    -(y / sigma - (1 - y) / (1 - sigma)),
)

tensor([-1.0067]) tensor([-1.0067], grad_fn=<NegBackward>)


We have $\frac{\partial L}{\partial z} = g'(z) h'(g(z))$. Let us compute the result with PyTorch and using the exact mathematical formula.

In [12]:
# Left: dL/dz as captured by the hook on z. Right: closed form g'(z) * h'(sigma).
# torch.exp keeps the whole expression in tensor space, consistent with how sigma
# was defined; math.exp would implicitly convert the gradient-tracking tensor z
# to a Python float and only works when z has a single element.
print(z.grad, (torch.exp(-z) / ((1 + torch.exp(-z)) ** 2.0)) * (- (y / sigma - (1 - y) / (1 - sigma))))

tensor([-0.0067]) tensor([-0.0067], grad_fn=<MulBackward0>)


We have $\frac{\partial L}{\partial b} = \frac{\partial f}{\partial b} g'(f(w_1, w_2, b)) h'(g(f(w_1, w_2, b)))$ that is $\frac{\partial L}{\partial b} = \frac{\partial f}{\partial b} g'(z) h'(\sigma)$. Similarly, we have $\frac{\partial L}{\partial w_i} = \frac{\partial f}{\partial w_i} g'(f(w_1, w_2, b)) h'(g(f(w_1, w_2, b)))$ that is $\frac{\partial L}{\partial w_i} = \frac{\partial f}{\partial w_i} g'(z) h'(\sigma)$. Let us compute the result with PyTorch and using the exact mathematical formula.

In [13]:
# Left: dL/db from autograd. Right: closed form (df/db = 1) * g'(z) * h'(sigma).
# torch.exp is used instead of math.exp so that z is never implicitly converted
# to a Python float and the formula stays valid for tensors of any size.
print(B.grad, 1 * (torch.exp(-z) / ((1 + torch.exp(-z)) ** 2.0)) * (- (y / sigma - (1 - y) / (1 - sigma))))

tensor([-0.0067]) tensor([-0.0067], grad_fn=<MulBackward0>)


In [14]:
# Left: dL/dw from autograd. Right: closed form (df/dw_i = x_i) * g'(z) * h'(sigma);
# the one-element factors broadcast against X to give one value per weight.
# torch.exp is used instead of math.exp so that z is never implicitly converted
# to a Python float.
print(W.grad, X * (torch.exp(-z) / ((1 + torch.exp(-z)) ** 2.0)) * (- (y / sigma - (1 - y) / (1 - sigma))))

tensor([-0.0067, -0.0134]) tensor([-0.0067, -0.0134], grad_fn=<MulBackward0>)
