### Reverse mode autodiff

In [1]:
from typing import List
from collections import defaultdict
import numpy as np

In [2]:
class Variable:
    def __init__(self, value, local_gradients=[]):
        self.value = value
        self.local_gradients = local_gradients
    
    def __add__(self, other):
        return add(self, other)
    
    def __mul__(self, other):
        return mul(self, other)
    
    def __sub__(self, other):
        return add(self, neg(other))
    
    def __truediv__(self, other):
        return mul(self, inv(other))
    
    def __pow__(self, other):
        return pow(self, other)
    
    def __str__(self):
        return f"Value {self.value}"

# constants
ONE = Variable(1.)
NEG_ONE = Variable(-1.)    

def add(a, b):
    value = a.value + b.value    
    local_gradients = (
        (a, 1),
        (b, 1)
    )
    return Variable(value, local_gradients)


def mul(a, b):
    value = a.value * b.value    
    local_gradients = (
        (a, b.value),
        (b, a.value)
    )
    return Variable(value, local_gradients)

def neg(a):
    value = -1 * a.value
    local_gradients = (
        (a, -1),
    )
    return Variable(value, local_gradients)

def inv(a):
    value = 1. / a.value
    local_gradients = (
        (a, -1 / a.value**2),
    )
    return Variable(value, local_gradients)

def sqrt(a):
    value = np.sqrt(a.value)
    local_gradients = (
        (a, 1 / (2 * np.sqrt(a.value)))
    )
    return Variable(value, local_gradients)

def pow(a, b):
    value = a.value ** b
    local_gradients = (
        (a, b * (a.value ** (b - 1))),
    )

    return Variable(value, local_gradients)

In [15]:
def sin(a):
    value = np.sin(a.value)
    local_gradients = (
        (a, np.cos(a.value)),
    )
    return Variable(value, local_gradients)

def exp(a):
    value = np.exp(a.value)
    local_gradients = (
        (a, value),
    )
    return Variable(value, local_gradients)
    
def log(a):
    value = np.log(a.value)
    local_gradients = (
        (a, 1. / a.value),
    )
    return Variable(value, local_gradients)


In [16]:
def get_gradients(variable):
    """ Compute the first derivatives of `variable` 
    with respect to child variables.
    """
    gradients = defaultdict(lambda: 0)
    
    def compute_gradients(variable, path_value):
        for child_variable, local_gradient in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = path_value * local_gradient
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)
    
    compute_gradients(variable, path_value=1)
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients

In [27]:
def f(a, b, c):
    f = sin(a * b) + exp(Variable(-3) * c - (a / b))
    return log(f * f) * c

def f1(a, b, c):
    f = exp(Variable(-2) * (a * a + b * b + c * c) ** 0.5)
    return f

def vanilla_E(r1, r2, alpha):
    norm_r1 = np.linalg.norm(r1)
    norm_r2 = np.linalg.norm(r2)
    r12 = np.linalg.norm(r1 - r2)

    dot_product = np.dot(r1 / norm_r1 - r2 / norm_r2, r1 - r2)

    energy = - 4 + dot_product / (r12 * (1 + alpha * r12)**2) - 1 / (r12 * (1 + alpha * r12)**3) - 1/(4 * (1 + alpha * r12)**4) + 1 / r12 
    return energy

def E(a1, b1, c1, a2, b2, c2, alpha):
    r1 = (a1 ** 2 + b1 ** 2 + c1 ** 2) ** 0.5
    r2 = (a2 ** 2 + b2 ** 2 + c2 ** 2) ** 0.5
    r12 = ((a1 - a2) ** 2 + (b1 - b2) ** 2 + (c1 - c2) ** 2) ** 0.5
    #dot_product = (a1 / r1) * (a2 / r2) + (b1 / r1) * (b2 / r2) + (c1 / r1) * (c2 / r2)

    dot_product = ((a1 / r1) - (a2 / r2)) * (a1 - a2) + ((b1 / r1) - (b2 / r2)) * (b1 - b2) + ((c1 / r1) - (c2 / r2)) * (c1 - c2)


    E = Variable(-4) + dot_product / (r12 * (Variable(1) + alpha * r12) ** 2) - Variable(1) / (r12 * (Variable(1) + alpha * r12) ** 3) - Variable(1)/(Variable(4) * (Variable(1) + alpha * r12)**4) + Variable(1) / r12

    return E

In [28]:
a1 = Variable(2)
b1 = Variable(3)
c1 = Variable(2)
a2 = Variable(5)
b2 = Variable(3)
c2 = Variable(-3)

r1 = np.array([a1.value, b1.value, c1.value])
r2 = np.array([a2.value, b2.value, c2.value])

alpha = 0.3

In [29]:
f = a1 ** 3

In [30]:
get_gradients(f)[a1]

12

In [31]:
alpha_ = Variable(0.3)

In [32]:
t = E(a1, b1, c1, a2, b2, c2, alpha_)

In [33]:
vanilla_E(r1, r2, alpha)

np.float64(-3.715315687004636)

In [35]:
t.value

-3.715315687004636

In [36]:
get_gradients(t)[alpha_]

-0.44404586887760766

## Nth derivatives

In [49]:

class Variable_:
    def __init__(self, value, local_gradients=()):
        self.value = value
        self.local_gradients = local_gradients

    def __add__(a, b):
        return add(a, b)
    
    def __sub__(a, b):
        return add(a, neg(b))
    
    def __mul__(a, b):
        return mul(a, b)
    
    def __truediv__(a, b):
        return div(a, b)
    
    def __pow__(a, b):
        return pow(a, b)

# Useful constants:
ONE = Variable_(1.)
NEG_ONE = Variable_(-1.)

def add(a, b):
    value = a.value + b.value    
    local_gradients = (
        # Note that local_gradients now contains lambda functions.
        (a, lambda path_value: path_value),
        # local gradient is 1, so multiply path_value by 1.
        (b, lambda path_value: path_value)
        # local gradient is 1, so multiply path_value by 1.
    )
    return Variable_(value, local_gradients)

def mul(a, b):
    value = a.value * b.value
    local_gradients = (
        (a, lambda path_value: path_value * b),
        # local gradient for a is b, so multiply path_value by b.
        (b, lambda path_value : path_value * a)
        # local gradient for b is a, so multiply path_value by a.
    )
    return Variable_(value, local_gradients)

def div(a, b):
    value = a.value / b.value
    local_gradients = (
        (a, lambda path_value : path_value * ONE/b),
        (b, lambda path_value : path_value * NEG_ONE * a/(b*b))
    )
    return Variable_(value, local_gradients)

def exp(a):
    value = np.exp(a.value)
    local_gradients = (
        (a, lambda path_value: path_value * a),
    )
    return Variable_(value, local_gradients)

def pow(a, b):
    value = np.power(a.value, b.value)
    local_gradients = (
        (a, lambda path_value: path_value * b * Variable(np.power(a, (b + NEG_ONE)))),
        (b, lambda path_value: 0)
    )

    return Variable_(value, local_gradients)

def neg(a):
    value = NEG_ONE * a.value
    local_gradients = (
        (a, lambda path_value: path_value * NEG_ONE),
    )
    return Variable_(value, local_gradients)


In [50]:
a.value

2

In [51]:
b.value

2

In [52]:
def get_gradients(variable):
    """ Compute the first derivatives of `variable` 
    with respect to child variables.
    """
    gradients = defaultdict(lambda: Variable(0))
    
    def compute_gradients(variable, path_value):
        for child_variable, multiply_by_locgrad in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = multiply_by_locgrad(path_value)  # Now a function is used here.
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)
    
    compute_gradients(variable, path_value=ONE)  # Now path_value is a Variable.
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients

In [53]:
a = Variable_(2)
b = Variable_(2)
c = Variable_(-0.3)

In [54]:
f = a ** Variable_(2)
get_gradients(f)[a]

TypeError: unsupported operand type(s) for *: 'float' and 'Variable_'

In [107]:
def f1(a, b, c):
    f = exp(Variable(-2) * (a * a + b * b + c * c) ** Variable_(0.5))
    return f

In [108]:
deriv_1 = get_gradients(f1(a, b, c))

AttributeError: 'float' object has no attribute 'value'

In [106]:
deriv_1[c].value

-0.336