In [211]:
"""
This file contains the central data structure and functions related to the
forward mode auto differentiation. We may want to separate the code into 
multiple files later.
"""
import numpy as np

class Expression:
    def __init__(self, ele_func, sub_expr1, sub_expr2=None):
        self._ele_func  = ele_func
        self._sub_expr1 = sub_expr1
        self._sub_expr2 = sub_expr2
        self.val = None
        self.bder=None
    
    def evaluation_at(self, val_dict):
        
        # self._sub_expr2 is None implies that self._ele_func is an unary operator
        if self._sub_expr2 is None: 
            return self._ele_func.evaluation_at(
                self._sub_expr1, val_dict)
        
        # self._sub_expr2 not None implies that self._ele_func is a binary operator
        else:
            return self._ele_func.evaluation_at(
                self._sub_expr1, self._sub_expr2, val_dict)
    
    def derivative_at(self, var, val_dict):
        
        if var is self: return 1.0
        
        # sub_expr2 is None implies that _ele_func is an unary operator
        if self._sub_expr2 is None:
            return self._ele_func.derivative_at(
                self._sub_expr1, var, val_dict)
        
        # sub_expr2 not None implies that _ele_func is a binary operator
        else:
            return self._ele_func.derivative_at(
                self._sub_expr1, self._sub_expr2, var, val_dict)
    
    def back_derivative(self,var,val_dict):
        if var is self: return 1.0
        if self._sub_expr2 is None:
            return self._ele_func.backderivative_at(self._sub_expr1,var)
        else:
            return self._ele_func.backderivative_at(self._sub_expr1,
                                                    self._sub_expr2,var)
    
    def __neg__(self):
        return Expression(Neg, self)

                
    def __add__(self, another):
        if isinstance(another, Expression):
            return Expression(Add, self, another)
        # if the other operand is not an Expression, then it must be a number
        # the number then should be converted to a Constant
        else:
            return Expression(Add, self, Constant(another))
    
    
    def __radd__(self, another):
        if isinstance(another, Expression):
            return Expression(Add, another, self)
        else:
            return Expression(Add, Constant(another), self)
    
    def __sub__(self, another):
        if isinstance(another, Expression):
            return Expression(Sub, self, another)
        else:
            return Expression(Sub, self, Constant(another))
    
    def __rsub__(self, another):
        if isinstance(another, Expression):
            return Expression(Sub, another, self)
        else:
            return Expression(Sub, Constant(another), self)
        

    def __mul__(self, another):
        if isinstance(another, Expression):
            return Expression(Mul,self,another)
        else:
            return Expression(Mul, self, Constant(another))

    def __rmul__(self, another):
        if isinstance(another, Expression):
            return Expression(Mul,another,self)
        else:
            return Expression(Mul, Constant(another),self)
    
    def __truediv__(self, another):
        if isinstance(another, Expression):
            return Expression(Div,self,another)
        else:
            return Expression(Div, self, Constant(another))

    def __rtruediv__(self, another):
        if isinstance(another, Expression):
            return Expression(Div,another,self)
        else:
            return Expression(Div, Constant(another),self)
    
    def __pow__(self,another):
        if isinstance(another, Expression):
            return Expression(Pow,self,another)
        else:
            return Expression(Pow, self, Constant(another))
    
    def __rpow__(self,another):
        if isinstance(another, Expression):
            return Expression(Pow,another,self)
        else:
            return Expression(Pow, Constant(another),self)


class Variable(Expression):
    def __init__(self):
        self.val = None
        self.bder = None
        return
    
    def evaluation_at(self, val_dict):
        return val_dict[self]
    
    def derivative_at(self, var, val_dict):
        return 1.0 if var is self else 0.0


class Constant(Expression):
    def __init__(self, val):
        self.val = val
        
    def evaluation_at(self, val_dict):
        return self.val
    
    def derivative_at(self, var, val_dict):
        return 0.0


class VectorFunction:
    
    def __init__(self, exprlist):
        self._exprlist = exprlist.copy()
    
    def evaluation_at(self, val_dict):
        return np.array([expr.evaluation_at(val_dict) 
                        for expr in self._exprlist])
    
    def gradient_at(self, var, val_dict):
        return np.array([f.derivative_at(var, val_dict) for f in self._exprlist])
    
    def jacobian_at(self, val_dict):
        return np.array([self.gradient_at(var, val_dict)
                         for var in val_dict.keys()]).transpose()


class Add:
    @staticmethod
    def evaluation_at(sub_expr1, sub_expr2, val_dict):
        return sub_expr1.evaluation_at(val_dict) + \
               sub_expr2.evaluation_at(val_dict)
    @staticmethod
    def derivative_at(sub_expr1, sub_expr2, var, val_dict):
        return sub_expr1.derivative_at(var, val_dict) + \
               sub_expr2.derivative_at(var, val_dict)
    @staticmethod
    def backderivative_at(sub_expr1,sub_expr2,var):
        return 1

class Sub:
    @staticmethod
    def evaluation_at(sub_expr1, sub_expr2, val_dict):
        return sub_expr1.evaluation_at(val_dict) - \
               sub_expr2.evaluation_at(val_dict)
    @staticmethod
    def derivative_at(sub_expr1, sub_expr2, var, val_dict):
        return sub_expr1.derivative_at(var, val_dict) - \
               sub_expr2.derivative_at(var, val_dict)
    
    @staticmethod
    def backderivative_at(sub_expr1,sub_expr2,var):
        if var == sub_expr1:
            return 1
        if var == sub_expr2:
            return -1 
class Mul:
    @staticmethod
    def evaluation_at(sub_expr1, sub_expr2, val_dict):
        return sub_expr1.evaluation_at(val_dict) *\
               sub_expr2.evaluation_at(val_dict)
    @staticmethod
    def derivative_at(sub_expr1, sub_expr2, var, val_dict):
        return sub_expr1.derivative_at(var, val_dict) * \
               sub_expr2.evaluation_at(val_dict)+ \
               sub_expr1.evaluation_at(val_dict) *\
               sub_expr2.derivative_at(var, val_dict)
    @staticmethod
    def backderivative_at(sub_expr1,sub_expr2,var):
        if var == sub_expr1:
            return sub_expr2.val
        else:
            return sub_expr1.val
        
               
class Div:
    @staticmethod
    def evaluation_at(sub_expr1, sub_expr2, val_dict):
        return sub_expr1.evaluation_at(val_dict) /\
               sub_expr2.evaluation_at(val_dict)
    @staticmethod
    def derivative_at(sub_expr1, sub_expr2, var, val_dict):
        return  sub_expr1.derivative_at(var, val_dict) / \
                sub_expr2.evaluation_at(val_dict)- \
                sub_expr1.evaluation_at(val_dict) *\
                sub_expr2.derivative_at(var, val_dict)/\
                sub_expr2.evaluation_at(val_dict)/\
                sub_expr2.evaluation_at(val_dict)
    @staticmethod
    def backderivative_at(sub_expr1,sub_expr2,var):
        if var == sub_expr1:
            return 1/sub_expr2.val
        elif var == sub_expr2:
            return -sub_expr1.val/sub_expr2/sub_expr2
#class Pow:
#    
#    @staticmethod
#    def evaluation_at(sub_expr1, sub_expr2, val_dict):
#        return sub_expr1.evaluation_at(val_dict) **\
#               sub_expr2.evaluation_at(val_dict)
#    @staticmethod
#    #f(x)^g(x) * g‘(x)  * ln( f(x) )+ f(x)^( g(x)-1 ) * g(x) * f’(x) 
#    def derivative_at(sub_expr1, sub_expr2, var, val_dict):
#        return  sub_expr1.evaluation_at(val_dict)** \
#                sub_expr2.evaluation_at(val_dict)* \
#                sub_expr2.derivative_at(var, val_dict)*\
#                np.log(sub_expr1.evaluation_at(val_dict))+ \
#                sub_expr1.evaluation_at(val_dict) **\
#                (sub_expr2.evaluation_at(val_dict)-1)*\
#                sub_expr2.evaluation_at(val_dict)*\
#                sub_expr1.derivative_at(var, val_dict)

# a simplified version: assuming sub_expr2 is a constant
class Pow:

    @staticmethod
    def evaluation_at(sub_expr1, sub_expr2, val_dict):
        return np.power(sub_expr1.evaluation_at(val_dict), 
                        sub_expr2.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1, sub_expr2, var, val_dict):
        p = sub_expr2.evaluation_at(val_dict)
        return p*np.power(sub_expr1.evaluation_at(val_dict), p-1.0) \
               * sub_expr1.derivative_at(var, val_dict)
    @staticmethod
    def backderivative_at(sub_expr1,sub_expr2,var):
        p = sub_expr2.evaluation_at(val_dict)
        return p*np.power(sub_expr1.val, p-1.0)

def pow(expr1, expr2):
    return Expression(Pow, expr1, expr2)

class Exp:
    @staticmethod
    def evaluation_at(sub_expr1, val_dict):
        return np.exp(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1, var, val_dict):
        return sub_expr1.derivative_at(var, val_dict) * \
               np.exp(sub_expr1.evaluation_at(val_dict))
    @staticmethod
    def backderivative_at(sub_expr1,var):
        return sub_expr1.val

class Neg:
    @staticmethod
    def evaluation_at(sub_expr1, val_dict):
        return -sub_expr1.evaluation_at(val_dict)
    
    @staticmethod
    def derivative_at(sub_expr1, var, val_dict):
        return -sub_expr1.derivative_at(var, val_dict)
    @staticmethod
    def back_derivative(sub_expr1,var):
        return -1

def exp(expr):
    return Expression(Exp, expr)


class Sin:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return np.sin(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        return sub_expr1.derivative_at(var, val_dict) * \
        np.cos(sub_expr1.evaluation_at(val_dict)) 
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        return np.cos(sub_expr1.val)
def sin(expr):
    return Expression(Sin, expr)

class Cos:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return np.cos(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        return -sub_expr1.derivative_at(var, val_dict) * \
               np.sin(sub_expr1.evaluation_at(val_dict)) 
    @staticmethod
    def backderivative_at(sub_expr1,var):
        return -np.sin(sub_expr1.val)

def cos(expr):
    return Expression(Cos, expr)
    
class Tan:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return np.tan(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        return sub_expr1.derivative_at(var, val_dict) /(np.cos(sub_expr1.evaluation_at(val_dict))**2)
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        return 1/(np.cos(sub_expr1.val)**2)
    
def tan(expr):
    return Expression(Tan, expr)
    
class Cotan:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return 1/np.tan(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):    
        return -sub_expr1.derivative_at(var, val_dict)/(np.sin(sub_expr1.evaluation_at(val_dict))**2)
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        return -1/(np.sin(sub_expr1.val)**2)

def cotan(expr):
    return Expression(Cotan, expr)
    
class Sec:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return 1/np.cos(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return sub_expr1.derivative_at(var, val_dict) * \
               np.tan(x) * (1/np.cos(x))
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x =sub_expr1.val
        return np.tan(x)/np.cos(x)
                      
def sec(expr):
    return Expression(Sec, expr) 

class Csc:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return 1/np.sin(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return -sub_expr1.derivative_at(var, val_dict) * \
               (1/np.tan(x)) * (1/np.sin(x))
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        return -(1/np.tan(x)) * (1/np.sin(x))

def csc(expr):
    return Expression(Csc, expr) 

class Sinh:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return np.sinh(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return sub_expr1.derivative_at(var, val_dict) * np.cosh(x)
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        return np.cosh(x)
        
def sinh(expr):
    return Expression(Sinh, expr) 

class Cosh:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        return np.cosh(sub_expr1.evaluation_at(val_dict))
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return sub_expr1.derivative_at(var, val_dict) * np.sinh(x)
    @staticmethod
    def backderivative_at(sub_expr1,var):
        return np.sinh(sub_expr1.val)
                      
def cosh(expr):
    return Expression(Cosh, expr) 
    
class Tanh:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return np.sinh(x)/np.cosh(x)
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        tanh = np.sinh(x)/np.cosh(x)
        return sub_expr1.derivative_at(var, val_dict) * (1-tanh*tanh)
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        tanh = np.sinh(x)/np.cosh(x)
        return 1-tanh*tanh
                      
def tanh(expr):
    return Expression(Tanh,expr) 

class Csch:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return 1/np.sinh(x)
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        # d = -csch(x)*cot(x)
        d = -(1/np.sinh(x)) * (np.cosh(x)/np.sinh(x))
        return sub_expr1.derivative_at(var, val_dict) * d
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        return -(np.cosh(x)/np.sinh(x))*(1/np.sinh(x))
                      
def csch(expr):
    return Expression(Csch, expr) 

class Sech:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return 1/np.cosh(x)
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        # d = -sech(x)tanh(x)
        d = -(1/np.cosh(x)) * (np.sinh(x)/np.cosh(x))
        return sub_expr1.derivative_at(var, val_dict)*d
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        return -(1/np.cosh(x)) * (np.sinh(x)/np.cosh(x))

def sech(expr):
    return Expression(Sech, expr) 

class Coth:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return np.cosh(x)/np.sinh(x)
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        coth = np.cosh(x)/np.sinh(x)
        # d = 1-coth^2(x)
        return sub_expr1.derivative_at(var, val_dict) * (1-coth**2)
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        coth = np.cosh(x)/np.sinh(x)            
        return 1-coth**2

def coth(expr):
    return Expression(Coth, expr)    

class Arcsin:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return np.arcsin(x)
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        d = 1/np.sqrt(1-x**2)
        #1/sqrt(1-x^2)
        return sub_expr1.derivative_at(var, val_dict) * d
    
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        return 1/np.sqrt(1-x**2)
                      

def arcsin(expr):
    return Expression(Arcsin, expr)
    
class Arccos:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return np.arccos(x)
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        d = 1/np.sqrt(1-x**2)
        #-1/sqrt(1-x^2)
        return -sub_expr1.derivative_at(var, val_dict) * d
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        return -1/np.sqrt(1-x**2)

def arccos(expr):
    return Expression(Arccos, expr)
    
class Arctan:
    @staticmethod
    def evaluation_at(sub_expr1,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        return np.arctan(x)
    
    @staticmethod
    def derivative_at(sub_expr1,var,val_dict):
        x = sub_expr1.evaluation_at(val_dict)
        d = 1/(1+x**2)
        # d =1/1+x^2
        return sub_expr1.derivative_at(var, val_dict) * d
    @staticmethod
    def backderivative_at(sub_expr1,var):
        x = sub_expr1.val
        return 1/(1+x**2)

def arctan(expr):
    return Expression(Arctan, expr)

In [224]:
def forward(y,val_dict):
    # forward pass, store values
    if type(y) == Expression:
        y.val = y.evaluation_at(val_dict)
        if y._sub_expr1 != None:
            forward(y._sub_expr1,val_dict)
        if y._sub_expr2!=None:
            forward(y._sub_expr2,val_dict)
    elif isinstance(y,Variable):
        y.val = val_dict[y]
    return 

In [225]:
def initialize(top,y):
    #print(y.val)
    if y == top:
        y.bder = 1     
    elif y!=None:
        y.bder = 0
    if type(y)==Expression:
        initialize(top,y._sub_expr1)
        initialize(top,y._sub_expr2)
    return

In [226]:
def backward(y,val_dict,depth = 0):
    # val_dict stores the basic variables
    # (atomic,cannot be further decomposed)
    if type(y)== Expression:
        if y._sub_expr1 != None:
            y._sub_expr1.bder += y.bder*y.back_derivative(y._sub_expr1,val_dict)
            backward(y._sub_expr1,val_dict,depth+1)
        if y._sub_expr2 !=None:
            y._sub_expr2.bder+=y.bder*y.back_derivative(y._sub_expr2,val_dict)
            backward(y._sub_expr2,val_dict,depth+1)
    return 

In [221]:
def back_propagation(y,val_dict):
    # get backprop derivative with respect to y at every node lower than y
    forward(y,val_dict)
    initialize(y,y)
    backward(y,val_dict)

**Demo** Compare BackPropagation to Forward Mode Autodifferentiation

In [192]:
a= Variable()
b = Variable()
c = a+b
d = Variable()
e = c*d
f = a+e
val_dict = {b:1,a:2,d:4}
back_propagation(f,val_dict)
print('result')
print(f.bder)
print(e.bder)
print(d.bder)
print(c.bder)
print(a.bder)
print(b.bder)

result
1
1
3
4
5
4


In [193]:
print(f.derivative_at(f,val_dict))
print(f.derivative_at(e,val_dict))
print(f.derivative_at(d,val_dict))
print(f.derivative_at(c,val_dict))
print(f.derivative_at(a,val_dict))
print(f.derivative_at(b,val_dict))

1.0
1.0
3.0
4.0
5.0
4.0


In [194]:
a = Variable()
b = Variable()
e = b-a
c = cos(e)
d = a+c
val_dict = {b:1,a:2}
back_propagation(d,val_dict)
print(a.bder)
print(b.bder)
print(e.bder)
print(d.bder)
print(c.bder)

0.1585290151921035
0.8414709848078965
0.8414709848078965
1
1


In [195]:
print(d.derivative_at(a,val_dict))
print(d.derivative_at(b,val_dict))
print(d.derivative_at(e,val_dict))
print(d.derivative_at(d,val_dict))
print(d.derivative_at(c,val_dict))

0.1585290151921035
0.8414709848078965
0.8414709848078965
1.0
1.0


In [228]:
a = Variable()
b = Variable()
c = csc(a)
d = sec(a)
e = tan(c)
f = cotan(d)
g = sinh(f+e)

In [229]:
val_dict = {b:1,a:2}
back_propagation(g,val_dict)
var_list = [a,b,c,d,e,f,g]
for i in var_list:
    print(i.bder)
    print(g.derivative_at(i,val_dict))

-97.86132760728495
-97.86132760728493
None
0.0
51.98629466872728
51.98629466872728
-23.621186465077585
-23.621186465077585
10.706653521655742
10.706653521655742
10.706653521655742
10.706653521655742
1
1.0
