In [36]:
"""
The basis for a framework of automatic differentiation such as PyTorch.

What we want is to:
1. Build a forward pass manually, passing tensors / variable through blocks
2. Make sure each variable remember which block uses it (to get its gradients)
"""

import abc
import functools
import itertools
import math
import numpy as np
import operator
from typing import List

In [48]:
class Variable:
    def __init__(self, value, from_op=None):
        self.value = value
        self.gradient = None
        self.from_op = from_op
        self.gradient_fcts = []
    
    def compute_gradient(self):
        self.gradient = sum(fct.derivative_by(self) for fct in self.gradient_fcts) if self.gradient_fcts else 1
        return self.gradient
    
    def backward(self):
        # TODO - in need for a graph to do a kind of topological sort?
        pass

    
class Function(abc.ABC):
    # TODO - make it a metaclass
    # TODO - you could make it a Monad in Haskell
    
    def __init__(self, arguments: List[Variable]):
        self.arguments = arguments
        self.output = None
    
    def __call__(self) -> Variable:
        result = self.apply(arg.value for arg in self.arguments)
        self.output = Variable(result, self)
        for arg in self.arguments:
            arg.gradient_fcts.append(self)
        return self.output
    
    @abc.abstractmethod
    def apply(self, argument_values) -> Variable:
        pass
    
    @abc.abstractmethod
    def derivative_by(self, by: Variable) -> float:
        pass
    
    
class AddFct(Function):
    def __init__(self, arguments):
        super().__init__(arguments)
    
    def apply(self, argument_values):
        return functools.reduce(operator.add, argument_values, 0)
    
    def derivative_by(self, x: Variable):
        if x in self.arguments:
            return self.output.gradient
        return 0


class MultiplyFct(Function):
    def __init__(self, arguments):
        super().__init__(arguments)
    
    def apply(self, argument_values):
        return functools.reduce(operator.mult, argument_values, 1)
    
    def derivative_by(self, x: Variable):
        total = 1.
        for arg in self.arguments:
            if arg != x:
                total *= arg.value
        return total
    
    
def add(v1: Variable, v2: Variable):
    op = AddFct([v1, v2])
    return op()


def add(v1: Variable, v2: Variable):
    op = AddFct([v1, v2])
    return op()


x1 = Variable(1)
x2 = Variable(2)
x3 = Variable(3)
y1 = add(x1, x2)
y2 = add(x2, x3)
z = multiply(y1, y2)
print(z.compute_gradient())
print(y1.compute_gradient())
print(y2.compute_gradient())
print(x1.compute_gradient())
print(x2.compute_gradient())
print(x3.compute_gradient())

TypeError: __init__() takes 2 positional arguments but 3 were given

In [10]:
import torch
x = torch.zeros((1, 1), requires_grad=True)
print(x)
y = torch.exp(x)
print(y)

tensor([[0.]], requires_grad=True)
tensor([[1.]], grad_fn=<ExpBackward>)
