In [1]:
import numpy as np

class Tensor (object):
    """NumPy array wrapper with a minimal reverse-mode autograd.

    A tensor produced by an operation on autograd tensors remembers its
    inputs (``creators``) and the name of the operation (``creation_op``).
    Each input counts, in ``children``, how many gradient contributions it
    still expects from every tensor derived from it. ``backward`` only
    forwards a gradient to the creators once all of those contributions
    have arrived (or when it is called directly, without ``grad_origin``).

    Every backward rule passes ``self`` as ``grad_origin`` so that this
    bookkeeping is honoured for all operations, and the intermediates
    built during backward are plain (non-autograd) tensors so that no
    extra children are registered while gradients are flowing.
    """

    def __init__(self,data,
                 autograd=False,
                 creators=None,
                 creation_op=None,
                 id=None):
        """Wrap ``data`` and, if ``creators`` is given, register as their child.

        Args:
            data: anything accepted by ``np.array``.
            autograd: whether this tensor takes part in gradient tracking.
            creators: list of tensors this one was computed from, or None.
            creation_op: name of the operation that produced this tensor.
            id: explicit identifier; a random one is drawn when None.
        """
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None
        if(id is None):
            # NOTE: drawn from NumPy's global RNG, so creating a tensor
            # advances the global random stream.
            self.id = np.random.randint(0,100000)
        else:
            self.id = id

        self.creators = creators
        self.creation_op = creation_op
        # Maps child id -> number of gradients still expected from it.
        self.children = {}

        if(creators is not None):
            for c in creators:
                # A creator listed twice (e.g. x + x) is counted twice.
                c.children[self.id] = c.children.get(self.id, 0) + 1

    def all_children_grads_accounted_for(self):
        """Return True when no child still owes this tensor a gradient."""
        return all(cnt == 0 for cnt in self.children.values())

    def backward(self,grad=None, grad_origin=None):
        """Accumulate ``grad`` into ``self.grad`` and propagate to creators.

        Args:
            grad: non-autograd Tensor holding the incoming gradient;
                defaults to ones shaped like ``self.data``.
            grad_origin: the child tensor the gradient comes from, or None
                when backward is started on this tensor directly.

        Raises:
            Exception: if ``grad_origin`` has already delivered all the
                gradients it was registered for.
            KeyError: if ``grad_origin`` is not a registered child.
            AssertionError: if ``grad`` itself has autograd enabled.

        Does nothing when ``self.autograd`` is False.
        """
        if(not self.autograd):
            return

        if(grad is None):
            grad = Tensor(np.ones_like(self.data))

        if(grad_origin is not None):
            if(self.children[grad_origin.id] == 0):
                raise Exception("cannot backprop more than once")
            else:
                self.children[grad_origin.id] -= 1

        if(self.grad is None):
            self.grad = grad
        else:
            # Tensor defines no __iadd__, so this rebinds self.grad to a
            # new tensor instead of mutating the previous one in place.
            self.grad += grad

        assert grad.autograd == False

        # Propagate only once the gradient is complete, or when backward
        # was invoked directly on this tensor.
        if(self.creators is None or
           not (self.all_children_grads_accounted_for() or
                grad_origin is None)):
            return

        op = self.creation_op

        if(op == "add"):
            self.creators[0].backward(self.grad, self)
            self.creators[1].backward(self.grad, self)

        elif(op == "sub"):
            self.creators[0].backward(Tensor(self.grad.data), self)
            self.creators[1].backward(Tensor(self.grad.__neg__().data), self)

        elif(op == "mul"):
            new = self.grad * self.creators[1]
            self.creators[0].backward(new , self)
            new = self.grad * self.creators[0]
            self.creators[1].backward(new, self)

        elif(op == "mm"):
            c0 = self.creators[0]
            c1 = self.creators[1]
            # Transpose the raw data rather than calling c1.transpose():
            # the latter would create an autograd tensor and register a
            # child on c1 that never reports a gradient.
            new = self.grad.mm(Tensor(c1.data.transpose()))
            c0.backward(new, self)
            new = self.grad.transpose().mm(c0).transpose()
            c1.backward(new, self)

        elif(op == "transpose"):
            self.creators[0].backward(self.grad.transpose(), self)

        elif("sum" in op):
            # creation_op is "sum_<dim>"; re-expand along the summed axis.
            dim = int(op.split("_")[1])
            self.creators[0].backward(self.grad.expand(dim,
                                                       self.creators[0].data.shape[dim]),
                                      self)

        elif("expand" in op):
            # creation_op is "expand_<dim>"; sum the copies back together.
            dim = int(op.split("_")[1])
            self.creators[0].backward(self.grad.sum(dim), self)

        elif(op == "neg"):
            self.creators[0].backward(self.grad.__neg__(), self)

        elif(op == "sigmoid"):
            # d/dx sigmoid(x) = s * (1 - s), with s being this tensor.
            ones = Tensor(np.ones_like(self.grad.data))
            self.creators[0].backward(self.grad * (self * (ones - self)), self)

        elif(op == "tanh"):
            # d/dx tanh(x) = 1 - t**2, with t being this tensor. The square
            # is built as a plain tensor so it is not registered as a
            # child of self.
            ones = Tensor(np.ones_like(self.grad.data))
            squared = Tensor(self.data * self.data)
            self.creators[0].backward(self.grad * (ones - squared), self)

    def __add__(self, other):
        """Elementwise sum; tracked only if both operands have autograd."""
        if(self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        """Elementwise negation."""
        if(self.autograd):
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)

    def __sub__(self, other):
        """Elementwise difference; tracked only if both operands have autograd."""
        if(self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="sub")
        return Tensor(self.data - other.data)

    def __mul__(self, other):
        """Elementwise product; tracked only if both operands have autograd."""
        if(self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="mul")
        return Tensor(self.data * other.data)

    def sum(self, dim):
        """Sum over axis ``dim``; the axis is recorded in ``creation_op``."""
        if(self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_"+str(dim))
        return Tensor(self.data.sum(dim))

    def expand(self, dim,copies):
        """Insert a new axis at position ``dim`` holding ``copies`` repeats."""
        # Repeat every element `copies` times along a new trailing axis,
        # then move that trailing axis to position `dim`.
        trans_cmd = list(range(0,len(self.data.shape)))
        trans_cmd.insert(dim,len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)

        if(self.autograd):
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_"+str(dim))
        return Tensor(new_data)

    def transpose(self):
        """Return a tensor wrapping ``self.data.transpose()``."""
        if(self.autograd):
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")

        return Tensor(self.data.transpose())

    def mm(self, x):
        """Matrix product ``self.data.dot(x.data)``; tracked if self has autograd."""
        if(self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self,x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))

    def sigmoid(self):
        """Elementwise logistic function 1 / (1 + exp(-x))."""
        if(self.autograd):
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd=True,
                          creators=[self],
                          creation_op="sigmoid")
        return Tensor(1 / (1 + np.exp(-self.data)))

    def tanh(self):
        """Elementwise hyperbolic tangent."""
        if(self.autograd):
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))

    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())
    
class SGD(object):
    """Plain stochastic gradient descent over a list of parameters.

    Each parameter is expected to expose a ``data`` array and a ``grad``
    attribute that is either None or an object with its own ``data`` array.
    """

    def __init__(self, parameters, alpha = 0.1):
        """Store the parameters to update and the learning rate ``alpha``."""
        self.parameters = parameters
        self.alpha = alpha

    def zero(self):
        """Reset every accumulated gradient to zero, in place."""
        for p in self.parameters:
            # A parameter that never received a gradient has grad None.
            if(p.grad is not None):
                p.grad.data *= 0

    def step(self, zero = True):
        """Apply ``data -= grad * alpha`` to each parameter.

        Args:
            zero: when True, also zero each gradient after using it.

        Parameters whose ``grad`` is None are left untouched instead of
        raising AttributeError.
        """
        for p in self.parameters:
            if(p.grad is None):
                continue
            p.data -= p.grad.data * self.alpha
            if(zero):
                p.grad.data *= 0
                
class Layer(object):
    """Base class for layers: owns a list of trainable parameters."""

    def __init__(self):
        # Subclasses append their trainable tensors to this list.
        self.parameters = []

    def get_parameters(self):
        """Return the layer's parameter list (the list itself, not a copy)."""
        return self.parameters
    
class Linear(Layer):
    """Affine layer computing ``input.mm(weight) + bias``."""

    def __init__(self, n_inputs, n_outputs):
        """Create a randomly scaled weight matrix and a zero bias vector."""
        super().__init__()
        # Standard-normal draws scaled by sqrt(2 / n_inputs).
        scale = np.sqrt(2/(n_inputs))
        w = np.random.randn(n_inputs, n_outputs) * scale
        self.weight = Tensor(w, autograd = True)
        self.bias = Tensor(np.zeros(n_outputs), autograd = True)
        self.parameters.extend([self.weight, self.bias])

    def forward(self, input):
        """Return ``input.mm(weight)`` plus the bias repeated once per input row."""
        projected = input.mm(self.weight)
        # Expand the bias along a new leading axis of length len(input.data)
        # so that it lines up with the product above.
        row_bias = self.bias.expand(0, len(input.data))
        return projected + row_bias
    
class Sequential(Layer):
    """Container that applies a list of layers one after another."""

    def __init__(self, layers = None):
        """Store ``layers``; a fresh empty list is created when omitted.

        The default is None rather than a list literal: a list used as a
        default argument is created once and would be shared by every
        instance built without arguments, so ``add`` on one model would
        also change the others. A list passed explicitly is stored as is
        (not copied).
        """
        super().__init__()
        self.layers = layers if layers is not None else []

    def add(self, layer):
        """Append ``layer`` to the end of the pipeline."""
        self.layers.append(layer)

    def forward(self, input):
        """Feed ``input`` through every layer in order and return the result."""
        for layer in self.layers:
            input = layer.forward(input)
        return input

    def get_parameters(self):
        """Return a new list concatenating the parameters of all layers."""
        params = list()
        for l in self.layers:
            params += l.get_parameters()
        return params
        
class MSELoss(Layer):
    """Squared-error loss: squared differences summed over axis 0."""

    def __init__(self):
        super().__init__()

    def forward(self, pred, target):
        """Return ``((pred - target) * (pred - target)).sum(0)``."""
        # The difference is deliberately computed twice, as two separate
        # tensors, so the computation graph has one node per operand.
        left = pred - target
        right = pred - target
        squared = left * right
        return squared.sum(0)
    
class Tanh(Layer):
    """Parameter-free layer applying ``tanh`` to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        """Return ``input.tanh()``."""
        activated = input.tanh()
        return activated
    
class Sigmoid(Layer):
    """Parameter-free layer applying ``sigmoid`` to its input."""

    def __init__(self):
        super().__init__()

    def forward(self, input):
        """Return ``input.sigmoid()``."""
        activated = input.sigmoid()
        return activated

In [2]:
# Small graph in which b is negated twice and feeds both branches of f.
a = Tensor([1,2,3,4,5], autograd=True)
b = Tensor([2,2,2,2,2], autograd=True)
c = Tensor([5,4,3,2,1], autograd=True)

d = a + (-b)
e = (-b) + c
f = d + e

# Seed the backward pass with a gradient of ones.
f.backward(Tensor(np.array([1,1,1,1,1])))

# Print the accumulated gradient of every node, inputs first.
for node in (a, b, c, d, e, f):
    print(node.grad.data)

[1 1 1 1 1]
[-2 -2 -2 -2 -2]
[1 1 1 1 1]
[1 1 1 1 1]
[1 1 1 1 1]
[1 1 1 1 1]


In [3]:
np.random.seed(1)

# Four two-feature samples with one target value each.
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

# List elements are evaluated left to right, so the random draws happen
# in the same order as when the tensors are appended one by one.
weights = [
    Tensor(np.random.rand(2,3), autograd=True),
    Tensor(np.random.rand(3,1), autograd=True),
]

for epoch in range(10):
    # Forward pass: two chained matrix products, then summed squared error.
    pred = data.mm(weights[0]).mm(weights[1])
    loss = ((pred - target)*(pred - target)).sum(0)
    loss.backward(Tensor(np.ones_like(loss.data)))

    # Manual gradient-descent update, then reset the gradient in place.
    for weight in weights:
        weight.data -= 0.1 * weight.grad.data
        weight.grad.data *= 0

    print(loss)

[1.12427324]
[0.64112616]
[0.44318917]
[0.31387083]
[0.20897697]
[0.12969368]
[0.07493533]
[0.04045744]
[0.02057729]
[0.00996295]


In [4]:
np.random.seed(1)

# Four two-feature samples with one target value each.
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

# List elements are evaluated left to right, so the random draws happen
# in the same order as when the tensors are appended one by one.
weights = [
    Tensor(np.random.rand(2,3), autograd=True),
    Tensor(np.random.rand(3,1), autograd=True),
]

# The optimizer takes over the update-and-zero step.
optim = SGD(parameters=weights, alpha=0.1)

for epoch in range(10):
    pred = data.mm(weights[0]).mm(weights[1])
    loss = ((pred - target)*(pred - target)).sum(0)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()

    print(loss)

[1.12427324]
[0.64112616]
[0.44318917]
[0.31387083]
[0.20897697]
[0.12969368]
[0.07493533]
[0.04045744]
[0.02057729]
[0.00996295]


In [5]:
# Four two-feature samples with one target value each.
data = Tensor(np.array([[0,0], [0,1], [1,0], [1,1]]), autograd = True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd = True)

# Two stacked linear layers, trained with SGD at learning rate 0.05.
model = Sequential([Linear(2,3), Linear(3,1)])
optim = SGD(parameters = model.get_parameters(), alpha = 0.05)

for epoch in range(10):
    pred = model.forward(data)
    # Summed squared error, written out by hand.
    loss = ((pred - target) * (pred - target)).sum(0)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[0.58851814]
[0.53258841]
[4.65643674]
[35.63491234]
[36.18671332]
[99.85775706]
[39.46443576]
[10060.88863906]
[2.96794571e+09]
[1.06242054e+26]


In [6]:
# Four two-feature samples with one target value each.
data = Tensor(np.array([[0,0], [0,1], [1,0], [1,1]]), autograd = True)
target = Tensor(np.array([[0], [1], [0], [1]]), autograd = True)

# Two stacked linear layers, SGD at learning rate 0.05, loss as a layer.
model = Sequential([Linear(2,3), Linear(3,1)])
optim = SGD(parameters = model.get_parameters(), alpha = 0.05)
criterion = MSELoss()

for epoch in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[2.32867133]
[0.38326249]
[0.17697789]
[0.12255879]
[0.09078094]
[0.06911117]
[0.05373838]
[0.04245233]
[0.03390907]
[0.02728052]


In [7]:
# Four two-feature samples with one target value each.
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

# Linear -> tanh -> linear -> sigmoid, built before the loss so that the
# random weight draws happen in the same order.
model = Sequential([Linear(2,3), Tanh(), Linear(3,1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=1)

for epoch in range(10):
    pred = model.forward(data)
    loss = criterion.forward(pred, target)
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    print(loss)

[1.18121562]
[0.77282441]
[0.54610058]
[0.35495787]
[0.23000602]
[0.15698963]
[0.11390411]
[0.08712864]
[0.06943514]
[0.0570973]
