In [1]:
#The purpose of this notebook is to create a tensor class for use with neural network training

In [2]:
import numpy as np

class Tensor(object):
    '''Minimal tensor: wraps a numpy array and remembers how it was produced.

    Only addition is tracked in this first version. `creators` holds the
    operand tensors and `creation_op` the name of the operation that built
    this tensor; both stay None for tensors created directly from data.
    '''

    def __init__(self, data, creators=None, creation_op=None):
        self.data = np.array(data)
        self.creators = creators        # operand tensors, or None for a leaf
        self.creation_op = creation_op  # name of the producing op, e.g. 'add'
        self.grad = None                # set by backward()

    def backward(self, grad=None, grad_origin=None):
        '''Store `grad` on this tensor and hand it unchanged to both operands of an add.'''
        self.grad = grad

        # Anything that was not produced by an addition ends the walk here.
        if self.creation_op != 'add':
            return

        # d(a + b)/da = d(a + b)/db = 1, so the same gradient goes to each side.
        self.creators[0].backward(grad)
        self.creators[1].backward(grad)

    def __add__(self, other):
        '''Element-wise sum; the result records both operands as its creators.'''
        total = self.data + other.data
        return Tensor(total, creators=[self, other], creation_op='add')

    def __repr__(self):
        return repr(self.data)

    def __str__(self):
        return str(self.data)


In [19]:
# Smoke test: build a tensor and add it to itself (element-wise doubling).
x = Tensor([1,2,3,4,5])
#print(x)
y = x + x  # y.creators is [x, x] and y.creation_op is 'add'
#print(y)

[1 2 3 4 5]
[ 2  4  6  8 10]


In [21]:
# Build z = x + y and push a gradient of ones back through the addition.
x = Tensor([1,2,3,4,5])
y = Tensor([2,2,2,2,2])
z = x + y #Create a new computation graph with 3 nodes, 2 edges
z.backward(Tensor(np.array([1,1,1,1,1]))) #Backprop from z: the same gradient tensor is stored as x.grad and y.grad

# Inspect the gradients and the recorded graph metadata of z.
#print('{}\n{}\n{}\n{}'.format(x.grad,y.grad,z.creators,z.creation_op))

[1 1 1 1 1]
[1 1 1 1 1]
[array([1, 2, 3, 4, 5]), array([2, 2, 2, 2, 2])]
add


In [79]:
#Overwrite previous tensor
class Tensor (object):
    '''Numpy array wrapper with optional reverse-mode automatic differentiation.

    When `autograd` is True, every supported operation returns a new Tensor
    that records its operands (`creators`) and the operation name
    (`creation_op`). `backward` walks that graph and accumulates gradients
    into `.grad`. Each tensor also counts, per child id, how many gradients
    it still expects (`children`), so a node used several times can wait
    until all of them have arrived before propagating further.
    '''
    def __init__(self, data,
                 autograd = False,
                 creators = None,
                 creation_op = None,
                 id = None):
        '''
        data        -- anything np.array() accepts
        autograd    -- whether operations on this tensor are tracked
        creators    -- list of tensors this tensor was computed from (None for a leaf)
        creation_op -- name of the operation that produced this tensor
        id          -- explicit identifier; drawn from numpy's global RNG when omitted
        '''
        self.data = np.array(data)
        self.creation_op = creation_op #Name of the operation that produced this tensor
        self.creators = creators #List of tensors used in the creation of the current tensor
        self.grad = None #Accumulated gradient, filled in by backward()
        self.autograd = autograd #Whether operations on this tensor are tracked
        self.children = {} #child id -> number of gradients still expected from that child
        if(id is None):
            id = np.random.randint(0,100000)
        self.id = id

        if(creators is not None): #This tensor is the result of an operation
            for c in creators: #Register this tensor as a child of each operand
                if(self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1 #Same operand used more than once (e.g. x + x)

    def all_children_grads_accounted_for(self):
        '''Return True when no child still owes this tensor a gradient.'''
        for id,cnt in self.children.items():
            if(cnt != 0):
                return False
        return True

    def backward(self, grad=None, grad_origin = None):
        '''Accumulate `grad` into self.grad and propagate it to the creators.

        grad        -- gradient of the final output w.r.t. this tensor
                       (defaults to a tensor of ones shaped like self.data)
        grad_origin -- the child tensor the gradient comes from; when given,
                       that child's pending count is decremented

        Raises Exception if `grad_origin` has already delivered all the
        gradients it was registered for. Does nothing when autograd is off.
        '''
        if(not self.autograd):
            return

        if(grad is None):
            grad = Tensor(np.ones_like(self.data))

        if(grad_origin is not None):
            if(self.children[grad_origin.id] == 0): #This child already delivered everything it owed
                raise Exception("Cannot backprop multiple times")
            else:
                self.children[grad_origin.id] -= 1

        if(self.grad is None):
            self.grad = grad
        else:
            self.grad += grad #Tensor defines no __iadd__, so this rebinds self.grad to a new tensor

        #Propagate only if this tensor was produced by an operation AND either
        #every child has reported or backward() was called on it directly.
        ready = (self.all_children_grads_accounted_for() or grad_origin is None)
        if(self.creators is None or not ready):
            return

        op = self.creation_op
        if(op is None):
            return

        if(op == 'add'):
            #Forward the accumulated gradient and identify ourselves as the origin,
            #like 'sub' and 'mul' do; forwarding only the latest `grad` drops the
            #earlier contributions when this node has several children.
            self.creators[0].backward(self.grad, self)
            self.creators[1].backward(self.grad, self)

        elif(op == 'neg'):
            self.creators[0].backward(self.grad.__neg__())

        elif(op == "sub"):
            new = Tensor(self.grad.data)
            self.creators[0].backward(new, self)
            new = Tensor(self.grad.__neg__().data)
            self.creators[1].backward(new, self)

        elif(op == "mul"):
            new = self.grad * self.creators[1]
            self.creators[0].backward(new , self)
            new = self.grad * self.creators[0]
            self.creators[1].backward(new, self)

        elif(op == "mm"):
            left = self.creators[0]    #Left operand of the matrix product
            right = self.creators[1]   #Right operand of the matrix product
            new = self.grad.mm(right.transpose())
            left.backward(new)
            new = self.grad.transpose().mm(left).transpose()
            right.backward(new)

        elif(op == "transpose"):
            self.creators[0].backward(self.grad.transpose())

        elif(op.startswith("sum_")):
            #The summed dimension is encoded in the op name ("sum_<dim>")
            dim = int(op.split("_")[1])
            ds = self.creators[0].data.shape[dim]
            self.creators[0].backward(self.grad.expand(dim,ds))

        elif(op.startswith("expand_")):
            dim = int(op.split("_")[1])
            self.creators[0].backward(self.grad.sum(dim))

        elif(op == 'sigmoid'):
            #d sigmoid(x)/dx = y * (1 - y) with y = sigmoid(x) = self
            ones = Tensor(np.ones_like(self.grad.data))
            self.creators[0].backward(self.grad * (self * (ones - self)))

        elif(op == 'tanh'):
            #d tanh(x)/dx = 1 - y^2 with y = tanh(x) = self. Computed on the raw
            #arrays so no tracked tensor is registered as a child of this node.
            local = 1 - self.data * self.data
            self.creators[0].backward(Tensor(self.grad.data * local))

        elif(op == "index_select"):
            #Scatter-add the incoming rows back to the rows they were selected from
            new_grad = np.zeros_like(self.creators[0].data)
            indices_ = self.index_select_indices.data.flatten()
            grad_ = grad.data.reshape(len(indices_), -1)
            for i in range(len(indices_)):
                new_grad[indices_[i]] += grad_[i]
            self.creators[0].backward(Tensor(new_grad))

        elif(op == "cross_entropy"):
            #softmax_output and target_dist were stored on this tensor by cross_entropy()
            dx = self.softmax_output - self.target_dist
            self.creators[0].backward(Tensor(dx))

    def __add__(self, other):
        '''Element-wise sum; tracked only when both operands have autograd on.'''
        if(self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd = True,
                          creators = [self,other],
                          creation_op = 'add')
        return Tensor(self.data + other.data)

    def __neg__(self):
        '''Element-wise negation.'''
        if(self.autograd):
            return Tensor(self.data * -1,
                          autograd = True,
                          creators = [self],
                          creation_op = 'neg')

        return Tensor(self.data * -1)

    def __sub__(self,other):
        '''Element-wise difference; tracked only when both operands have autograd on.'''
        if(self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd = True,
                          creators = [self,other],
                          creation_op = 'sub')
        return Tensor(self.data - other.data)

    def __mul__(self,other):
        '''Element-wise product; tracked only when both operands have autograd on.'''
        if(self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd = True,
                          creators = [self,other],
                          creation_op = 'mul')
        return Tensor(self.data * other.data)


    def sum(self,dim):
        '''Sum the tensor across the single dimension `dim`.'''
        if(self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd = True,
                          creators = [self],
                          creation_op = 'sum_' + str(dim))
        return Tensor(self.data.sum(dim))

    def expand(self, dim, copies):
        '''Insert a new axis at position `dim` holding `copies` repetitions of the data.'''
        #repeat() appends the copies as a trailing axis; the transpose moves it to `dim`
        trans_cmd = list(range(0,len(self.data.shape)))
        trans_cmd.insert(dim,len(self.data.shape))
        new_shape = list(self.data.shape) + [copies]
        new_data = self.data.repeat(copies).reshape(new_shape)
        new_data = new_data.transpose(trans_cmd)


        if(self.autograd):
            return Tensor(new_data,
                          autograd = True,
                          creators = [self],
                          creation_op = 'expand_'+str(dim))
        return Tensor(new_data)

    def transpose(self):
        '''Return the transposed tensor.'''
        if(self.autograd):
            return Tensor(self.data.transpose(),
                          autograd = True,
                          creators = [self],
                          creation_op = 'transpose')
        return Tensor(self.data.transpose())

    def mm(self,x):
        '''Return self.data.dot(x.data); tracked whenever self has autograd on.'''
        if(self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd = True,
                          creators = [self,x],
                          creation_op = 'mm')
        return Tensor(self.data.dot(x.data))

    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())

    def sigmoid(self):
        '''Element-wise logistic function 1 / (1 + exp(-x)).'''
        if(self.autograd):
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd = True,
                          creators = [self],
                          creation_op = 'sigmoid')
        return Tensor(1 / (1 + np.exp(-self.data)))

    def tanh(self):
        '''Element-wise hyperbolic tangent.'''
        if(self.autograd):
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))

    def index_select(self, indices):
        '''Select rows self.data[indices.data]; used by embedding layers to look up rows by index.'''
        if(self.autograd):
            new = Tensor(self.data[indices.data],
                         autograd=True,
                         creators=[self],
                         creation_op="index_select")
            new.index_select_indices = indices #Needed by backward() to route gradients to the selected rows
            return new
        return Tensor(self.data[indices.data])

    def cross_entropy(self, target_indices):
        '''Softmax over the last axis followed by the mean cross-entropy against `target_indices`.

        target_indices -- tensor of integer class indices, one per row of the
                          flattened (len(targets), n_classes) prediction matrix
        '''
        temp = np.exp(self.data)
        softmax_output = temp / np.sum(temp,
                                       axis = len(self.data.shape) - 1,
                                       keepdims = True)
        t = target_indices.data.flatten()
        p = softmax_output.reshape(len(t),-1)
        target_dist = np.eye(p.shape[1])[t] #One-hot rows built from the target indices
        loss = -(np.log(p) * (target_dist)).sum(1).mean()

        if(self.autograd):
            out = Tensor(loss,
                         autograd=True,
                         creators=[self],
                         creation_op="cross_entropy")
            #Stored for backward(): the gradient passed on is softmax_output - target_dist
            out.softmax_output = softmax_output
            out.target_dist = target_dist
            return out

        return Tensor(loss)

In [None]:
class Tensor (object):
    '''Numpy array wrapper with optional reverse-mode automatic differentiation.

    With `autograd=True`, supported operations return new tensors that record
    their operands (`creators`) and the operation name (`creation_op`).
    `backward` walks this graph, accumulating gradients in `.grad`. The
    `children` dict maps a child id to the number of gradients that child
    still owes this tensor.
    '''

    def __init__(self,data,
                 autograd=False,
                 creators=None,
                 creation_op=None,
                 id=None):
        '''
        data        -- anything np.array() accepts
        autograd    -- whether operations on this tensor are tracked
        creators    -- list of operand tensors (None for a leaf)
        creation_op -- name of the operation that produced this tensor
        id          -- explicit identifier; drawn from numpy's global RNG when omitted
        '''
        self.data = np.array(data)
        self.autograd = autograd
        self.grad = None

        if(id is None):
            self.id = np.random.randint(0,1000000000)
        else:
            self.id = id

        self.creators = creators
        self.creation_op = creation_op
        self.children = {}

        # register this tensor as a child of each operand; an operand that
        # appears twice (e.g. x + x) gets a count of 2
        if(creators is not None):
            for c in creators:
                if(self.id not in c.children):
                    c.children[self.id] = 1
                else:
                    c.children[self.id] += 1

    def all_children_grads_accounted_for(self):
        '''Return True when no child still owes this tensor a gradient.'''
        for id,cnt in self.children.items():
            if(cnt != 0):
                return False
        return True

    def backward(self,grad=None, grad_origin=None):
        '''Accumulate `grad` into self.grad and propagate it to the creators.

        grad        -- gradient w.r.t. this tensor; must itself have autograd
                       off (defaults to ones shaped like self.data)
        grad_origin -- the child tensor the gradient comes from; its pending
                       count is decremented. A gradient from a child that has
                       already delivered everything it owed is ignored.

        Does nothing when autograd is off.
        '''
        if(not self.autograd):
            return

        if(grad is None):
            grad = Tensor(np.ones_like(self.data))

        if(grad_origin is not None):
            if(self.children[grad_origin.id] == 0):
                # this child already delivered all of its gradients:
                # silently ignore the extra one
                return
            else:
                self.children[grad_origin.id] -= 1

        if(self.grad is None):
            self.grad = grad
        else:
            # Tensor defines no __iadd__, so this rebinds self.grad to a new tensor
            self.grad += grad

        # grads must not have grads of their own
        assert grad.autograd == False

        # only continue backpropping if there's something to
        # backprop into and if all gradients (from children)
        # are accounted for; override waiting for children if
        # "backward" was called on this variable directly
        ready = (self.all_children_grads_accounted_for() or grad_origin is None)
        if(self.creators is None or not ready):
            return

        op = self.creation_op
        if(op is None):
            return

        if(op == "add"):
            self.creators[0].backward(self.grad, self)
            self.creators[1].backward(self.grad, self)

        elif(op == "sub"):
            self.creators[0].backward(Tensor(self.grad.data), self)
            self.creators[1].backward(Tensor(self.grad.__neg__().data), self)

        elif(op == "mul"):
            new = self.grad * self.creators[1]
            self.creators[0].backward(new , self)
            new = self.grad * self.creators[0]
            self.creators[1].backward(new, self)

        elif(op == "mm"):
            c0 = self.creators[0]
            c1 = self.creators[1]
            new = self.grad.mm(c1.transpose())
            c0.backward(new)
            new = self.grad.transpose().mm(c0).transpose()
            c1.backward(new)

        elif(op == "transpose"):
            self.creators[0].backward(self.grad.transpose())

        elif(op.startswith("sum_")):
            # the summed dimension is encoded in the op name ("sum_<dim>")
            dim = int(op.split("_")[1])
            self.creators[0].backward(self.grad.expand(dim,
                                                       self.creators[0].data.shape[dim]))

        elif(op.startswith("expand_")):
            dim = int(op.split("_")[1])
            self.creators[0].backward(self.grad.sum(dim))

        elif(op == "neg"):
            self.creators[0].backward(self.grad.__neg__())

        elif(op == "sigmoid"):
            # d sigmoid(x)/dx = y * (1 - y) with y = sigmoid(x) = self
            ones = Tensor(np.ones_like(self.grad.data))
            self.creators[0].backward(self.grad * (self * (ones - self)))

        elif(op == "tanh"):
            # d tanh(x)/dx = 1 - y^2 with y = tanh(x) = self. Computed on the
            # raw arrays: `self * self` would be a tracked tensor and would
            # register itself as a never-satisfied child of this node.
            local = 1 - self.data * self.data
            self.creators[0].backward(Tensor(self.grad.data * local))

        elif(op == "index_select"):
            # scatter-add the incoming rows back to the rows they were selected from
            new_grad = np.zeros_like(self.creators[0].data)
            indices_ = self.index_select_indices.data.flatten()
            grad_ = grad.data.reshape(len(indices_), -1)
            for i in range(len(indices_)):
                new_grad[indices_[i]] += grad_[i]
            self.creators[0].backward(Tensor(new_grad))

        elif(op == "cross_entropy"):
            # softmax_output and target_dist were stored by cross_entropy()
            dx = self.softmax_output - self.target_dist
            self.creators[0].backward(Tensor(dx))

    def __add__(self, other):
        '''Element-wise sum; tracked only when both operands have autograd on.'''
        if(self.autograd and other.autograd):
            return Tensor(self.data + other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="add")
        return Tensor(self.data + other.data)

    def __neg__(self):
        '''Element-wise negation.'''
        if(self.autograd):
            return Tensor(self.data * -1,
                          autograd=True,
                          creators=[self],
                          creation_op="neg")
        return Tensor(self.data * -1)

    def __sub__(self, other):
        '''Element-wise difference; tracked only when both operands have autograd on.'''
        if(self.autograd and other.autograd):
            return Tensor(self.data - other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="sub")
        return Tensor(self.data - other.data)

    def __mul__(self, other):
        '''Element-wise product; tracked only when both operands have autograd on.'''
        if(self.autograd and other.autograd):
            return Tensor(self.data * other.data,
                          autograd=True,
                          creators=[self,other],
                          creation_op="mul")
        return Tensor(self.data * other.data)

    def sum(self, dim):
        '''Sum the tensor across the single dimension `dim`.'''
        if(self.autograd):
            return Tensor(self.data.sum(dim),
                          autograd=True,
                          creators=[self],
                          creation_op="sum_"+str(dim))
        return Tensor(self.data.sum(dim))

    def expand(self, dim,copies):
        '''Insert a new axis at position `dim` holding `copies` repetitions of the data.'''
        # repeat() appends the copies as a trailing axis; the transpose moves it to `dim`
        trans_cmd = list(range(0,len(self.data.shape)))
        trans_cmd.insert(dim,len(self.data.shape))
        new_data = self.data.repeat(copies).reshape(list(self.data.shape) + [copies]).transpose(trans_cmd)

        if(self.autograd):
            return Tensor(new_data,
                          autograd=True,
                          creators=[self],
                          creation_op="expand_"+str(dim))
        return Tensor(new_data)

    def transpose(self):
        '''Return the transposed tensor.'''
        if(self.autograd):
            return Tensor(self.data.transpose(),
                          autograd=True,
                          creators=[self],
                          creation_op="transpose")

        return Tensor(self.data.transpose())

    def mm(self, x):
        '''Return self.data.dot(x.data); tracked whenever self has autograd on.'''
        if(self.autograd):
            return Tensor(self.data.dot(x.data),
                          autograd=True,
                          creators=[self,x],
                          creation_op="mm")
        return Tensor(self.data.dot(x.data))

    def sigmoid(self):
        '''Element-wise logistic function 1 / (1 + exp(-x)).'''
        if(self.autograd):
            return Tensor(1 / (1 + np.exp(-self.data)),
                          autograd=True,
                          creators=[self],
                          creation_op="sigmoid")
        return Tensor(1 / (1 + np.exp(-self.data)))

    def tanh(self):
        '''Element-wise hyperbolic tangent.'''
        if(self.autograd):
            return Tensor(np.tanh(self.data),
                          autograd=True,
                          creators=[self],
                          creation_op="tanh")
        return Tensor(np.tanh(self.data))

    def index_select(self, indices):
        '''Select rows self.data[indices.data] (row lookup for embedding layers).'''
        if(self.autograd):
            new = Tensor(self.data[indices.data],
                         autograd=True,
                         creators=[self],
                         creation_op="index_select")
            # needed by backward() to route gradients to the selected rows
            new.index_select_indices = indices
            return new
        return Tensor(self.data[indices.data])

    def softmax(self):
        '''Softmax over the last axis. Returns a plain numpy array, not a Tensor.'''
        last_axis = len(self.data.shape)-1
        # subtracting the row maximum leaves the result unchanged
        # mathematically but keeps exp() from overflowing
        shifted = self.data - self.data.max(axis=last_axis, keepdims=True)
        temp = np.exp(shifted)
        softmax_output = temp / np.sum(temp,
                                       axis=last_axis,
                                       keepdims=True)
        return softmax_output

    def cross_entropy(self, target_indices):
        '''Softmax over the last axis followed by the mean cross-entropy loss.

        target_indices -- tensor of integer class indices, one per row of the
                          flattened (len(targets), n_classes) prediction matrix

        Returns a scalar Tensor. When autograd is on, the softmax output and
        the one-hot targets are stored on it for use by backward().
        '''
        last_axis = len(self.data.shape)-1
        # max-shifted softmax / log-softmax: same values, no exp() overflow
        shifted = self.data - self.data.max(axis=last_axis, keepdims=True)
        temp = np.exp(shifted)
        normalizer = np.sum(temp, axis=last_axis, keepdims=True)
        softmax_output = temp / normalizer

        t = target_indices.data.flatten()
        # log of the softmax taken directly from the logits; normalizer >= 1
        # because the shifted maximum contributes exp(0)
        log_p = (shifted - np.log(normalizer)).reshape(len(t),-1)
        target_dist = np.eye(log_p.shape[1])[t]  # one-hot rows built from the target indices
        loss = -(log_p * (target_dist)).sum(1).mean()

        if(self.autograd):
            out = Tensor(loss,
                         autograd=True,
                         creators=[self],
                         creation_op="cross_entropy")
            out.softmax_output = softmax_output
            out.target_dist = target_dist
            return out

        return Tensor(loss)


    def __repr__(self):
        return str(self.data.__repr__())

    def __str__(self):
        return str(self.data.__str__())

In [61]:
#Stochastic gradient descent computer
class SGD(object):
    '''Plain stochastic gradient descent over a list of parameter tensors.

    parameters -- list of objects exposing `.data` and `.grad` (with `.grad.data`)
    alpha      -- learning rate applied to every parameter

    Parameters whose `.grad` is still None (no gradient has reached them yet)
    are skipped instead of raising.
    '''

    def __init__(self,parameters, alpha = 0.1):
        self.parameters = parameters #List of parameters to be optimized
        self.alpha = alpha

    def zero(self):
        '''Reset the gradient of every parameter to zero, in place.'''
        for p in self.parameters:
            if(p.grad is not None):
                p.grad.data *= 0

    def step(self,zero = True):
        '''Apply data -= grad * alpha to every parameter; zero the gradient afterwards if `zero`.'''
        for p in self.parameters:
            if(p.grad is None): #Parameter received no gradient: nothing to apply
                continue

            p.data -= p.grad.data * self.alpha

            if(zero):
                p.grad.data *= 0
                


In [4]:
#Define linear and sequential layers
class Layer(object):
    '''Base class for all layers: owns the list of trainable parameter tensors.'''

    def __init__(self):
        # Each instance gets its own list; subclasses append their tensors to it.
        self.parameters = []

    def get_parameters(self):
        '''Return the list of parameters (the list itself, not a copy).'''
        return self.parameters


class Linear(Layer):
    '''Fully connected layer computing input.mm(weight), plus a bias when enabled.'''

    def __init__(self, n_inputs, n_outputs, bias = True):
        super().__init__()

        self.use_bias = bias

        # Standard-normal weights scaled by sqrt(2 / n_inputs).
        scale = np.sqrt(2.0/(n_inputs))
        self.weight = Tensor(np.random.randn(n_inputs, n_outputs)*scale, autograd=True)

        # The bias tensor only exists when the layer was built with bias=True.
        if(self.use_bias):
            self.bias = Tensor(np.zeros(n_outputs), autograd=True)

        # Registration order: weight first, then bias.
        self.parameters.append(self.weight)
        if(self.use_bias):
            self.parameters.append(self.bias)

    def forward(self, input):
        '''Apply the layer to `input` and return the resulting tensor.'''
        projected = input.mm(self.weight)
        if(not self.use_bias):
            return projected
        # The bias is repeated along a new leading axis, one copy per row of the input.
        return projected + self.bias.expand(0,len(input.data))
    
class Sequential(Layer):
    '''Container that applies a list of layers one after another.'''

    def __init__(self, layers=None):
        '''
        layers -- list of layer objects applied in order. When omitted, a new
                  empty list is created for this instance. A passed-in list is
                  stored as-is, so later add() calls append to the caller's list.
        '''
        super().__init__()

        # A default of list() in the signature would be evaluated once and shared
        # by every Sequential built without arguments, so add() on one model would
        # leak layers into the others.
        self.layers = list() if layers is None else layers

    def add(self, layer):
        '''Append `layer` to the end of the pipeline.'''
        self.layers.append(layer)

    def forward(self, input):
        '''Feed `input` through every layer in order and return the final output.'''
        for layer in self.layers:
            input = layer.forward(input)
        return input

    def get_parameters(self):
        '''Return a new list with the parameters of all contained layers, in layer order.'''
        params = list()
        for l in self.layers:
            params += l.get_parameters()
        return params
    
class RNNCell(Layer):
    '''Single recurrent step: new_hidden = activation(w_ih(input) + w_hh(hidden)), output = w_ho(new_hidden).'''

    def __init__(self, n_inputs,n_hidden,n_output,activation='sigmoid'):
        '''
        n_inputs   -- size of the input vector
        n_hidden   -- size of the hidden state
        n_output   -- size of the output vector
        activation -- 'sigmoid' or 'tanh'; anything else raises Exception
        '''
        super().__init__()

        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        if(activation == 'sigmoid'):
            self.activation = Sigmoid()
        elif(activation == 'tanh'):
            # Must be an assignment: a comparison (==) here would leave
            # self.activation undefined and fail with AttributeError.
            self.activation = Tanh()
        else:
            raise Exception("Non-linearity not found")

        self.w_ih = Linear(n_inputs, n_hidden)   # input  -> hidden
        self.w_hh = Linear(n_hidden, n_hidden)   # hidden -> hidden
        self.w_ho = Linear(n_hidden, n_output)   # hidden -> output

        self.parameters += self.w_ih.get_parameters()
        self.parameters += self.w_hh.get_parameters()
        self.parameters += self.w_ho.get_parameters()

    def forward(self, input, hidden):
        '''Advance one step; returns the tuple (output, new_hidden).'''
        from_prev_hidden = self.w_hh.forward(hidden)
        combined = self.w_ih.forward(input) + from_prev_hidden
        new_hidden = self.activation.forward(combined)
        output = self.w_ho.forward(new_hidden)
        return output, new_hidden

    def init_hidden(self, batch_size=1):
        '''Return an all-zero hidden state of shape (batch_size, n_hidden) with autograd on.'''
        return Tensor(np.zeros((batch_size,self.n_hidden)),autograd=True)
    
class LSTMCell(Layer):
    '''
    Long short-term memory cell.

    The state is a pair (h, c): the hidden state `h` and the cell state `c`.
    Each step computes four quantities from the input and the previous `h`:
      f -- forget gate: scales the previous cell state (f == 0 forgets it)
      i -- input gate: scales the new candidate information
      o -- output gate: scales how much of the cell reaches `h`
           (o == 0 makes predictions ignore the cell state)
      g -- candidate update (tanh)
    The cell is updated as c = f * prev_c + i * g, i.e. without a matrix
    multiplication applied to the cell itself, and h = o * tanh(c).
    '''
    def __init__(self,n_inputs,n_hidden, n_output):
        super().__init__()

        self.n_inputs = n_inputs
        self.n_hidden = n_hidden
        self.n_output = n_output

        # Input projections (with bias), one per gate / candidate.
        self.xf = Linear(n_inputs, n_hidden)
        self.xi = Linear(n_inputs, n_hidden)
        self.xo = Linear(n_inputs, n_hidden)
        self.xc = Linear(n_inputs, n_hidden)

        # Hidden-state projections (no bias), one per gate / candidate.
        self.hf = Linear(n_hidden, n_hidden, bias=False)
        self.hi = Linear(n_hidden, n_hidden, bias=False)
        self.ho = Linear(n_hidden, n_hidden, bias=False)
        self.hc = Linear(n_hidden, n_hidden, bias=False)

        # Read-out from the hidden state to the output.
        self.w_ho = Linear(n_hidden, n_output, bias=False)

        # Collect the parameters of every sub-layer in a fixed order.
        sub_layers = (self.xf, self.xi, self.xo, self.xc,
                      self.hf, self.hi, self.ho, self.hc,
                      self.w_ho)
        for sub_layer in sub_layers:
            self.parameters += sub_layer.get_parameters()

    def forward(self, input, hidden):
        '''Advance one step; `hidden` is (h, c). Returns (output, (new_h, new_c)).'''
        prev_hidden, prev_cell = hidden[0], hidden[1]

        forget_gate = (self.xf.forward(input) + self.hf.forward(prev_hidden)).sigmoid()
        input_gate = (self.xi.forward(input) + self.hi.forward(prev_hidden)).sigmoid()
        output_gate = (self.xo.forward(input) + self.ho.forward(prev_hidden)).sigmoid()
        candidate = (self.xc.forward(input) + self.hc.forward(prev_hidden)).tanh()

        new_cell = (forget_gate * prev_cell) + (input_gate * candidate)
        new_hidden = output_gate * new_cell.tanh()

        output = self.w_ho.forward(new_hidden)
        return output, (new_hidden, new_cell)

    def init_hidden(self, batch_size=1):
        '''Return the initial (h, c) pair: zeros of shape (batch_size, n_hidden) with column 0 set to 1.'''
        state_shape = (batch_size, self.n_hidden)
        init_h = Tensor(np.zeros(state_shape), autograd=True)
        init_c = Tensor(np.zeros(state_shape), autograd=True)
        init_h.data[:,0] += 1
        init_c.data[:,0] += 1
        return (init_h, init_c)
    
class MSELoss(Layer):
    '''Squared-error loss: (pred - target)^2 summed over dimension 0 (a sum, not a mean).'''

    def forward(self, pred, target):
        '''Return ((pred - target) * (pred - target)).sum(0).'''
        # The difference is built twice on purpose: each factor of the
        # product is its own subtraction node in the graph.
        left_diff = pred - target
        right_diff = pred - target
        squared = left_diff * right_diff
        return squared.sum(0)
    
class Tanh(Layer):
    '''Parameter-free layer applying the input tensor's tanh().'''

    def forward(self,input):
        '''Return input.tanh().'''
        activated = input.tanh()
        return activated
    
class Sigmoid(Layer):
    '''Parameter-free layer applying the input tensor's sigmoid().'''

    def forward(self,input):
        '''Return input.sigmoid().'''
        activated = input.sigmoid()
        return activated
    
class Embedding(Layer):
    '''Lookup table of `vocab_size` rows with `dim` columns, one row per index.'''

    def __init__(self, vocab_size, dim):
        super().__init__()

        self.vocab_size = vocab_size
        self.dim = dim

        # Uniform values in [-0.5/dim, 0.5/dim).
        initial_rows = (np.random.rand(vocab_size, dim) - 0.5) / dim
        self.weight = Tensor(initial_rows, autograd = True)

        self.parameters.append(self.weight)

    def forward(self,input):
        '''Return the rows of the weight matrix selected by the index tensor `input`.'''
        selected = self.weight.index_select(input)
        return selected

class CrossEntropyLoss(object):
    '''Loss wrapper that delegates to the input tensor's cross_entropy().'''

    def forward(self, input, target):
        '''Return input.cross_entropy(target).'''
        loss = input.cross_entropy(target)
        return loss

TypeError: __init_subclass__() takes no keyword arguments

In [72]:
# Gradient accumulation check: b feeds two additions (d and e), so it
# should receive one gradient from each branch.
a = Tensor([1,2,3,4,5], autograd=True)
b = Tensor([2,2,2,2,2], autograd=True)
c = Tensor([5,4,3,2,1], autograd=True)

d = a + b
e = b + c
f = d + e

# Start backprop at f with a gradient of ones.
f.backward(Tensor(np.array([1,1,1,1,1])))

# Expected: b.grad is 2 everywhere (1 via d plus 1 via e).
#print(b.grad.data == np.array([2,2,2,2,2]))

[ True  True  True  True  True]


In [73]:
# Same check with negation: b enters both branches as -b, so each branch
# contributes a gradient of -1 to b.
a = Tensor([1,2,3,4,5], autograd=True)
b = Tensor([2,2,2,2,2], autograd=True)
c = Tensor([5,4,3,2,1], autograd=True)

d = a + (-b)
e = (-b) + c
f = d + e

# Start backprop at f with a gradient of ones.
f.backward(Tensor(np.array([1,1,1,1,1])))

# Expected: b.grad is -2 everywhere.
#print(b.grad.data == np.array([-2,-2,-2,-2,-2]))

[ True  True  True  True  True]


In [74]:
#Backprop on a toy network: 2 inputs -> 3 tanh units -> 1 sigmoid output, squared-error loss
np.random.seed(0)  # seed numpy's global RNG (weight initialisation and tensor ids draw from it)

# Four 2-feature samples and their single-column targets.
data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

model = Sequential([Linear(2,3), Tanh(), Linear(3,1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.05)

# Ten full-batch training steps.
for i in range(10):

    # Forward pass.
    pred = model.forward(data)

    # Loss for the current predictions.
    loss = criterion.forward(pred, target)

    # Backward pass seeded with ones shaped like the loss, then one SGD
    # update (step() also zeroes the gradients by default).
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    #print(loss)

[1.06372865]
[1.04544331]
[1.02758008]
[1.01015346]
[0.9931742]
[0.97664928]
[0.96058196]
[0.944972]
[0.92981586]
[0.91510699]


In [78]:
#Backprop with embedding: the inputs are row indices into the embedding table
# NOTE(review): no np.random.seed call in this cell, so the initial weights
# depend on the state of numpy's global RNG left by earlier cells.
data = Tensor(np.array([1,2,1,2]), autograd=True)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)

# 5 embedding rows of width 3, followed by tanh, a 3 -> 1 linear layer and a sigmoid.
embed = Embedding(5,3)
model = Sequential([embed, Tanh(), Linear(3,1), Sigmoid()])
criterion = MSELoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.5)

# Ten full-batch training steps.
for i in range(10):

    # Forward pass.
    pred = model.forward(data)

    # Loss for the current predictions.
    loss = criterion.forward(pred, target)

    # Backward pass seeded with ones, then one SGD update.
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    #print(loss)

[1.1827452]
[1.21674946]
[1.26244409]
[1.31590338]
[1.36587375]
[1.3895886]
[1.36098655]
[1.27380658]
[1.14851184]
[1.01118198]


In [81]:
#Backprop with cross-entropy: targets are class indices instead of value columns
# NOTE(review): no np.random.seed call in this cell, so the initial weights
# depend on the state of numpy's global RNG left by earlier cells.
# data indices
data = Tensor(np.array([1,2,1,2]), autograd=True)

# target indices
target = Tensor(np.array([0,1,0,1]), autograd=True)

# 3 embedding rows of width 3, tanh, then a 3 -> 4 linear layer producing
# the scores that cross_entropy() turns into a softmax.
model = Sequential([Embedding(3,3), Tanh(), Linear(3,4)])
criterion = CrossEntropyLoss()

optim = SGD(parameters=model.get_parameters(), alpha=0.1)

# Ten full-batch training steps.
for i in range(10):

    # Forward pass.
    pred = model.forward(data)

    # Loss for the current predictions.
    loss = criterion.forward(pred, target)

    # Backward pass seeded with ones, then one SGD update.
    loss.backward(Tensor(np.ones_like(loss.data)))
    optim.step()
    #print(loss)

1.3642176502352554
1.2649510935479185
1.1854405915193045
1.1212570573855833
1.0689324514831613
1.0257911051028679
0.989777709046456
0.9593079654876886
0.9331490127008373
0.9103276766315664
