In [15]:
import numpy as np
from scipy.stats import bernoulli

In [16]:
class Module(object):
    """Base building block whose hooks are meant to be overridden.

    ``forward`` delegates to ``updateOutput``; ``backward`` runs
    ``updateGradInput`` and then ``accGradParameters`` and hands back
    ``self.gradInput``.  Every hook in this base class is a no-op, so an
    un-subclassed ``Module`` returns ``None`` from both entry points.
    """

    def __init__(self):
        # Caches that subclasses fill in from their hooks.
        self.gradInput = None
        self.output = None

    def forward(self, input):
        """Run the forward hook on ``input`` and return its result."""
        result = self.updateOutput(input)
        return result

    def backward(self, input, gradOutput):
        """Run both backward hooks in order and return ``self.gradInput``.

        The input-gradient hook is invoked before the parameter-gradient
        hook; the return values of the hooks themselves are ignored.
        """
        self.updateGradInput(input, gradOutput)
        self.accGradParameters(input, gradOutput)
        grad = self.gradInput
        return grad

    def updateOutput(self, input):
        """Forward hook; does nothing here and returns ``None``."""
        return None

    def updateGradInput(self, input, gradOutput):
        """Input-gradient hook; does nothing here and returns ``None``."""
        return None

    def accGradParameters(self, input, gradOutput):
        """Parameter-gradient hook; does nothing here."""
        return None

    def zeroGradParameters(self):
        """Gradient-reset hook; does nothing here."""
        return None

    def getParameters(self):
        """Return a new empty list (the base class has no parameters)."""
        return list()

    def getGradParameters(self):
        """Return a new empty list (the base class has no gradients)."""
        return list()

    def __repr__(self):
        return "Module"

# Sequential container

In [29]:
class Sequential(Module):
    """Container that chains modules, feeding each output into the next.

    Modules are applied in the order they were added.  ``backward`` walks
    the chain in reverse and relies on the ``output`` attribute each
    child stored during the preceding ``forward`` call.
    """

    def __init__(self):
        super(Sequential, self).__init__()
        self.modules = []

    def add(self, module):
        """Append ``module`` to the end of the chain."""
        self.modules.append(module)

    def updateOutput(self, input):
        """Pass ``input`` through every child in order.

        With no children the input is returned unchanged.
        """
        y = input
        for m in self.modules:
            y = m.forward(y)
        self.output = y
        return self.output

    def backward(self, input, gradOutput):
        """Propagate ``gradOutput`` back through the chain.

        Child ``i`` receives the stored output of child ``i - 1`` as its
        input; the first child receives ``input`` itself.  An empty
        container is an identity mapping, so the incoming gradient is
        passed through unchanged (the previous code returned ``input``
        here, which is not a gradient).
        """
        g = gradOutput
        # Children 1..n-1 take the cached output of their predecessor.
        for i in range(len(self.modules) - 1, 0, -1):
            g = self.modules[i].backward(self.modules[i - 1].output, g)
        if self.modules:
            g = self.modules[0].backward(input, g)
        self.gradInput = g
        return self.gradInput

    def zeroGradParameters(self):
        """Ask every child to reset its parameter gradients."""
        for module in self.modules:
            module.zeroGradParameters()

    def getParameters(self):
        """Return one entry per child: that child's ``getParameters()``."""
        return [x.getParameters() for x in self.modules]

    def getGradParameters(self):
        """Return one entry per child: that child's ``getGradParameters()``."""
        return [x.getGradParameters() for x in self.modules]

    def __repr__(self):
        # One child per line, each followed by a newline.
        return "".join(str(x) + '\n' for x in self.modules)

    def __getitem__(self, x):
        """Index or slice the underlying list of children."""
        return self.modules[x]

# Layers

In [18]:
class Linear(Module):
    """Affine layer computing ``input . W^T + b``.

    ``W`` has shape ``(n_out, n_in)`` and ``b`` has shape ``(n_out,)``;
    both are drawn uniformly from ``[-1/sqrt(n_in), 1/sqrt(n_in)]``.
    """

    def __init__(self, n_in, n_out):
        super(Linear, self).__init__()
        stdv = 1. / np.sqrt(n_in)
        self.W = np.random.uniform(-stdv, stdv, size=(n_out, n_in))
        self.b = np.random.uniform(-stdv, stdv, size=n_out)
        self.gradW = np.zeros_like(self.W)
        self.gradb = np.zeros_like(self.b)

    def updateOutput(self, input):
        """Apply the affine map to every row of ``input``.

        ``input`` is a ``(batch, n_in)`` array; the result is
        ``(batch, n_out)``.  A single matrix product replaces the former
        per-row Python loop.
        """
        self.output = input.dot(self.W.T) + self.b
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Return the gradient w.r.t. ``input``: ``gradOutput . W``."""
        self.gradInput = gradOutput.dot(self.W)
        return self.gradInput

    def accGradParameters(self, input, gradOutput):
        """Compute the gradients for ``W`` and ``b``.

        The stored gradients are replaced, not added to, so each call
        reflects only the batch it was given.
        """
        self.gradW = gradOutput.T.dot(input)
        self.gradb = np.sum(gradOutput, axis=0)

    def zeroGradParameters(self):
        """Fill both gradient arrays with zeros in place."""
        self.gradW.fill(0)
        self.gradb.fill(0)

    def getParameters(self):
        """Return ``[W, b]``."""
        return [self.W, self.b]

    def getGradParameters(self):
        """Return ``[gradW, gradb]``."""
        return [self.gradW, self.gradb]

    def __repr__(self):
        n_out, n_in = self.W.shape
        return 'Linear %d -> %d' % (n_in, n_out)

In [19]:
class SoftMax(Module):
    """Row-wise softmax over axis 1 of a 2-D input."""

    def __init__(self):
        super(SoftMax, self).__init__()

    def updateOutput(self, input):
        """Return ``exp(x) / sum(exp(x))`` for each row of ``input``.

        The row maximum is subtracted first so the exponentials cannot
        overflow; this does not change the result.
        """
        shifted = np.subtract(input, input.max(axis=1, keepdims=True))
        exps = np.exp(shifted)
        self.output = exps / np.sum(exps, axis=1, keepdims=True)
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Return the gradient w.r.t. ``input``.

        Uses ``self.output`` from the preceding forward call.  For one
        row with output ``s`` and incoming gradient ``g`` the Jacobian is
        ``diag(s) - s s^T``, so the product ``g . J`` simplifies to
        ``s * (g - sum(g * s))``.  That closed form replaces the former
        explicit Jacobian built in nested Python loops.
        """
        weighted = gradOutput * self.output
        row_totals = np.sum(weighted, axis=1, keepdims=True)
        self.gradInput = weighted - self.output * row_totals
        return self.gradInput

    def __repr__(self):
        return "SoftMax"

In [20]:
class BatchMeanNormalization(Module):
    """Subtract a per-feature mean from the input.

    Batches with more than one row are centred on their own mean, which
    is also blended into the running ``self.mean``.  A single-row input
    is centred on the stored running mean instead.
    """

    def __init__(self):
        super(BatchMeanNormalization, self).__init__()
        self.mean = 0

    def updateOutput(self, input):
        """Centre ``input`` along axis 0.

        The running mean is only updated from multi-row batches.  The
        previous code also folded a single-row input into the running
        mean before subtracting it, which halved the centred output and
        disagreed with ``updateGradInput``, where the mean is treated as
        a constant on that path.
        """
        if input.shape[0] == 1:
            self.output = input - self.mean
        else:
            batch_mean = np.mean(input, axis=0)
            # Equal-weight blend of the old running mean and this batch.
            self.mean = (self.mean + batch_mean) / 2.
            self.output = input - batch_mean
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Return the gradient w.r.t. ``input``.

        On the single-row path the subtracted mean is a constant, so the
        gradient passes through; otherwise the batch mean of the gradient
        is removed.
        """
        if input.shape[0] == 1:
            self.gradInput = gradOutput
        else:
            self.gradInput = gradOutput - np.mean(gradOutput, axis=0)
        return self.gradInput

    def __repr__(self):
        return "BatchMeanNormalization"

In [21]:
class Dropout(Module):
    """Dropout in which ``p`` is the probability of KEEPING a unit.

    Multi-row batches are multiplied by a fresh Bernoulli(``p``) mask.
    A single-row input is instead scaled by ``p``, the expected value of
    that mask.
    """

    def __init__(self, p):
        super(Dropout, self).__init__()
        self.p = p
        self.mask = None

    def updateOutput(self, input):
        """Apply the random mask, or scale by ``p`` for a single row."""
        if input.shape[0] == 1:
            # Was ``p * input``: ``p`` is not defined in this scope.
            self.output = self.p * input
        else:
            self.mask = bernoulli.rvs(self.p, size=input.shape)
            self.output = input * self.mask
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Return the gradient w.r.t. ``input``, mirroring the forward path.

        The single-row forward is a plain scaling by ``p``, so its
        gradient is ``p * gradOutput``.  The previous code multiplied by
        ``self.mask`` here, which is ``None`` or left over from an
        earlier batch on that path.
        """
        if input.shape[0] == 1:
            self.gradInput = self.p * gradOutput
        else:
            self.gradInput = gradOutput * self.mask
        return self.gradInput

    def __repr__(self):
        # ``%s`` rather than ``%d``: ``%d`` truncated e.g. 0.5 to 0.
        return "Dropout with p = %s" % (self.p,)

# Activation functions

In [22]:
class ReLU(Module):
    """Rectified linear unit: element-wise ``max(x, 0)``."""

    def __init__(self):
        super(ReLU, self).__init__()

    def updateOutput(self, input):
        """Clamp negative entries of ``input`` to zero."""
        rectified = np.maximum(input, 0)
        self.output = rectified
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Pass the gradient only where ``input`` is strictly positive."""
        active = input > 0
        self.gradInput = np.multiply(gradOutput, active)
        return self.gradInput

    def __repr__(self):
        return "ReLU"

In [23]:
class LeakyReLU(Module):
    """Leaky ReLU: identity for positive inputs, slope ``alpha`` below zero."""

    def __init__(self, alpha):
        super(LeakyReLU, self).__init__()
        self.alpha = alpha

    def updateOutput(self, input):
        """Return ``x`` where ``x > 0`` and ``alpha * x`` where ``x < 0``."""
        positive = input > 0
        negative = input < 0
        self.output = input * positive + self.alpha * input * negative
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Scale the gradient by 1 where ``input > 0`` and ``alpha`` where ``input < 0``.

        Entries where ``input`` is exactly zero match neither mask and
        therefore receive a zero gradient.
        """
        positive = input > 0
        negative = input < 0
        through = np.multiply(gradOutput, positive)
        leaked = np.multiply(gradOutput, negative)
        self.gradInput = through + self.alpha * leaked
        return self.gradInput

    def __repr__(self):
        return "LeakyReLU"

In [24]:
class ELU(Module):
    """Exponential linear unit.

    Computes ``x`` for ``x > 0`` and ``alpha * (exp(x) - 1)`` otherwise.
    """

    def __init__(self, alpha):
        super(ELU, self).__init__()
        self.alpha = alpha

    def updateOutput(self, input):
        """Apply the ELU element-wise.

        The exponential is only evaluated on ``min(x, 0)``.  The previous
        code took ``exp`` of the raw input and masked afterwards, so a
        large positive entry overflowed to ``inf`` and ``inf * False``
        turned the output into NaN.
        """
        negative_branch = self.alpha * np.expm1(np.minimum(input, 0))
        self.output = np.where(input > 0, input, negative_branch)
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Return the gradient w.r.t. ``input``.

        The slope is 1 where ``input > 0`` and ``alpha * exp(input)``
        where ``input < 0``; entries exactly at zero match neither mask
        and get a zero gradient.  ``exp`` is taken on ``min(x, 0)`` for
        the same overflow reason as in ``updateOutput``.
        """
        decay = np.exp(np.minimum(input, 0))
        slope = (input > 0) + self.alpha * decay * (input < 0)
        self.gradInput = gradOutput * slope
        return self.gradInput

    def __repr__(self):
        return "ELU"

In [25]:
class SoftPlus(Module):
    """SoftPlus activation: ``log(1 + exp(x))``."""

    def __init__(self):
        super(SoftPlus, self).__init__()

    def updateOutput(self, input):
        """Apply SoftPlus element-wise.

        ``np.logaddexp(0, x)`` equals ``log(exp(0) + exp(x))`` but does
        not overflow; the former ``log(1 + exp(x))`` returned ``inf`` for
        large ``x`` instead of approximately ``x``.
        """
        self.output = np.logaddexp(0, input)
        return self.output

    def updateGradInput(self, input, gradOutput):
        """Return ``gradOutput * sigmoid(input)``.

        The sigmoid is evaluated as ``exp(-log(1 + exp(-x)))`` so that a
        large negative ``x`` underflows quietly to 0 rather than
        overflowing inside ``exp(-x)``.
        """
        sigmoid = np.exp(-np.logaddexp(0, -input))
        self.gradInput = gradOutput * sigmoid
        return self.gradInput

    def __repr__(self):
        return "SoftPlus"

# Criterions

In [26]:
class Criterion:
    """Base class for loss functions.

    ``forward`` delegates to ``updateOutput`` and ``backward`` to
    ``updateGradInput``.  The base implementations of those hooks simply
    return the cached ``self.output`` / ``self.gradInput``.
    """

    def __init__(self):
        # Initialise the caches so the base hooks return ``None`` rather
        # than raising AttributeError before any subclass has set them.
        self.output = None
        self.gradInput = None

    def forward(self, input, target):
        """Return the loss hook's result for ``input`` and ``target``."""
        return self.updateOutput(input, target)

    def backward(self, input, target):
        """Return the gradient hook's result for ``input`` and ``target``."""
        return self.updateGradInput(input, target)

    def updateOutput(self, input, target):
        """Loss hook; the base version returns the cached ``self.output``."""
        return self.output

    def updateGradInput(self, input, target):
        """Gradient hook; the base version returns the cached ``self.gradInput``."""
        return self.gradInput

    def __repr__(self):
        return "Criterion"

In [27]:
class MSECriterion(Criterion):
    """Squared error summed over all entries and divided by the batch size."""

    def updateOutput(self, input, target):
        """Return ``sum((input - target) ** 2) / input.shape[0]``."""
        diff = input - target
        self.output = np.sum(np.power(diff, 2)) / input.shape[0]
        return self.output

    def updateGradInput(self, input, target):
        """Return the derivative of the loss w.r.t. ``input``.

        Differentiating the squared term yields a factor of 2, which the
        previous code omitted, so its gradient was half the true value.
        """
        self.gradInput = 2 * (input - target) / input.shape[0]
        return self.gradInput

    def __repr__(self):
        return "MSECriterion"

In [28]:
class ClassNLLCriterion(Criterion):
    """Negative log-likelihood of ``target`` under probabilities ``input``.

    Computes ``-sum(target * log(input)) / input.shape[0]``.
    NOTE(review): ``target`` is presumably a one-hot (or soft) label
    array with the same shape as ``input`` -- confirm against callers.
    """

    # Floor applied to ``input`` before taking the log or dividing.
    EPS = 1e-15

    def updateOutput(self, input, target):
        """Return the batch-averaged negative log-likelihood.

        ``input`` is floored at ``EPS`` first: an exact zero otherwise
        gives ``log(0) = -inf`` and ``0 * -inf = NaN`` in the sum.
        """
        safe_input = np.maximum(input, self.EPS)
        self.output = -1. / input.shape[0] * np.sum(target * np.log(safe_input))
        return self.output

    def updateGradInput(self, input, target):
        """Return ``-target / input / input.shape[0]``.

        Uses the same ``EPS`` floor as ``updateOutput`` so that a zero
        probability cannot cause a division by zero.
        """
        safe_input = np.maximum(input, self.EPS)
        self.gradInput = -1. / input.shape[0] * target / safe_input
        return self.gradInput

    def __repr__(self):
        return "ClassNLLCriterion"