In [2]:
class Module(object):
    """Abstract base for every layer, activation and loss in this framework.

    Subclasses override ``forward`` and ``backward``; ``param`` only needs
    overriding by modules that own parameters.
    """

    def forward(self, *input):
        """Compute the module's output; must be overridden by subclasses."""
        raise NotImplementedError()

    def backward(self, *gradwrtoutput):
        """Propagate gradients back; must be overridden by subclasses."""
        raise NotImplementedError()

    def param(self):
        """Return the module's parameters: a fresh empty list by default."""
        return list()

In [3]:
class Sequential(Module):
    """Container that chains modules, feeding each output into the next.

    Attributes set by ``__init__``:
        modules: the sub-modules, in the order ``forward`` applies them.
        input: the argument of the latest ``forward`` call; reset to
            ``None`` by ``backward``.
    """

    def __init__(self, modules):
        """Store ``modules`` (an ordered, reversible sequence of modules)."""
        self.modules = modules
        self.input = None

    def forward(self, input):
        """Run ``input`` through every sub-module in order.

        Returns the output of the last sub-module (or ``input`` itself when
        there are no sub-modules).
        """
        self.input = input
        output = input
        for module in self.modules:
            output = module.forward(output)
        return output

    def backward(self, *gradwrtoutput):
        """Back-propagate through the sub-modules in reverse order.

        Each sub-module's ``backward`` receives the gradient(s) produced by
        the module after it, unpacked as positional arguments.

        Returns whatever the first sub-module's ``backward`` returns (or the
        received tuple unchanged when there are no sub-modules).
        """
        gradient = gradwrtoutput
        for module in reversed(self.modules):
            # ``backward`` is variadic, so a module that returned a single
            # gradient must be re-wrapped before being unpacked again;
            # passing the tuple itself would hand the next module a tuple
            # instead of the gradient.
            if not isinstance(gradient, tuple):
                gradient = (gradient,)
            gradient = module.backward(*gradient)
        self.input = None
        return gradient

    def param(self):
        """Return the parameters of all sub-modules as one flat list.

        The per-module lists are concatenated (not nested) so the result can
        be iterated entry by entry, in sub-module order.
        """
        params = []
        for module in self.modules:
            params.extend(module.param())
        return params
            

In [4]:
class Optimizer(object):
    """Abstract base class for optimizers.

    Subclasses must override both ``step`` and ``zero_grad``.
    """

    def step(self):
        """Apply one parameter update.

        Raises:
            NotImplementedError: always, until overridden by a subclass.
        """
        raise NotImplementedError

    def zero_grad(self):
        """Reset the accumulated gradients.

        Raises:
            NotImplementedError: always, until overridden by a subclass.
        """
        raise NotImplementedError

In [1]:
class SGD(Optimizer):
    """Plain stochastic gradient descent: ``x <- x - lr * grad``.

    NOTE(review): the values are presumably torch tensors (``add_`` and
    ``zero_`` are called on them) -- confirm against the modules' ``param``.
    """

    def __init__(self, params, lr):
        """Store the parameters and the learning rate.

        Args:
            params: iterable of ``(value, gradient)`` pairs. It is iterated
                on every ``step`` / ``zero_grad`` call, so it must be
                re-iterable (e.g. a list, not a generator).
            lr: step size that scales each gradient.
        """
        self.params = params
        self.lr = lr

    def step(self):
        """Move every value one step against its gradient, in place."""
        for x, grad in self.params:
            # ``add_`` mutates ``x`` itself, so the object held by the
            # owning module is updated without any re-assignment.
            x.add_(-self.lr * grad)

    def zero_grad(self):
        """Reset every gradient to zero, in place."""
        for _, grad in self.params:
            grad.zero_()

In [2]:
class MSE(Module):
    """Mean-squared-error loss: the mean over all elements of (input - target)^2.

    NOTE(review): ``input`` and ``target`` are presumably torch tensors of
    the same shape (``pow``, ``mean`` and ``numel`` are called on them) --
    confirm against the callers.
    """

    def forward(self, input, target):
        """Return the mean squared difference between ``input`` and ``target``.

        Both arguments are kept on the instance for use by ``backward``.
        """
        self.input = input
        self.target = target
        return (input - target).pow(2).mean()

    def backward(self, *gradwrtoutput):
        """Return the gradient of the loss with respect to ``input``.

        Args:
            *gradwrtoutput: optional upstream gradient. When omitted it is
                taken to be 1, the usual case for a loss at the end of the
                network. Only the first value is used.
        """
        # ``forward`` averages over every element, so the derivative is
        # divided by the element count, not by the shape object.
        grad = 2 * (self.input - self.target) / self.input.numel()
        if gradwrtoutput:
            grad = gradwrtoutput[0] * grad
        return grad

In [3]:
class Sigmoid(Module):
    """Element-wise logistic sigmoid activation, ``1 / (1 + exp(-x))``.

    NOTE(review): ``input`` is presumably a torch tensor (``exp`` is called
    on it) -- confirm against the callers.
    """

    def forward(self, input):
        """Return the sigmoid of ``input``.

        The input and the result are kept on the instance; ``backward``
        reuses the result.
        """
        self.input = input
        self.sigmoid = 1. / (1 + (-input).exp())
        return self.sigmoid

    def backward(self, *gradwrtoutput):
        """Return the gradient with respect to the input of ``forward``.

        Args:
            *gradwrtoutput: exactly one value, the gradient with respect to
                this module's output.

        Raises:
            ValueError: if not exactly one gradient is passed.
        """
        # Unpack the variadic tuple; multiplying the tuple itself by a
        # tensor is not the intended element-wise product.
        (grad,) = gradwrtoutput
        # d(sigmoid)/dx = sigmoid * (1 - sigmoid)
        return grad * self.sigmoid * (1 - self.sigmoid)
    

In [4]:
class ReLU(Module):
    """Element-wise rectified linear unit, ``max(x, 0)``.

    NOTE(review): ``input`` is presumably a torch tensor that supports
    element-wise comparison and multiplication -- confirm against the
    callers.
    """

    def forward(self, input):
        """Return ``input`` with every non-positive element replaced by 0.

        The input is kept on the instance for use by ``backward``.
        """
        self.input = input
        return (input > 0.) * input

    def backward(self, *gradwrtoutput):
        """Return the gradient with respect to the input of ``forward``.

        Args:
            *gradwrtoutput: exactly one value, the gradient with respect to
                this module's output.

        Raises:
            ValueError: if not exactly one gradient is passed.
        """
        # Unpack the variadic tuple; multiplying the tuple itself by a
        # tensor is not the intended element-wise product.
        (grad,) = gradwrtoutput
        # Same strict mask as ``forward``: an input of exactly 0 is clipped
        # there, so no gradient flows through it here.
        return grad * (self.input > 0.)