In [1]:
import torch
from torch import Tensor
import numpy as np

# Auxiliary Classes

In [2]:
class layer_par:
    """
    Holds the trainable parameters (weights and bias) of one fully connected layer.
    Both tensors are created in the constructor.

    inputs:
        dim_in      :  the input dimension of the fully connected layer
        dim_out     :  the output dimension of the fully connected layer

    attributes:
        dim_in, dim_out :  the dimensions given to the constructor
        b               :  bias, shape (dim_out, 1), initialized to zero
        w               :  weights, shape (dim_out, dim_in), filled in place by normal_()
    """

    def __init__(self, dim_in, dim_out):
        self.dim_in, self.dim_out = dim_in, dim_out

        bias_shape = (dim_out, 1)
        weight_shape = (dim_out, dim_in)

        self.b = Tensor(*bias_shape).zero_()        # bias starts at zero
        self.w = Tensor(*weight_shape).normal_()    # weights drawn by normal_() with its default arguments
    # TODO: consider storing the layer's activation here as well
    
    
class forward_par:    # TODO: consider renaming to "forward_par_of_layer"
    """
    Container for the values produced by the forward pass of a single layer:
    the pre-activation s and the activation output x = Activation(s).

    inputs:
        dim_out     :  the output dimension of the fully connected layer

    attributes:
        s           :  zero tensor of length dim_out, placeholder for the pre-activation
        x           :  zero tensor of length dim_out, placeholder for the activation output
    """
    def __init__(self, dim_out):
        # Two independent zero-filled buffers; callers overwrite them after a forward pass.
        self.s = Tensor(dim_out).zero_()
        self.x = Tensor(dim_out).zero_()
    
        
class backward_par:   # or backward par
    """
    Container for the gradient buffers of a single layer. Every buffer starts at zero.

    inputs:
        dim_in      :  the input dimension of the fully connected layer
        dim_out     :  the output dimension of the fully connected layer

    attributes:
        db          :  buffer of shape (dim_out, 1), same shape as the layer bias
        dw          :  buffer of shape (dim_out, dim_in), same shape as the layer weights
        ds          :  buffer of length dim_out
        dx          :  buffer of length dim_out
    """
    def __init__(self, dim_in, dim_out):
        # Parameter-shaped buffers.
        self.db = Tensor(dim_out, 1).zero_()
        self.dw = Tensor(dim_out, dim_in).zero_()
        # Per-unit buffers.
        self.ds = Tensor(dim_out).zero_()
        self.dx = Tensor(dim_out).zero_()
        
        
class Linear:
    """
    Lightweight description of a fully connected layer: it only records the
    layer's input and output dimensions and allocates no tensors.

    inputs:
        dim_in      :  the input dimension of the fully connected layer
        dim_out     :  the output dimension of the fully connected layer

    attributes:
        input_      :  dim_in as given
        output_     :  dim_out as given
    """

    def __init__(self,dim_in, dim_out):
        self.input_, self.output_ = dim_in, dim_out
    # TODO: this class duplicates the dimensions already kept by layer_par; it could probably be dropped

        
def Activation(code,input_):
    """
    Apply the activation selected by `code` element-wise to `input_`:
        0: Relu(x)
        1: Tanh(x)
        2: Sigmoid(x)

    This function plays the role of the functional package of pytorch.

    inputs:
        code        :  the code of the requested activation (0, 1 or 2)
        input_      :  the input tensor

    returns:
        result      :  a new tensor with the same shape as input_ holding the activation values

    raises:
        ValueError  :  if code is not 0, 1 or 2
    """
    # Relu
    if code == 0:
        # clamp() rather than x - (x<0)*x: the arithmetic form turns +inf into NaN
        # (0 * inf) and -inf into NaN (-inf - -inf); clamp gives inf and 0.
        result = input_.clamp(min=0)
    # Tanh
    elif code == 1:
        result = torch.tanh(input_)
    # Sig
    elif code == 2:
        result = torch.sigmoid(input_)
    # error
    else:
        raise ValueError('Unknown Code For Activation')

    return result


def dActivation(code,input_):
    """
    Evaluate the derivative of the activation selected by `code` element-wise at `input_`.
    The codes follow the same convention as the activations themselves:
        0: dRelu(x)
        1: dTanh(x)
        2: dSigmoid(x)

    This function plays the role of the functional package of pytorch.

    inputs:
        code        :  the code of the activation whose derivative is requested (0, 1 or 2)
        input_      :  the input tensor (the point at which the derivative is taken)

    returns:
        result      :  a new tensor with the same shape and type as input_ holding the derivative values

    raises:
        ValueError  :  if code is not 0, 1 or 2
    """
    # dRelu: 1 where x is not negative (so 1 at x == 0), 0 where x < 0
    if code == 0:
        # Derive the mask from input_ itself so the result keeps the input's type
        # instead of always being a freshly allocated float tensor.
        result = 1 - (input_ < 0).type_as(input_)
    # dTanh
    elif code == 1:
        result = 1 - torch.tanh(input_) ** 2
    # dSig
    elif code == 2:
        sig = torch.sigmoid(input_)     # evaluated once and reused
        result = sig * (1 - sig)
    # error
    else:
        raise ValueError('Unknown Code For derivative of Activation')

    return result



# Network Class

In [88]:
class Net:
    """
    A small fully connected network assembled from a sequential description.

    methods:
        get_param_of_layer        :  returns the layer_par object (w, b) of a layer
        get_forward_par_of_layer  :  returns the forward_par object (s, x) of a layer,
                                     available after forward() has been called
        get_grad_of_layer         :  returns the entry of backward_list for a layer
        make_arch                 :  builds the architecture from a list [fc1, act1, fc2, act2, ...]
        forward                   :  runs a batch through every layer and returns the last activation
        backward                  :  placeholder, not implemented yet

    inputs:
        seq         :  list alternating Linear objects and activation names
                       ('relu', 'tanh' or 'sig'); defaults to an empty network
    """

    def __init__(self, seq=None):
        self.param_list    = [] # Stores parameters of each layer (W,b).                               type: layer_par
        self.forward_list  = [] # Stores forward pass parameters of each layer (s,x).                  type: forward_par
        self.backward_list = [] # Stores backward pass parameters of each layer (grads) (ds,dx,dW,db)  type: backward_par

        self.n_layer = 0
        self.act_list = []      # stores the requested activation functions as codes: 0, 1 or 2
        # None stands for "no layers"; avoids sharing one mutable default list between instances.
        self.make_arch([] if seq is None else seq)


    # a user-friendly-named method to access w and b of a layer
    def get_param_of_layer(self,layer):
        return self.param_list[layer]

    # a user-friendly-named method to access s and x of a layer (filled by forward)
    def get_forward_par_of_layer(self,layer):
        return self.forward_list[layer]

    # a user-friendly-named method to access the gradient record of a layer
    def get_grad_of_layer(self,layer):
        return self.backward_list[layer]


    def make_arch(self,seq):
        """
        Append one layer per (Linear, activation name) pair found in seq.

        inputs:
            seq     :  list of the form [Linear, name, Linear, name, ...]

        raises:
            ValueError  :  if an activation name is not 'relu', 'tanh' or 'sig'
        """
        seq_len = len(seq)                  # number of layers * 2 (because of the activations)
        self.n_layer = seq_len // 2         # integer division keeps the layer count an int

        for layer in range(0, seq_len, 2):

            # seq[layer] is an instance of "Linear"; read the in/out dimensions of the layer
            dim_in, dim_out = seq[layer].input_ , seq[layer].output_

            # initialize the weights of the layer
            self.param_list.append( layer_par(dim_in, dim_out) )

            # activation recognition : encode activations in "act_list"
            if seq[layer+1]=='relu':
                self.act_list.append(0)
            elif seq[layer+1]=='tanh':
                self.act_list.append(1)
            elif seq[layer+1]=='sig':
                self.act_list.append(2)
            else: raise ValueError('Unknown Activation')


    def forward(self,x):
        """
        Run x through every layer and record s and x of each layer in forward_list.

        inputs:
            x       :  2-D tensor with one sample per row (x.mm requires a matrix)

        returns:
            the activation output of the last layer, or x itself if the network has no layers
        """
        # Drop the records of the previous pass; otherwise every call would append
        # another n_layer entries and forward_list would grow without bound.
        del self.forward_list[:]

        for layer, prm in enumerate(self.param_list):
            record = forward_par(prm.dim_out)

            s = (x.mm(prm.w.t()) + prm.b.t())     # written consistently for a batch
            x = Activation(self.act_list[layer], s)

            record.s = s
            record.x = x
            self.forward_list.append(record)

        return x


    def backward (self):
        """Placeholder for the backward pass; currently does nothing."""
        # TODO: iterate over the layers in reverse, i.e. range(self.n_layer-1, -1, -1)
        pass


    # Possible short aliases (kept disabled):
    #   Forward   = get_forward_par_of_layer
    #   Backward  = get_grad_of_layer
    #   Parameter = get_param_of_layer


# Loss and SGD

In [89]:
def loss(v, t):
    """Sum over all elements of the squared difference between v and t."""
    return (v - t).pow(2).sum()

# Draft, tests, and other stuff

In [90]:
# Two-layer test network: 7 -> 2 with tanh, then 2 -> 3 with relu.
seq = [
    Linear(7, 2), 'tanh',
    Linear(2, 3), 'relu',
]
model = Net(seq)

In [91]:
# Random 7-element sample, then a 3x7 batch whose rows are x, 3*x and x*x.
x = Tensor(7).normal_()
X = torch.stack((x, x * 3, x * x), 0)
X


 0.2387  0.6157 -1.1335 -1.3465  0.7350 -0.0715 -0.0611
 0.7161  1.8471 -3.4005 -4.0396  2.2050 -0.2144 -0.1832
 0.0570  0.3791  1.2848  1.8132  0.5402  0.0051  0.0037
[torch.FloatTensor of size 3x7]

In [92]:
# Forward pass of the 3x7 batch X through the model; the displayed value is the last layer's output.
model.forward(X)


 1.1586  0.9911  0.7207
 1.2792  1.0473  0.7426
 0.0000  0.0000  0.0000
[torch.FloatTensor of size 3x3]

In [93]:
# Layer 0: weights, bias, pre-activation s and activation output x of the last forward pass.
w1, b1 = model.get_param_of_layer(0).w, model.get_param_of_layer(0).b
s1, x1 = model.get_forward_par_of_layer(0).s, model.get_forward_par_of_layer(0).x
# Layer 1: the same four quantities.
w2, b2 = model.get_param_of_layer(1).w, model.get_param_of_layer(1).b
s2, x2 = model.get_forward_par_of_layer(1).s, model.get_forward_par_of_layer(1).x

In [85]:
# Show the input batch next to the first layer's weights and bias.
# NOTE(review): this cell's execution count (85) is lower than that of the cells above and its
# saved output shows a different X than the cell that builds X -- re-run after a kernel restart.
X,w1,b1

(
  0.3274  0.6671 -0.5155 -2.0677  1.7135  0.1942 -1.4277
  0.9822  2.0012 -1.5464  2.0000  5.1405  0.5827 -4.2832
  0.1072  0.4450  0.2657  4.2753  2.9361  0.0377  2.0384
 [torch.FloatTensor of size 3x7], 
  0.8610 -0.2548  0.4226  0.8903  0.3311 -2.4686  1.0081
 -1.5119 -0.2753  0.8157  1.5613 -0.0144 -0.3429 -1.2424
 [torch.FloatTensor of size 2x7], 
  0
  0
 [torch.FloatTensor of size 2x1])

In [86]:
# Show the first layer's output next to the second layer's weights and bias.
# NOTE(review): execution count (86) is out of order with the cells above; the saved output may be stale.
x1,w2,b2

(
 -0.9973 -0.9900
 -1.0000 -1.0000
  1.0000  0.9994
 [torch.FloatTensor of size 3x2], 
 -0.1924  0.9060
 -0.6849 -0.4472
  2.2275  1.1698
 [torch.FloatTensor of size 3x2], 
  0
  0
  0
 [torch.FloatTensor of size 3x1])

In [97]:
# Sanity check: the stored layer-1 output x2 displayed next to a fresh forward pass on the same X.
x2,model.forward(X)

(
  1.1586  0.9911  0.7207
  1.2792  1.0473  0.7426
  0.0000  0.0000  0.0000
 [torch.FloatTensor of size 3x3], 
  1.1586  0.9911  0.7207
  1.2792  1.0473  0.7426
  0.0000  0.0000  0.0000
 [torch.FloatTensor of size 3x3])