In [1]:
import torch
from torch import Tensor
import numpy as np

# Auxiliary Classes

In [2]:
class layer_par:
    """
    Container for the trainable parameters of one fully connected layer.

    inputs:
        dim_in      :  the input_ dimension of the fully connected layer
        dim_out     :  the output_ dimension of the fully connected layer

    attributes:
        dim_in, dim_out :  the dimensions handed to the constructor
        b               :  bias, shape (dim_out, 1), initialised to zero
        w               :  weights, shape (dim_out, dim_in), filled in place by normal_()

    returns:
        Nothing
    """

    def __init__(self, dim_in, dim_out):
        self.dim_in, self.dim_out = dim_in, dim_out
        # Only the weights are random; the bias always starts at zero.
        self.b = Tensor(dim_out, 1).zero_()
        self.w = Tensor(dim_out, dim_in).normal_()
    # TODO: consider storing the layer's activation in this class as well
    
    
class forward_par:    # TODO: rename to "forward_par_of_layer"
    """
    Holds the values produced by the forward pass of a single layer, i.e. x and s.

    inputs:
        dim_out     :  the output_ dimension of the fully connected layer
        N           :  the number of rows (samples) to allocate for

    attributes:
        s           :  values before the activation, shape (N, dim_out), zero-initialised
        x           :  values after the activation, x = Activation(s), shape (N, dim_out), zero-initialised
        N           :  the N handed to the constructor

    returns:
        Nothing
    """
    def __init__(self, dim_out, N):
        self.N = N
        shape = (N, dim_out)
        # s and x are separate buffers of the same shape.
        self.s = Tensor(*shape).zero_()
        self.x = Tensor(*shape).zero_()
        
class backward_par:
    """
    Holds the per-sample gradient buffers of a single layer used during the backward pass.

    inputs:
        dim_in      :  the input_ dimension of the fully connected layer
        dim_out     :  the output_ dimension of the fully connected layer
        N           :  the number of samples to allocate for

    attributes (all zero-initialised):
        db          :  shape (N, dim_out, 1)
        dw          :  shape (N, dim_out, dim_in)
        ds          :  shape (N, dim_out)
        dx          :  shape (N, dim_out)

    NOTE(review): the original author flagged uncertainty about the dw orientation
    (out*in vs in*out) - confirm that code filling dw uses the same layout.

    returns:
        Nothing
    """
    def __init__(self, dim_in, dim_out, N):
        buffer_shapes = (
            ('db', (N, dim_out, 1)),
            ('dw', (N, dim_out, dim_in)),
            ('ds', (N, dim_out)),
            ('dx', (N, dim_out)),
        )
        for attr_name, shape in buffer_shapes:
            setattr(self, attr_name, Tensor(*shape).zero_())
        
        
class Linear:
    """
    A lightweight description of a fully connected layer: it only stores the
    layer's in/out connection dimensions.

    inputs:
        dim_in      :  the input_ dimension of the fully connected layer (stored as .input_)
        dim_out     :  the output_ dimension of the fully connected layer (stored as .output_)

    returns:
        Nothing
    """

    def __init__(self, dim_in, dim_out):
        self.input_, self.output_ = dim_in, dim_out
    # TODO: this class only repeats the dimensions already held by layer_par; it may be removable.

        
def Activation(code,input_):
    """
    Apply the activation function selected by an integer code:
        0: Relu(x)
        1: Tanh(x)
        2: Sigmoid(x)
        3: Linear / identity (x)

    This function works like the functional package of pytorch.

    inputs:
        code        :  the code of the requested activation (0,1,2,3)
        input_      :  the input_ tensor

    returns:
        result      :  the output_ of the requested activation, same shape as the input_ tensor.
                       For code 3 the input_ tensor itself is returned (no copy).

    raises:
        ValueError  :  if code is not one of 0, 1, 2, 3
    """
    # Relu
    if code == 0:
        # clamp maps +inf -> +inf and -inf -> 0; the arithmetic form
        # x - (x<0)*x evaluates to nan for both because 0*inf and inf-inf are nan.
        result = torch.clamp(input_, min=0)
    # Tanh
    elif code == 1:
        result = torch.tanh(input_)
    # Sig
    elif code == 2:
        result = torch.sigmoid(input_)
    # linear
    elif code == 3:
        result = input_
    # error
    else:
        raise ValueError('Unknown Code For Activation')

    return result


def dActivation(code,input_):
    """
    Evaluate the derivative of the activation selected by an integer code,
    using the same encoding convention as Activation:
        0: dRelu(x)
        1: dTanh(x)
        2: dSigmoid(x)
        3: derivative of the linear / identity activation (all ones)

    This function works like the functional package of pytorch.

    inputs:
        code        :  the code of the requested activation (0,1,2,3)
        input_      :  the input_ tensor at which the derivative is evaluated

    returns:
        result      :  the derivative values, same shape as the input_ tensor.
                       Codes 0 and 3 follow the dtype of input_.

    raises:
        ValueError  :  if code is not one of 0, 1, 2, 3
    """
    # dRelu
    if code == 0:
        # 1 where input_ > 0 and 0 where input_ <= 0; written as "ones - mask" so that
        # elements for which (input_ <= 0) is false keep the value 1, as before.
        result = torch.ones_like(input_) - (input_ <= 0).type_as(input_)
    # dTanh
    elif code == 1:
        result = 1 - (torch.tanh(input_))**2
    # dSig
    elif code == 2:
        # evaluate the sigmoid once and reuse it: sig * (1 - sig)
        sig = Activation(code, input_)
        result = sig * (1 - sig)
    # linear
    elif code == 3:
        result = torch.ones_like(input_)
    else:
        raise ValueError('Unknown Code For derivative of Activation')

    return result



# Loss and SGD

In [3]:
def loss(v, t):
    """
    Squared-error loss: the sum of (v - t)**2 over all elements, divided by len(v).

    inputs:
        v   :  predictions
        t   :  targets, compatible with v for subtraction

    returns:
        the summed squared error divided by len(v)
    """
    squared_error = (v - t) ** 2
    return squared_error.sum() / len(v)

def dloss(v,t):
    """
    Element-wise derivative of the squared error (v - t)**2 with respect to v.

    The result is 2*(v - t); it is NOT divided by len(v), unlike loss().

    inputs:
        v   :  predictions
        t   :  targets, compatible with v for subtraction

    returns:
        2*(v - t)
    """
    residual = v - t
    return 2. * residual


# Network Class

In [48]:
class Net:
    """
    A small fully connected network with hand-written forward and backward passes.

    Methods:
        get_param_of_layer        :  returns the layer_par object (w, b) of a layer
        get_forward_par_of_layer  :  returns the forward_par object (s, x) recorded by the last backward() call
        get_grad_of_layer         :  returns the backward_par object (ds, dx, dw, db) recorded by the last backward() call
        make_arch                 :  makes the architecture of the network by taking a sequential list of [fc1,act1,fc2,act2,...]
        forward                   :  runs the network on a batch
        backward                  :  runs forward + backward on a batch and returns the batch-averaged gradients

    inputs:
        seq  :  list alternating Linear objects and activation names ('relu', 'tanh', 'sig', 'lin')
        X    :  an input batch; the constructor only reads len(X) and stores it as self.N
        Y    :  not used by the constructor
    """

    # activation name -> integer code understood by Activation / dActivation
    _ACT_CODES = {'relu': 0, 'tanh': 1, 'sig': 2, 'lin': 3}

    def __init__(self, seq,X,Y=None):
        self.param_list    = [] # Stores parameters of each layer (W,b).                               type: layer_par
        self.forward_list  = [] # Stores forward pass values of each layer (s,x).                      type: forward_par
        self.backward_list = [] # Stores backward pass values of each layer (ds,dx,dW,db)              type: backward_par
        self.grad_list     = [] # Stores the batch-averaged gradients returned by backward().          type: layer_par

        self.n_layer = 0
        self.act_list = []      # stores the requested activation functions as integer codes 0, 1, 2 or 3
        self.make_arch(seq)     # makes the architecture based on the list "seq"
        self.N = len(X)         # batch size of the X given at construction time

    # a user-friendly-named method to access w and b of a layer (0-based index into param_list)
    def get_param_of_layer(self,layer):
        return self.param_list[layer]

    # a user-friendly-named method to access s and x recorded for a layer
    # (index 0 holds the network input, index l the output of layer l)
    def get_forward_par_of_layer(self,layer):
        return self.forward_list[layer]

    # a user-friendly-named method to access the per-sample grads (ds,dx,dw,db) of a layer
    # (1-based: index l belongs to layer l, index 0 is an unused placeholder)
    def get_grad_of_layer(self,layer):
        return self.backward_list[layer]


    def make_arch(self,seq):
        """
        Build param_list and act_list from seq = [fc1, act1, fc2, act2, ...].

        raises:
            ValueError  :  if an activation name is not 'relu', 'tanh', 'sig' or 'lin'
        """
        seq_len = len(seq)                  # number of layers * 2 (because of the activations...)
        # Floor division keeps n_layer an int on Python 3 too; it is later used
        # in range() and as a list length, where a float would raise TypeError.
        self.n_layer = seq_len // 2

        for layer in range(0, seq_len, 2):

            # seq[layer] is an instance of "Linear". Here we get the in/out dim of the layer
            dim_in, dim_out = seq[layer].input_ , seq[layer].output_

            # initialize the weights of the layer
            self.param_list.append(layer_par(dim_in, dim_out))

            # activation recognition : encode the activation name in "act_list"
            act_name = seq[layer+1]
            if act_name not in self._ACT_CODES:
                raise ValueError('Unknown Activation')
            self.act_list.append(self._ACT_CODES[act_name])


    def forward(self,X):
        """
        Run the network on the batch X.

        returns:
            (x, s)  :  the output of the last layer after and before its activation,
                       each of size (N, d_out) of the last layer
        """
        x=X
        for layer, prm in enumerate(self.param_list):       # prm = param_list[layer]
            s = (x.mm(prm.w.t()) + prm.b.t())               # batched form of s = Wx+b, size (N,d_out)
            x = Activation(self.act_list[layer], s)         # size (N,d_out)
        return x,s


    def backward (self,X,Y):
        """
        Run a forward pass on X, back-propagate dloss(output, Y) and return the
        gradients averaged over the batch.

        Side effects: forward_list, backward_list and grad_list are rebuilt from
        scratch on every call.

        returns:
            grad_list  :  one layer_par per layer whose .w and .b fields hold gradients (not parameters!)

        NOTE(review): the returned shapes are kept exactly as before: .w is
        (dim_in, dim_out), i.e. the transpose of layer_par.w, and .b is (dim_out,)
        rather than (dim_out, 1). Transpose / reshape before combining them with
        the parameters.
        """
        # Average over the batch actually passed in; this matches loss(), which
        # divides by len(v), even if X differs in size from the constructor's X.
        n_batch = len(X)

        # Start from a clean slate: without this a second call would keep stale
        # forward entries and return leftover, randomly initialised grad entries.
        self.forward_list  = []
        self.grad_list     = []
        # A real list (a Python 3 range object does not support item assignment).
        # Slots are 1-based per layer; slot 0 is an unused placeholder.
        self.backward_list = [None] * (self.n_layer + 1)

        # ---- forward pass, recording s and x of every layer ----
        # entry 0 holds the network input (note that dim = dim_in for inputs)
        self.forward_list.append( forward_par(self.param_list[0].dim_in, n_batch) )
        self.forward_list[0].x = X    # x0

        x = X
        for layer, prm in enumerate(self.param_list):
            record = forward_par(prm.dim_out, n_batch)
            s = (x.mm(prm.w.t()) + prm.b.t())           # batched form of s = Wx+b, size (N,d_out)
            x = Activation(self.act_list[layer], s)
            record.s = s
            record.x = x
            self.forward_list.append(record)            # lands at index layer+1

        # ---- backward pass, from the last layer down to the first ----
        for layer in range(self.n_layer, 0, -1):
            prm = self.param_list[layer-1]
            grads = backward_par(prm.dim_in, prm.dim_out, n_batch)
            self.backward_list[layer] = grads

            # dl/dx : from the loss for the last layer, otherwise propagated
            # through the weights of the layer above
            if layer == self.n_layer:
                grads.dx = dloss(x, Y)
            else:
                grads.dx = self.backward_list[layer+1].ds.mm(self.param_list[layer].w)

            # dl/ds : chain rule through the activation of this layer
            grads.ds = grads.dx * dActivation(self.act_list[layer-1], self.forward_list[layer].s)

            # dl/dw : per-sample outer product of the layer input and ds, via broadcasting
            ds_row = grads.ds.unsqueeze(1)                          # (N, 1, d_out)
            x_col  = self.forward_list[layer-1].x.unsqueeze(2)      # (N, d_in, 1)
            grads.dw = ds_row * x_col                               # (N, d_in, d_out)

            # dl/db
            grads.db = grads.ds

        # ---- average the per-sample grads over the batch ----
        for layer in range(1, self.n_layer+1):
            prm = self.param_list[layer-1]
            averaged = layer_par(prm.dim_in, prm.dim_out)
            averaged.b = self.backward_list[layer].db.sum(0) / n_batch   # watch out! these are grads!
            averaged.w = self.backward_list[layer].dw.sum(0) / n_batch   # watch out! these are grads!
            self.grad_list.append(averaged)

        return self.grad_list


# Draft, tests, and other stuff

In [49]:
# Toy batch: three 7-dimensional rows built from one random vector (x, 3*x and x*x),
# together with random targets of size 3x10.
x = Tensor(7).normal_()
X = torch.cat(
    [
        x.view(1, -1),
        x.view(1, -1) * 3,
        x.view(1, -1) * x.view(1, -1),
    ],
    0,
)
Y = Tensor(3, 10).normal_()

In [50]:
# Three fully connected layers 7 -> 2 -> 5 -> 10 with linear, relu and linear activations.
seq = [
    Linear(7, 2), 'lin',
    Linear(2, 5), 'relu',
    Linear(5, 10), 'lin',
]
model = Net(seq, X, Y)

In [51]:
# Net.backward runs the forward and backward passes and returns its grad_list (one entry per layer).
win = model.backward(X,Y)

In [53]:
# Inspect the .w field of the third entry (index 2) of the list returned by backward.
win[2].w



Columns 0 to 7 
 -0.2330  -1.6584   0.4471  -0.2246  -0.2770   0.7003  -0.7584  -0.0651
  0.0000   0.0000   0.0000   0.0000   0.0000   0.0000   0.0000   0.0000
 -0.1047  -0.7456   0.2010  -0.1010  -0.1245   0.3149  -0.3410  -0.0293
 -3.5068 -47.0963  27.6213 -16.2701  10.3904   1.4681  -4.2070 -17.2292
 -1.2886 -18.6242  11.3941  -6.7463   4.6853  -0.0011  -1.1164  -7.2989

Columns 8 to 9 
 -0.0849   0.4276
  0.0000   0.0000
 -0.0382   0.1922
 13.3894 -26.2503
  5.7937 -11.5929
[torch.FloatTensor of size 5x10]

In [9]:
# forward returns the tuple (x, s) of the last layer; the last activation here is 'lin',
# and the two tensors in the recorded output are identical.
model.forward(X)

(
 
 Columns 0 to 7 
   2.2719  -0.2327  -0.8425  -3.0341  -9.4195  -4.0008  -5.4038   4.0141
   6.8156  -0.6981  -2.5276  -9.1022 -28.2586 -12.0025 -16.2113  12.0423
  -6.5716   7.6118   7.8077  -5.4867 -14.6678  -7.0281   6.7408   7.9006
 
 Columns 8 to 9 
   3.3794  -1.8302
  10.1382  -5.4907
  -2.4201   0.6710
 [torch.FloatTensor of size 3x10], 
 
 Columns 0 to 7 
   2.2719  -0.2327  -0.8425  -3.0341  -9.4195  -4.0008  -5.4038   4.0141
   6.8156  -0.6981  -2.5276  -9.1022 -28.2586 -12.0025 -16.2113  12.0423
  -6.5716   7.6118   7.8077  -5.4867 -14.6678  -7.0281   6.7408   7.9006
 
 Columns 8 to 9 
   3.3794  -1.8302
  10.1382  -5.4907
  -2.4201   0.6710
 [torch.FloatTensor of size 3x10])

In [10]:
# Scratch: a random 2x4 tensor.
b= Tensor(2,4).normal_()

In [11]:
# NOTE: the recorded run raised NameError here - `a` is only assigned in a later cell (In [23]).
a*b

NameError: name 'a' is not defined

In [12]:
# Scratch check of range(1,5); the recorded output is the list [1, 2, 3, 4].
range(1,5)

[1, 2, 3, 4]

In [23]:
# Scratch: random operands of size 3x1x5 and 3x2x1 for the element-wise product tried in the next cell.
a= Tensor (3,1,5).normal_()
b= Tensor (3,2,1).normal_()

In [24]:
# Element-wise product of the 3x1x5 and 3x2x1 tensors; the recorded result has size 3x2x5.
a*b


(0 ,.,.) = 
  2.6336  0.5373  0.3234 -0.8952  2.7713
  0.0638  0.0130  0.0078 -0.0217  0.0671

(1 ,.,.) = 
  0.0563 -0.5344 -0.3692  0.5568  0.3351
 -0.1579  1.4984  1.0352 -1.5614 -0.9397

(2 ,.,.) = 
 -1.9301 -0.2083  1.3914  2.1775 -1.8134
  1.9941  0.2152 -1.4375 -2.2496  1.8735
[torch.FloatTensor of size 3x2x5]

In [25]:
# Display a (recorded size 3x1x5).
a


(0 ,.,.) = 
 -2.0070 -0.4095 -0.2464  0.6822 -2.1119

(1 ,.,.) = 
  0.1641 -1.5579 -1.0764  1.6235  0.9771

(2 ,.,.) = 
 -1.1079 -0.1195  0.7986  1.2499 -1.0409
[torch.FloatTensor of size 3x1x5]

In [26]:
# Display b (recorded size 3x2x1).
b


(0 ,.,.) = 
 -1.3122
 -0.0318

(1 ,.,.) = 
  0.3430
 -0.9618

(2 ,.,.) = 
  1.7422
 -1.7999
[torch.FloatTensor of size 3x2x1]

In [None]:
# Display a (this cell was not executed in the recording).
a