In [30]:
import torch
from torch import Tensor
import numpy as np

# Auxiliary Classes

In [288]:
class layer_par:
    """
    This class contains parameters of each layer. We initialize them in constructor.
    
    input_s:
        dim_in      :  the input_ dimension of fully connected layer
        dim_out     :  the output_ dimension of fully connected layer
        
    returns:
        Nothing
    """
    
    def __init__(self, dim_in, dim_out):
        self.b = Tensor(dim_out,1).fill_(0)         # bias of each layer
        self.w = Tensor(dim_out,dim_in).normal_()   # w of each layer
        self.s = Tensor(dim_out).fill_(0)           # s after each layer


class Linear:
    """
    An class that contains objects which only store layar's in/out connections dimension
    
    input_s:
        dim_in      :  the input_ dimension of fully connected layer
        dim_out     :  the output_ dimension of fully connected layer
        
    returns:
        Nothing 
    """
    
    def __init__(self,dim_in, dim_out):
        self.input_ = dim_in
        self.output_ = dim_out          


        
def Activation(code,input_):
    """
    A class that specify the needed activation with respect to the following code conversion
        0: Relu(x)
        1: Tanh(x)
        2: Sigmoid(x)
    
    This class works as functional package of pytorch
    
    input_s:
        code        :  the code for each activation (0,1,2)
        input__tensor:  the input_ tensor
        
    returns:
        result      :  the output_ of requested activation function with the same shape as input_ tensor
    """
    result = Tensor.new(input_)
    # Relu
    if code ==0:
        result = input_ - (input_<0).float()*input_
    # Tanh
    elif code ==1:
        result = torch.tanh(input_)
    # Sig
    elif code ==2:
        result = 1.0/(1 + torch.exp(-input_))
    # error
    else: raise ValueError('Unknown Code For Activation')
    
    return result 

# Network Class

In [289]:
class Net:
    """
    The network class. It has the following methods:
        param      :  returns the parameter which is asked for. Not the data! The object... 
                        Data is accessible through object.data method)
        make_arch  :  makes the architecture of the network by taking a sequential list of [fc1,act1,fc2,act2,...]
    """
    
    def __init__(self, seq=[]):
        self.param_list = []    # Stores the parameters of each layer.Elements are "layer_par" objects
        self.act_list = []      # stores the requested activation functions in codes. Elements are "0","1" or "2"
        self.grad_list = []     # stores grad wrt parameters at each layer. Elemenst are matrices of size (d_in,d_out +1)
        self.make_arch(seq)     # makes the architecture based on the the list "seq"
        
    
    # a user-friendly-named method fo access w,b and s at each layer
    def get_param_of_layer(self,layer):      
        return self.param_list[layer]
    
    # a user-friendly-named method fo access grad w.r.t. w,b and s at each layer
    def get_grad_of_layer(self,layer):
        pass
    
    # a user-friendly-named method fo access w,b and s at each layer
    def make_arch(self,seq):
        seq_len = len(seq)                  # number of layer *2 (because of the activations...)
        
        for layer in range (0,seq_len,2):
            
            # seq[layer] is an instance of object "Linear". Here we get the in/out dim of the layer
            dim_in, dim_out = seq[layer].input_ , seq[layer].output_ 
            
            # initialize the weights of layer
            self.param_list.append ( layer_par(dim_in, dim_out) ) 
            
            # activation recognition : encode activations in "act_list"
            if seq[layer+1]=='relu':
                self.act_list.append(0)
            elif seq[layer+1]=='tanh':
                self.act_list.append(1)
            elif seq[layer+1]=='sig':
                self.act_list.append(2)
            else: raise ValueError('Unknown Activation')
                
            
    def forward(self,x): 
        for layer, prm in enumerate(self.param_list):       # parameter = param[layer]  
            prm.s = (x.mm(prm.w.t()) + prm.b.t())                  # written consistant for batch
            x = Activation(self.act_list[layer],prm.s)
        return x
        
    def backward ()

# Loss and SGD

In [297]:
def loss(v, t):
    l_ = torch.sum(torch.pow(v-t,2))
    return l_

# Draft, tests, and other stuff

In [290]:
seq = [Linear(7,2),'tanh',Linear(2,3),'relu']    
model = Net(seq)

In [291]:
x = Tensor(7).normal_()
X = torch.cat((x.view(1,-1),x.view(1,-1)*3,x.view(1,-1)*x.view(1,-1)),0)
X


-0.1421  0.9410 -0.8262  0.5169  0.3800  0.9679 -0.6186
-0.4264  2.8229 -2.4787  1.5506  1.1399  2.9036 -1.8558
 0.0202  0.8854  0.6827  0.2671  0.1444  0.9368  0.3826
[torch.FloatTensor of size 3x7]

In [292]:
model.forward(X)


 0.0000  0.0000  0.6616
 0.0000  0.0491  0.5999
 0.0000  0.0000  2.6707
[torch.FloatTensor of size 3x3]

In [293]:
w1= model.get_param_of_layer(0).w
b1= model.get_param_of_layer(0).b
s1= model.get_param_of_layer(0).s
w2= model.get_param_of_layer(1).w
b2= model.get_param_of_layer(1).b
s2= model.get_param_of_layer(1).s

In [294]:
S1 = X.mm(w1.t())+b1.t()

In [295]:
S2 = Activation(1,S1).mm(w2.t())+b2.t()

In [296]:
S2


-0.9463 -0.0716  0.6616
-0.9924  0.0491  0.5999
-1.8549 -1.9566  2.6707
[torch.FloatTensor of size 3x3]

In [318]:
Y=Tensor.new(X)
print (Y.shape)

torch.Size([])


In [317]:
loss(X,Y)

142.234962331946