In [1]:
import torch
from torch import Tensor
import numpy as np

# Auxiliary Classes

In [28]:
class layer_par:
    """
    Container for the state of one fully connected layer.

    Every tensor is allocated in the constructor.

    inputs:
        dim_in      :  the input dimension of the fully connected layer
        dim_out     :  the output dimension of the fully connected layer

    returns:
        Nothing
    """

    def __init__(self, dim_in, dim_out):
        n_out, n_in = dim_out, dim_in
        self.b = Tensor(n_out, 1).zero_()        # bias, shape (dim_out, 1), starts at zero
        self.w = Tensor(n_out, n_in).normal_()   # weights, shape (dim_out, dim_in), random normal
        self.s = Tensor(n_out).zero_()           # placeholder for s (the value fed to the activation)
        self.x = Tensor(n_out).zero_()           # placeholder for x = Activation(s)
    # TODO: consider storing the layer's activation here as well
    
class grad_wrt:
    """
    Holds the gradient buffers of one fully connected layer.

    The buffers have the same shapes as the corresponding tensors in
    "layer_par" and all start at zero.

    inputs:
        dim_in      :  the input dimension of the fully connected layer
        dim_out     :  the output dimension of the fully connected layer

    returns:
        Nothing
    """
    def __init__(self, dim_in, dim_out):
        self.db = Tensor(dim_out,1).fill_(0)        # gradient w.r.t. the bias, shape (dim_out, 1)
        self.dw = Tensor(dim_out,dim_in).fill_(0)   # gradient w.r.t. the weights, shape (dim_out, dim_in)
        self.ds = Tensor(dim_out).fill_(0)          # gradient w.r.t. s
        self.dx = Tensor(dim_out).fill_(0)          # gradient w.r.t. x

    def call_grad(self):
        """
        Placeholder for the gradient computation; not implemented yet.

        returns:
            None
        """
        # TODO: implement. The explicit body is required: a `def` with no
        # body is an IndentationError and stops the whole cell from running.
        pass
        
        
        
        
        
        
class Linear:
    """
    Describes a fully connected layer by its input/output dimensions only.

    No weights are created here; the object just records the two sizes.

    inputs:
        dim_in      :  the input dimension of the fully connected layer
        dim_out     :  the output dimension of the fully connected layer

    returns:
        Nothing
    """

    def __init__(self, dim_in, dim_out):
        # Net.make_arch reads these two attributes by name.
        self.input_, self.output_ = dim_in, dim_out
    # TODO: this only repeats the dimensions that layer_par already receives; it could probably be dropped

        
def Activation(code,input_):
    """
    Apply the activation function selected by `code`, element-wise:
        0: Relu(x)
        1: Tanh(x)
        2: Sigmoid(x)

    This function plays the role of pytorch's functional package.

    inputs:
        code        :  the code of the activation (0,1,2)
        input_      :  the input tensor

    returns:
        result      :  a new tensor with the same shape as input_ holding the
                       activation values; input_ itself is not modified

    raises:
        ValueError  :  if code is not 0, 1 or 2
    """
    # Relu
    if code == 0:
        # clamp keeps +inf and maps -inf to 0. The previous formulation
        # x - (x<0)*x evaluated 0*inf and inf-inf and returned NaN for both.
        return input_.clamp(min=0)
    # Tanh
    if code == 1:
        return torch.tanh(input_)
    # Sig
    if code == 2:
        return 1.0/(1 + torch.exp(-input_))
    # error
    raise ValueError('Unknown Code For Activation')


def dActivation(code,input_):
    """
    Evaluate the derivative of the activation selected by `code`, element-wise,
    using the same encoding as Activation:
        0: dRelu(x)
        1: dTanh(x)
        2: dSigmoid(x)

    inputs:
        code        :  the code of the activation (0,1,2)
        input_      :  the input tensor (the point at which the derivative is taken)

    returns:
        result      :  a new tensor with the same shape as input_ holding the
                       derivative values

    raises:
        ValueError  :  if code is not 0, 1 or 2
    """
    # dRelu
    if code == 0:
        # 0 where input_ < 0 and 1 everywhere else (so the derivative at 0 is
        # taken to be 1). Built from the comparison mask directly, which
        # avoids allocating two extra all-ones tensors.
        return 1.0 - (input_<0).float()
    # dTanh
    if code == 1:
        return 1-(torch.tanh(input_))**2
    # dSig
    if code == 2:
        # sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)); evaluate the sigmoid once.
        sig = Activation(code,input_)
        return sig*(1-sig)
    # error
    raise ValueError('Unknown Code For derivative of Activation')

# Network Class

In [29]:
class Net:
    """
    The network class. It has the following methods:
        get_param_of_layer :  returns the "layer_par" object of a layer (the object, not
                              a copy; its tensors are reachable as .w, .b, .s and .x)
        get_grad_of_layer  :  placeholder, not implemented yet
        make_arch          :  makes the architecture of the network by taking a sequential
                              list of [fc1,act1,fc2,act2,...]
        forward            :  runs a batch through all layers
        backward           :  placeholder, not implemented yet
    """

    def __init__(self, seq=[]):
        # NOTE: `seq` is only read, never mutated, so the shared default list is harmless.
        self.param_list = []    # Stores the parameters of each layer. Elements are "layer_par" objects
        self.act_list = []      # Stores the requested activation functions as codes. Elements are 0, 1 or 2
        self.grad_list = []     # Reserved for the gradients of each layer; nothing fills it yet
        self.make_arch(seq)     # makes the architecture based on the list "seq"

    # a user-friendly-named method to access w, b, s and x of a layer
    def get_param_of_layer(self,layer):
        return self.param_list[layer]

    # a user-friendly-named method to access the gradients of a layer
    def get_grad_of_layer(self,layer):
        # TODO: not implemented yet; currently returns None.
        pass

    def make_arch(self,seq):
        """
        Append the layers described by `seq` to the network.

        inputs:
            seq  :  list alternating "Linear" objects and activation names
                    ('relu', 'tanh' or 'sig'): [fc1,act1,fc2,act2,...]

        raises:
            ValueError  :  if seq has an odd number of elements or names an
                           unknown activation
        """
        seq_len = len(seq)                  # number of layers * 2 (because of the activations...)

        # Every layer needs its activation; check before touching any state.
        if seq_len % 2 != 0:
            raise ValueError(
                'seq must alternate layers and activations '
                '[fc1,act1,fc2,act2,...]; got {} element(s)'.format(seq_len))

        for layer in range(0, seq_len, 2):

            # seq[layer] is an instance of "Linear". Here we get the in/out dim of the layer
            dim_in, dim_out = seq[layer].input_, seq[layer].output_

            # activation recognition: resolve the code first so that an unknown
            # name cannot leave param_list and act_list with different lengths
            act_name = seq[layer+1]
            if act_name == 'relu':
                act_code = 0
            elif act_name == 'tanh':
                act_code = 1
            elif act_name == 'sig':
                act_code = 2
            else:
                raise ValueError('Unknown Activation')

            # initialize the weights of the layer and record its activation
            self.param_list.append(layer_par(dim_in, dim_out))
            self.act_list.append(act_code)

    def forward(self,x):
        """
        Run `x` through every layer and return the output of the last one.

        inputs:
            x  :  batch of inputs, one sample per row (x.mm(w.t()) requires
                  the number of columns to equal the first layer's dim_in)

        returns:
            the activation of the last layer, or x itself if the network has no layers
        """
        for layer, prm in enumerate(self.param_list):
            prm.s = (x.mm(prm.w.t()) + prm.b.t())           # written consistently for a batch
            x = Activation(self.act_list[layer],prm.s)
            prm.x = x                                        # keep x = Activation(s), as layer_par documents
        return x

    def backward(self):
        # TODO: not implemented yet. `self` was missing from the signature,
        # which made model.backward() raise a TypeError.
        pass

# Loss and SGD

In [30]:
def loss(v, t):
    """
    Sum of squared differences between two tensors.

    inputs:
        v, t  :  the two tensors to compare

    returns:
        the sum over all elements of (v - t)^2
    """
    residual = v - t
    return residual.pow(2).sum()

# Draft, tests, and other stuff

In [31]:
# Small test network: Linear(7,2) followed by tanh, then Linear(2,3) followed by relu.
seq = [Linear(7,2),'tanh',Linear(2,3),'relu']    
model = Net(seq)

In [32]:
# Build a 3x7 test batch from one random vector x: the rows are x, 3*x and x*x.
x = Tensor(7).normal_()
X = torch.stack((x, x * 3, x * x), 0)
X


-0.4180  0.7239  3.1533  0.5672 -2.0508 -0.0750  1.0284
-1.2541  2.1717  9.4600  1.7017 -6.1525 -0.2251  3.0853
 0.1748  0.5240  9.9435  0.3218  4.2059  0.0056  1.0577
[torch.FloatTensor of size 3x7]

In [33]:
# Forward pass of the 3x7 batch X through the test network (one sample per row).
model.forward(X)


 0.3425  0.0122  0.0000
 0.4596  0.0000  0.0000
 0.0000  0.6200  0.0000
[torch.FloatTensor of size 3x3]

In [34]:
# Pull out the weights, bias and stored s of both layers so the forward pass
# can be recomputed by hand in the following cells.
w1, b1, s1 = (
    model.get_param_of_layer(0).w,
    model.get_param_of_layer(0).b,
    model.get_param_of_layer(0).s,
)
w2, b2, s2 = (
    model.get_param_of_layer(1).w,
    model.get_param_of_layer(1).b,
    model.get_param_of_layer(1).s,
)

In [35]:
# Recompute the first layer's s by hand, using the same formula as Net.forward.
S1 = X.mm(w1.t())+b1.t()

In [36]:
# Second layer's s by hand: apply tanh (code 1) to S1, then the second linear map.
S2 = Activation(1,S1).mm(w2.t())+b2.t()

In [37]:
# Display S2; zeroing its negative entries (relu) should reproduce model.forward(X).
S2


 0.3425  0.0122 -0.6994
 0.4596 -0.0652 -0.7664
-0.1775  0.6200 -0.9608
[torch.FloatTensor of size 3x3]

In [53]:
# Random tensor with the same shape as X, used as the second argument of loss() below.
Y=Tensor(X.shape).normal_()
Y


-0.4247 -1.3250 -0.0681 -0.5092 -0.5179  0.0591  0.5718
-0.3701 -1.6274 -0.7035  0.3733 -0.4113 -0.5786  0.4245
 0.0884 -0.0916  2.0080  0.2671 -2.0022  0.3206  1.6533
[torch.FloatTensor of size 3x7]

In [54]:
# Sum of squared differences between X and Y.
loss(X,Y)

281.9885527333354

In [55]:
# Display X before the aliasing experiment in the next cells.
# NOTE(review): the saved output already shows X[1,3] == 2.0, which suggests
# the cells were executed out of order - re-run from a fresh kernel to confirm.
X



-0.4180  0.7239  3.1533  0.5672 -2.0508 -0.0750  1.0284
-1.2541  2.1717  9.4600  2.0000 -6.1525 -0.2251  3.0853
 0.1748  0.5240  9.9435  0.3218  4.2059  0.0056  1.0577
[torch.FloatTensor of size 3x7]

In [56]:
# Judging by the saved outputs, Tensor(X) does not copy the data: the write to
# Z in the next cell shows up in X as well.
# NOTE(review): use X.clone() if an independent copy is intended.
Z = Tensor(X)

In [57]:
# In-place write through Z, to see whether X is affected.
Z[1,3]=2

In [58]:
# Display X after the write to Z (the saved output shows 2.0000 at row 1, column 3).
X


-0.4180  0.7239  3.1533  0.5672 -2.0508 -0.0750  1.0284
-1.2541  2.1717  9.4600  2.0000 -6.1525 -0.2251  3.0853
 0.1748  0.5240  9.9435  0.3218  4.2059  0.0056  1.0577
[torch.FloatTensor of size 3x7]

In [59]:
# Rebind Z to a new 3x2 tensor; its contents are not set by this cell.
Z= Tensor(3,2)

In [60]:
# Try allocating a tensor from another tensor's shape, once via q.shape and once via q.size().
q= Tensor(4,7).normal_()
z= Tensor(q.shape)
Z= Tensor(q.size())

In [61]:
# ReLU derivative at X: 0 where X < 0, 1 elsewhere.
dActivation(0,X)


    0     1     1     1     0     0     1
    0     1     1     1     0     0     1
    1     1     1     1     1     1     1
[torch.FloatTensor of size 3x7]

In [47]:
# tanh derivative at X: 1 - tanh(X)**2.
dActivation(1,X)


 0.8438  0.6164  0.0073  0.7365  0.0640  0.9944  0.4020
 0.2785  0.0506  0.0000  0.0707  0.0000  0.9510  0.0083
 0.9701  0.7688  0.0000  0.9032  0.0009  1.0000  0.3841
[torch.FloatTensor of size 3x7]

In [48]:
# Sigmoid derivative at X: sigmoid(X) * (1 - sigmoid(X)).
dActivation(2,X)


 0.2394  0.2199  0.0393  0.2309  0.1010  0.2496  0.1940
 0.1727  0.0919  0.0001  0.1050  0.0021  0.2469  0.0418
 0.2481  0.2336  0.0000  0.2436  0.0145  0.2500  0.1913
[torch.FloatTensor of size 3x7]

In [49]:
# Display X again (presumably to confirm the dActivation calls left it unchanged).
X


-0.4180  0.7239  3.1533  0.5672 -2.0508 -0.0750  1.0284
-1.2541  2.1717  9.4600  2.0000 -6.1525 -0.2251  3.0853
 0.1748  0.5240  9.9435  0.3218  4.2059  0.0056  1.0577
[torch.FloatTensor of size 3x7]