In [1]:
import torch

## Building blocks of a neural network

In [65]:
class Module(object):
    """
    Common interface implemented by every layer in this notebook.

    Concrete layers override ``forward`` and ``backward``; ``param`` can be
    inherited unchanged, in which case it reports no parameters.
    """

    def forward(self, *input):
        """Compute the layer's output. The base class has no implementation."""
        raise NotImplementedError

    def backward(self, *gradwrtoutput):
        """Propagate gradients through the layer. The base class has no implementation."""
        raise NotImplementedError

    def param(self):
        """Return the layer's parameters; the base class returns an empty list."""
        return list()

# Tanh

### Our function

In [20]:
# Sample point at which the hand-written functions below are evaluated.
a = torch.tensor([2.0], dtype=torch.float32)

def tanh(x: torch.FloatTensor):
    """
    Element-wise hyperbolic tangent.

    Uses the identity tanh(x) = sign(x) * (1 - e^{-2|x|}) / (1 + e^{-2|x|})
    instead of (e^x - e^-x) / (e^x + e^-x): the exponent is never positive,
    so the exponential cannot overflow and large |x| yields +/-1 rather
    than inf/inf = NaN.

    Args:
        x: input tensor.

    Returns:
        A new tensor with the same shape as ``x`` holding tanh of each entry.
    """
    # expm1(t) = e^t - 1 keeps precision when |x| is tiny; here it lies in [-1, 0].
    em1 = torch.expm1(-2 * torch.abs(x))

    # 1 - e^{-2|x|} == -em1   and   1 + e^{-2|x|} == em1 + 2
    return torch.sign(x) * (-em1) / (em1 + 2)

# Derivative
def tanh_p(x: torch.FloatTensor):
    """Element-wise derivative of ``tanh`` at ``x``, i.e. 1 - tanh(x)^2."""
    squared = torch.pow(tanh(x), 2)
    return 1 - squared

In [3]:
# Value and derivative of the hand-written tanh at the sample point, one per line.
print(tanh(a), tanh_p(a), sep="\n")

tensor([0.9640])
tensor([0.0707])


### Torch function

In [4]:
# Reference check: let autograd differentiate torch.tanh at the same point.
b = torch.FloatTensor([2.0]).requires_grad_(True)

y = b.tanh()
print(y)

# y has a single element, so backward() needs no explicit gradient argument.
y.backward()
print(b.grad)

tensor([0.9640], grad_fn=<TanhBackward>)
tensor([0.0707])


# ReLU

### Our function

In [21]:
def relu(x: torch.FloatTensor):
    """Rectified linear unit: entries below 0 become 0, all others are kept."""
    return x.clamp(min=0)

def relu_p(x: torch.FloatTensor):
    """
    Element-wise derivative of ReLU.

    Returns a new tensor and leaves ``x`` untouched; the earlier version wrote
    the 0/1 mask into ``x`` itself, silently destroying the caller's data.

    Args:
        x: point(s) at which to evaluate the derivative.

    Returns:
        Tensor of the same shape and dtype as ``x`` with 1 where ``x > 0``
        and 0 elsewhere (the derivative at exactly 0 is taken to be 0).
    """
    return (x > 0).to(x.dtype)

In [28]:
# Evaluate the hand-written ReLU and its derivative at a fresh sample point.
a = torch.tensor([2.0], dtype=torch.float32)
print(relu(a), relu_p(a), sep="\n")

tensor([2.])
tensor([1.])


### Torch function

In [29]:
# Reference check: let autograd differentiate torch.relu at the same point.
b = torch.FloatTensor([2.0]).requires_grad_(True)

y = b.relu()
print(y)

# y has a single element, so backward() needs no explicit gradient argument.
y.backward()
print(b.grad)

tensor([2.], grad_fn=<ReluBackward0>)
tensor([1.])


# Sigmoid function

### Our function

In [34]:
def sigmoid(x: torch.FloatTensor):
    """Logistic function 1 / (1 + e^{-x}), applied element-wise."""
    denominator = 1 + torch.exp(-x)
    return 1 / denominator

def sigmoid_p(x: torch.FloatTensor):
    """
    Element-wise derivative of the logistic function: s(x) * (1 - s(x)).

    Args:
        x: point(s) at which to evaluate the derivative.

    Returns:
        A new tensor with the same shape as ``x``.
    """
    # Evaluate the sigmoid once and reuse it for both factors.
    s = sigmoid(x)
    return s * (1 - s)

In [35]:
# Evaluate the hand-written sigmoid and its derivative at a fresh sample point.
a = torch.tensor([2.0], dtype=torch.float32)
print(sigmoid(a), sigmoid_p(a), sep="\n")

tensor([0.8808])
tensor([0.1050])


In [36]:
# Reference check: let autograd differentiate torch.sigmoid at the same point.
b = torch.FloatTensor([2.0]).requires_grad_(True)

y = b.sigmoid()
print(y)

# y has a single element, so backward() needs no explicit gradient argument.
y.backward()
print(b.grad)

tensor([0.8808], grad_fn=<SigmoidBackward>)
tensor([0.1050])


# Leaky ReLU function

In [37]:
# clamp(max=0) caps every entry at 0, so an all-positive tensor collapses to zeros.
torch.FloatTensor([1.0, 2.0, 3.0]).clamp(max=0)

tensor([0., 0., 0.])

In [38]:
def LRelu(x: torch.FloatTensor, slope: float):
    """Leaky ReLU: positive entries pass through, the rest are scaled by ``slope``."""
    positive_part = x.clamp(min=0)
    negative_part = x.clamp(max=0)
    return positive_part + slope * negative_part

def LRelu_p(x: torch.FloatTensor, slope: float):
    """
    Element-wise derivative of the leaky ReLU.

    Returns a new tensor and leaves ``x`` untouched; the earlier version wrote
    the result into ``x`` itself, silently destroying the caller's data.

    Args:
        x: point(s) at which to evaluate the derivative.
        slope: gradient used for the non-positive part.

    Returns:
        Tensor of the same shape and dtype as ``x`` with 1 where ``x > 0``
        and ``slope`` elsewhere (including at exactly 0).
    """
    return torch.where(x > 0, torch.ones_like(x), torch.full_like(x, slope))
    

In [50]:
# Evaluate the hand-written leaky ReLU and its derivative at a fresh sample point.
a = torch.tensor([2.0], dtype=torch.float32)
print(LRelu(a, slope=0.01), LRelu_p(a, slope=0.01), sep="\n")

tensor([2.])
tensor([1.])


# Mean Squared Error

In [55]:
# Toy prediction / target pair; they disagree at positions 2 and 3.
output = torch.tensor([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=torch.float32)
target = torch.tensor([1.0, 1.0, 0.0, 1.0, 0.0, 0.0], dtype=torch.float32)

In [56]:
def mse(predicted_output: torch.FloatTensor, target_output: torch.FloatTensor):
    """
    Squared-error loss between a prediction and its target.

    The squared differences are summed; the total is not divided by the
    number of elements.
    """
    residual = predicted_output - target_output
    return torch.sum(torch.pow(residual, 2))

def mse_p(predicted_output: torch.FloatTensor, target_output: torch.FloatTensor):
    """Gradient of the summed squared error with respect to the prediction."""
    residual = predicted_output - target_output
    return 2 * residual

# Linear fully connected layer

In [69]:
# Matrix-vector product of a random (2, 3) matrix with a random length-3 vector.
mat, vec = torch.randn(2, 3), torch.randn(3)
mat.mv(vec)

tensor([-0.0662,  4.4729])

In [94]:
class Relu(Module):
    """
    ReLU activation layer.

    ``forward`` caches its input so that ``backward`` can evaluate the
    derivative at the same point.
    """

    def forward(self, *input):
        """
        Apply ReLU to the first positional argument.

        Args:
            *input: only ``input[0]`` is used.

        Returns:
            A new tensor with the negative entries of ``input[0]`` set to 0.
        """
        # Despite its name, `self.output` holds the layer *input*: it is the
        # point at which `backward` later evaluates the derivative.
        self.output = input[0]

        return self.relu(self.output)

    def backward(self, *gradwrtoutput):
        """
        Multiply the incoming gradient by ReLU'(input) element-wise.

        Args:
            *gradwrtoutput: only ``gradwrtoutput[0]`` is used.

        Returns:
            The product of the derivative mask and ``gradwrtoutput[0]``.
        """
        derivatives = self.relu_p(self.output)

        return derivatives * gradwrtoutput[0]

    def relu(self, x):
        """Element-wise max(x, 0), returned as a new tensor."""
        return torch.clamp(x, min=0)

    def relu_p(self, x):
        """
        Element-wise derivative of ReLU: 1 where ``x > 0``, 0 elsewhere.

        Builds a new tensor instead of writing into ``x``. ``backward`` passes
        the cached input here, so in-place writes would overwrite the tensor
        the caller handed to ``forward``.
        """
        return (x > 0).to(x.dtype)


In [None]:
class Tanh(Module):
    """
    Tanh activation layer.

    ``forward`` caches its input so that ``backward`` can evaluate the
    derivative at the same point.
    """

    def forward(self, *input):
        """
        Apply tanh to the first positional argument.

        Args:
            *input: only ``input[0]`` is used.

        Returns:
            A new tensor holding tanh of each entry of ``input[0]``.
        """
        # Despite its name, `self.output` holds the layer *input*: it is the
        # point at which `backward` later evaluates the derivative.
        self.output = input[0]

        return self.tanh(self.output)

    def backward(self, *gradwrtoutput):
        """
        Multiply the incoming gradient by tanh'(input) element-wise.

        Args:
            *gradwrtoutput: only ``gradwrtoutput[0]`` is used.

        Returns:
            The product of the derivative and ``gradwrtoutput[0]``.
        """
        derivatives = self.tanh_p(self.output)

        return derivatives * gradwrtoutput[0]

    def tanh(self, to_compute):
        """
        Element-wise hyperbolic tangent.

        Uses tanh(x) = sign(x) * (1 - e^{-2|x|}) / (1 + e^{-2|x|}): the
        exponent is never positive, so the exponential cannot overflow and
        large |x| yields +/-1 rather than inf/inf = NaN.
        """
        # expm1(t) = e^t - 1 keeps precision when |x| is tiny; here it lies in [-1, 0].
        em1 = torch.expm1(-2 * torch.abs(to_compute))

        # 1 - e^{-2|x|} == -em1   and   1 + e^{-2|x|} == em1 + 2
        return torch.sign(to_compute) * (-em1) / (em1 + 2)

    def tanh_p(self, x):
        """Element-wise derivative of tanh: 1 - tanh(x)^2."""
        return (1 - torch.pow(self.tanh(x), 2))


In [None]:
class Linear(Module):
    """
    Fully connected layer computing ``weight @ x + bias`` for a single sample.

    Attributes:
        in_features: number of input neurons.
        out_features: number of output neurons.
        weight: tensor of shape (out_features, in_features).
        bias: tensor of shape (out_features,), initialised to zero.
    """

    def __init__(self, in_features, out_features):
        """
        Create the layer and initialise its parameters.

        Args:
            in_features: number of input neurons.
            out_features: number of output neurons.
        """
        # Number of input neurons
        self.in_features = in_features
        # Number of output neurons
        self.out_features = out_features

        # Xavier-style initialisation: draw the weights from N(0, 1), then
        # scale them by sqrt(1 / (in_features + out_features)).
        # NOTE(review): torch.nn.init.xavier_normal_ uses 2 in the numerator
        # rather than 1 — confirm which scale is intended.
        scale = (1.0 / (self.in_features + self.out_features)) ** 0.5
        self.weight = torch.empty(out_features, in_features).normal_(mean=0, std=1) * scale

        # Zero bias initialization
        self.bias = torch.zeros(out_features)

    def forward(self, *input):
        """
        Compute the layer output for one input vector.

        Args:
            *input: only ``input[0]`` is used; ``torch.mv`` requires it to be
                a 1-D tensor of length ``in_features``.

        Returns:
            1-D tensor of length ``out_features``.
        """
        # Input from the previous layer, cached on the instance.
        self.input_from_layer = input[0]

        # Multiply the weights with the input and add the bias.
        # (The earlier version referenced an undefined name `weights` here.)
        self.output = torch.mv(self.weight, self.input_from_layer) + self.bias

        return self.output

    def backward(self, *gradwrtoutput):
        """Not implemented yet."""
        raise NotImplementedError

    def param(self):
        """
        Return the layer's parameters.

        TODO: currently returns an empty list although the layer owns
        ``weight`` and ``bias``.
        """
        return []
    

In [None]:
# Scratch list holding 1, 2, 3.
a = list(range(1, 4))

In [None]:
# Scratch cell: a bare name as the cell's last expression displays its current value.
a

In [None]:
class Sequential:
    """
    Container that chains layers: the output of each one feeds the next.

    Every element of ``layers`` must provide ``forward(x)`` and
    ``backward(grad)``.
    """

    def __init__(self, layers):
        """
        Args:
            layers: ordered sequence of layer objects; stored as given.
        """
        self.layers = layers

    def forward(self, initial_input):
        """
        Run ``initial_input`` through every layer in order.

        Returns:
            The output of the last layer (``initial_input`` itself when
            there are no layers).
        """
        output_single_layer = initial_input

        for layer in self.layers:
            output_single_layer = layer.forward(output_single_layer)

        return output_single_layer

    def backward(self, initial_backward_input):
        """
        Propagate a gradient through the layers in reverse order.

        Args:
            initial_backward_input: gradient with respect to the output of
                the last layer.

        Returns:
            The value returned by the first layer's ``backward``
            (``initial_backward_input`` itself when there are no layers).
        """
        output_single_layer_backward = initial_backward_input

        # The last layer applied in forward() is the first to receive the gradient.
        for layer in reversed(self.layers):
            output_single_layer_backward = layer.backward(output_single_layer_backward)

        return output_single_layer_backward
        
        

In [None]:
# Scratch cell: a bare name as the cell's last expression displays the imported module object.
torch

In [None]:
# NOTE(review): this cell defines a second class named `Linear`; run after the
# earlier `Linear` cell, it rebinds the name and replaces that hand-written layer.
# It uses `Parameter`, `init`, `math`, `F` and `self.register_parameter`, none of
# which are imported or defined in the visible part of this notebook (only
# `torch` is imported, and the local `Module` defines just forward/backward/param).
# Presumably pasted from torch.nn for reference — confirm before running.
class Linear(Module):
    r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to False, the layer will not learn an additive bias.
            Default: ``True``

    Shape:
        - Input: :math:`(N, *, \text{in\_features})` where :math:`*` means any number of
          additional dimensions
        - Output: :math:`(N, *, \text{out\_features})` where all but the last dimension
          are the same shape as the input.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
                :math:`k = \frac{1}{\text{in\_features}}`

    Examples::

        >>> m = nn.Linear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """
    __constants__ = ['bias']

    def __init__(self, in_features, out_features, bias=True):
        super(Linear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Uninitialised (out_features, in_features) storage; filled by reset_parameters().
        self.weight = Parameter(torch.Tensor(out_features, in_features))
        if bias:
            self.bias = Parameter(torch.Tensor(out_features))
        else:
            # Registers the name 'bias' with value None so `self.bias is not None` checks work.
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        # Weight: Kaiming-uniform initialisation with a = sqrt(5).
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))
        if self.bias is not None:
            # Bias: uniform in [-1/sqrt(fan_in), 1/sqrt(fan_in)].
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input):
        # Delegates the affine map to F.linear with this layer's weight and bias.
        return F.linear(input, self.weight, self.bias)

    def extra_repr(self):
        # Summary string listing the layer sizes and whether a bias is present.
        return 'in_features={}, out_features={}, bias={}'.format(
            self.in_features, self.out_features, self.bias is not None
        )