In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import torchvision
from torch.nn.parameter import Parameter
from torch import Tensor

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tarfile
import os
import csv
import shutil

In [56]:
class LP_ReLU1(nn.Module):
    def __init__(self, alpha):
        super(LP_ReLU1, self).__init__()
        # self.positive_flag = positive_flag
        self.a = alpha
        # self.C1 = beta
    def forward(self, x):
        # x = x.detach().numpy()
        x=x.clone().detach().requires_grad_(True)

        if (x.all() <= 0.0):
            out = 0

        if (x.all() <= self.a and x.all() > 0):
            out = x

        if (x.all() > self.a):
            out = (self.a + 0.05*(x - self.a))
        out = torch.tensor(out)
        return out


In [82]:
class LP_ReLU1_helper(nn.Module):
    def __init__(self, alpha):
        super(LP_ReLU1_helper, self).__init__()
        self.a = alpha
        
    def forward(self, x):
        x0 = x.clone().detach()
        f1 = LP_ReLU1(alpha=self.a)
        out = f1(x0)
        # f2 = log_act(alpha=self.a)
        # out2 = f2(x0)
        # out = torch.where(np.greater(x0, 0), out1, out2)
        out = out.squeeze()
        out = torch.tensor(out)
        return out


In [30]:

x = torch.rand(3, 3, 448, 448)
conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                           bias=False)
x = conv1(x)
bn1 = nn.BatchNorm2d(64)
x = bn1(x)
activation = LP_ReLU1_helper(alpha=6)
out = activation(x)
print(out.shape)


torch.Size([3, 64, 224, 224])




In [108]:
# Imports

# Import basic libraries
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from collections import OrderedDict

# Import PyTorch
import torch # import main library
from torch.autograd import Variable
import torch.nn as nn # import modules
from torch.autograd import Function # import Function to create custom activations
from torch.nn.parameter import Parameter # import Parameter to create custom activations with learnable parameters
from torch import optim # import optimizers for demonstrations
import torch.nn.functional as F # import torch functions
from torchvision import datasets, transforms # import transformations to use for demo

# helper function to train a model
def train_model(model, trainloader):
    '''
    Function trains the model and prints out the training log.
    INPUT:
        model - initialized PyTorch model ready for training.
        trainloader - PyTorch dataloader for training data.
    '''
    #setup training

    #define loss function
    criterion = nn.NLLLoss()
    #define learning rate
    learning_rate = 0.003
    #define number of epochs
    epochs = 5
    #initialize optimizer
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    #run training and print out the loss to make sure that we are actually fitting to the training set
    print('Training the model. Make sure that loss decreases after each epoch.\n')
    for e in range(epochs):
        running_loss = 0
        for images, labels in trainloader:
            images = images.view(images.shape[0], -1)
            log_ps = model(images)
            loss = criterion(log_ps, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        else:
            # print out the loss to make sure it is decreasing
            print(f"Training loss: {running_loss}")

# simply define a silu function
def silu(input):
    '''
    Applies the Sigmoid Linear Unit (SiLU) function element-wise:

        SiLU(x) = x * sigmoid(x)
    '''
    return input * torch.sigmoid(input) # use torch.sigmoid to make sure that we created the most efficient implemetation based on builtin PyTorch functions

# create a class wrapper from PyTorch nn.Module, so
# the function now can be easily used in models
class SiLU(nn.Module):
    '''
    Applies the Sigmoid Linear Unit (SiLU) function element-wise:

        SiLU(x) = x * sigmoid(x)

    Shape:
        - Input: (N, *) where * means, any number of additional
          dimensions
        - Output: (N, *), same shape as the input

    References:
        -  Related paper:
        https://arxiv.org/pdf/1606.08415.pdf

    Examples:
        >>> m = silu()
        >>> input = torch.randn(2)
        >>> output = m(input)

    '''
    def __init__(self):
        '''
        Init method.
        '''
        super().__init__() # init the base class

    def forward(self, input):
        '''
        Forward pass of the function.
        '''
        return silu(input) # simply apply already implemented SiLU

# create class for basic fully-connected deep neural network
class ClassifierSiLU(nn.Module):
    '''
    Demo classifier model class to demonstrate SiLU
    '''
    def __init__(self):
        super().__init__()

        # initialize layers
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        # make sure the input tensor is flattened
        x = x.view(x.shape[0], -1)

        # apply silu function
        x = silu(self.fc1(x))

        # apply silu function
        x = silu(self.fc2(x))

        # apply silu function
        x = silu(self.fc3(x))

        x = F.log_softmax(self.fc4(x), dim=1)

        return x

# Implementation of Soft Exponential activation function
class soft_exponential(nn.Module):
    '''
    Implementation of soft exponential activation.

    Shape:
        - Input: (N, *) where * means, any number of additional
          dimensions
        - Output: (N, *), same shape as the input

    Parameters:
        - alpha - trainable parameter

    References:
        - See related paper:
        https://arxiv.org/pdf/1602.01321.pdf

    Examples:
        >>> a1 = soft_exponential(256)
        >>> x = torch.randn(256)
        >>> x = a1(x)
    '''
    def __init__(self, in_features, alpha = None):
        '''
        Initialization.
        INPUT:
            - in_features: shape of the input
            - aplha: trainable parameter
            aplha is initialized with zero value by default
        '''
        super(soft_exponential,self).__init__()
        self.in_features = in_features

        # initialize alpha
        if alpha == None:
            self.alpha = Parameter(torch.tensor(6.0)) # create a tensor out of alpha
        else:
            self.alpha = Parameter(torch.tensor(alpha)) # create a tensor out of alpha

        self.alpha.requiresGrad = True # set requiresGrad to true!

    def forward(self, x):
        '''
        Forward pass of the function.
        Applies the function to the input elementwise.
        '''
        # x = torch.flatten(x)
        # for i in range(len(x)):
        #   for j in range(len(x[i])):
        #         x0 = x[i][j]

        #         if x0 <= 0.0:
        #           x0 = 0.0
        #         if x0 <= self.alpha and x0 > 0:
        #           x0 = x0
        #         if x0 > self.alpha:
        #           x0 = (self.alpha + 0.05*(x0 - self.alpha))
        
        # return x
        # unflatten = torch.nn.Unflatten(1, torch.Size([3, 64, 224, 224]))
        # x = unflatten(x)
          
        if (x.any() <= 0.0):
            return torch.zeros_like(x)

        if (x.any() <= self.alpha and x.any() > 0):
            return x

        if (x.any() > self.alpha):
            return (self.alpha + 0.05*(x - self.alpha))

# create class for basic fully-connected deep neural network
class ClassifierSExp(nn.Module):
    '''
    Basic fully-connected network to test Soft Exponential activation.
    '''
    def __init__(self):
        super().__init__()

        # initialize layers
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

        # initialize Soft Exponential activation
        self.a1 = soft_exponential(256)
        self.a2 = soft_exponential(128)
        self.a3 = soft_exponential(64)

    def forward(self, x):
        # make sure the input tensor is flattened
        x = x.view(x.shape[0], -1)

        # apply Soft Exponential unit
        x = self.a1(self.fc1(x))
        x = self.a2(self.fc2(x))
        x = self.a3(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)

        return x

# Implementation of BReLU activation function with custom backward step
class brelu(Function):
    '''
    Implementation of BReLU activation function.

    Shape:
        - Input: (N, *) where * means, any number of additional
          dimensions
        - Output: (N, *), same shape as the input

    References:
        - See BReLU paper:
        https://arxiv.org/pdf/1709.04054.pdf

    Examples:
        >>> brelu_activation = brelu.apply
        >>> t = torch.randn((5,5), dtype=torch.float, requires_grad = True)
        >>> t = brelu_activation(t)
    '''
    #both forward and backward are @staticmethods
    @staticmethod
    def forward(ctx, input):
        """
        In the forward pass we receive a Tensor containing the input and return
        a Tensor containing the output. ctx is a context object that can be used
        to stash information for backward computation. You can cache arbitrary
        objects for use in the backward pass using the ctx.save_for_backward method.
        """
        ctx.save_for_backward(input) # save input for backward pass

        # get lists of odd and even indices
        input_shape = input.shape[0]
        even_indices = [i for i in range(0, input_shape, 2)]
        odd_indices = [i for i in range(1, input_shape, 2)]

        # clone the input tensor
        output = input.clone()

        # apply ReLU to elements where i mod 2 == 0
        output[even_indices] = output[even_indices].clamp(min=0)

        # apply inversed ReLU to inversed elements where i mod 2 != 0
        output[odd_indices] = 0 - output[odd_indices] # reverse elements with odd indices
        output[odd_indices] = - output[odd_indices].clamp(min = 0) # apply reversed ReLU

        return output

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        grad_input = None # set output to None

        input, = ctx.saved_tensors # restore input from context

        # check that input requires grad
        # if not requires grad we will return None to speed up computation
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.clone()

            # get lists of odd and even indices
            input_shape = input.shape[0]
            even_indices = [i for i in range(0, input_shape, 2)]
            odd_indices = [i for i in range(1, input_shape, 2)]

            # set grad_input for even_indices
            grad_input[even_indices] = (input[even_indices] >= 0).float() * grad_input[even_indices]

            # set grad_input for odd_indices
            grad_input[odd_indices] = (input[odd_indices] < 0).float() * grad_input[odd_indices]

        return grad_input

# Simple model to demonstrate BReLU
class ClassifierBReLU(nn.Module):
    '''
    Simple fully-connected classifier model to demonstrate BReLU activation.
    '''
    def __init__(self):
        super(ClassifierBReLU, self).__init__()

        # initialize layers
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

        # create shortcuts for BReLU
        self.a1 = brelu.apply
        self.a2 = brelu.apply
        self.a3 = brelu.apply

    def forward(self, x):
        # make sure the input tensor is flattened
        x = x.view(x.shape[0], -1)

        # apply BReLU
        x = self.a1(self.fc1(x))
        x = self.a2(self.fc2(x))
        x = self.a3(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)

        return x

def main():
    print('Loading the Fasion MNIST dataset.\n')

    # Define a transform
    transform = transforms.Compose([transforms.ToTensor()])

    # Download and load the training data for Fashion MNIST
    trainset = datasets.FashionMNIST('~/.pytorch/F_MNIST_data/', download=True, train=True, transform=transform)
    # trainset = datasets.CIFAR10('~/.pytorch/CIFAR10_data/', download=True, train=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

    # 1. SiLU demonstration with model created with Sequential
    # use SiLU with model created with Sequential

    # initialize activation function
    activation_function = SiLU()

    # Initialize the model using nn.Sequential
    model = nn.Sequential(OrderedDict([
                          ('fc1', nn.Linear(784, 256)),
                          ('activation1', activation_function), # use SiLU
                          ('fc2', nn.Linear(256, 128)),
                          ('bn2', nn.BatchNorm1d(num_features=128)),
                          ('activation2', activation_function), # use SiLU
                          ('dropout', nn.Dropout(0.3)),
                          ('fc3', nn.Linear(128, 64)),
                          ('bn3', nn.BatchNorm1d(num_features=64)),
                          ('activation3', activation_function), # use SiLU
                          ('logits', nn.Linear(64, 10)),
                          ('logsoftmax', nn.LogSoftmax(dim=1))]))

    # Run training
    print('Training model with SiLU activation.\n')
    # train_model(model, trainloader)

    # 2. SiLU demonstration of a model defined as a nn.Module

    # Create demo model
    model = ClassifierSiLU()

    # Run training
    print('Training model with SiLU activation.\n')
    # train_model(model, trainloader)

    # 3. Soft Eponential function demonstration (with trainable parameter alpha)

    # Create demo model
    model = ClassifierSExp()

    # Run training
    print('Training model with Soft Exponential activation.\n')
    train_model(model, trainloader)

    # 4. BReLU activation function demonstration (with custom backward step)

    # Create demo model
    model = ClassifierBReLU()

    # Run training
    print('Training model with BReLU activation.\n')
    train_model(model, trainloader)

if __name__ == '__main__':
    main()

Loading the Fasion MNIST dataset.

Training model with SiLU activation.

Training model with SiLU activation.

Training model with Soft Exponential activation.

Training the model. Make sure that loss decreases after each epoch.

Training loss: 556.5562156885862
Training loss: 468.1928484290838
Training loss: 448.35064136981964
Training loss: 441.4540691226721
Training loss: 425.68906961381435
Training model with BReLU activation.

Training the model. Make sure that loss decreases after each epoch.

Training loss: 570.8969507217407
Training loss: 433.26680885255337
Training loss: 399.40364254266024
Training loss: 383.17385813593864
Training loss: 364.9030570536852
