In [None]:
# %% Deep learning - Section 10.90
#    Activation functions in PyTorch

# This code pertains to a deep learning course provided by Mike X. Cohen on Udemy:
#   > https://www.udemy.com/course/deeplearning_x
# The "base" code in this repository is adapted (with very minor modifications)
# from code developed by the course instructor (Mike X. Cohen), while the
# "exercises" and the "code challenges" contain more original solutions and
# creative input from my side. If you are interested in DL (and if you are
# reading this statement, chances are that you are), go check out the course, it
# is singularly good.


In [None]:
# %% Libraries and modules
import numpy               as np
import matplotlib.pyplot   as plt
import torch
import torch.nn            as nn
import seaborn             as sns
import copy
import torch.nn.functional as F
import pandas              as pd
import scipy.stats         as stats
import time

from torch.utils.data                 import DataLoader,TensorDataset
from sklearn.model_selection          import train_test_split
from google.colab                     import files
from torchsummary                     import summary
from IPython                          import display
from matplotlib_inline.backend_inline import set_matplotlib_formats
set_matplotlib_formats('svg')


In [None]:
# %% Variable and function

# Shared input grid: 101 evenly spaced points over [-3,3]
x = torch.linspace(-3,3,101)

# Apply a torch-level activation function, selected by name, to the grid `x`
def NN_output(act_fun):
    """
    Evaluate `torch.<act_fun>` on the module-level tensor `x`.

    Parameters
    ----------
    act_fun : str
        Name of the attribute to look up on the `torch` module (e.g. 'relu').

    Returns
    -------
    The value returned by calling `torch.<act_fun>` on `x`.

    Raises
    ------
    AttributeError
        If `torch` has no attribute with the given name (raised by getattr).
    """

    # Resolve the name to the actual callable living in the torch namespace,
    # then evaluate it on the global grid (x is read at call time)
    activation = getattr(torch,act_fun)
    activated  = activation(x)

    return activated


In [None]:
# %% Activation functions

# Names of the torch-level activation functions to compare
activation_funs = ['relu','sigmoid','tanh']

# Figure with a golden-ratio width/height proportion
phi = ( 1 + np.sqrt(5) ) / 2
fig = plt.figure(figsize=(6*phi,6))

# One thick curve per activation function, evaluated on the shared grid x
for act_fun in activation_funs:
    activated = NN_output(act_fun)
    plt.plot(x,activated,label=act_fun,linewidth=3)

# Dashed grey reference lines: y=0 and y=1 across the x-range, then x=0
grey    = [.7,.7,.7]
x_edges = x[[0,-1]]
for level in (0,1):
    plt.plot(x_edges,[level,level],'--',color=grey)
plt.plot([0,0],[-1,3],'--',color=grey)

# Labels, title and axis limits
plt.legend()
plt.xlabel('$x$')
plt.ylabel('$\\sigma(x)$')
plt.title('Various activation functions')
plt.xlim(x_edges)
plt.ylim([-1,3])

# Save to disk, display, then trigger the Colab download of the saved file
fig_name = 'figure26_activation_functions_pytorch.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# %% More about activation functions in PyTorch

# Build an activation module from torch.nn, selected by name
# NOTE: this reuses the name NN_output and therefore replaces the torch-level
# version defined in the earlier cell
def NN_output(act_fun):
    """
    Instantiate `torch.nn.<act_fun>` with its default constructor arguments.

    Parameters
    ----------
    act_fun : str
        Name of the attribute to look up on `torch.nn` (e.g. 'ReLU6').

    Returns
    -------
    A new instance of the requested class; the caller applies it to the data.

    Raises
    ------
    AttributeError
        If `torch.nn` has no attribute with the given name (raised by getattr).
    """

    # Look the class up by name in the torch.nn namespace and instantiate it
    activation_class = getattr(torch.nn,act_fun)

    return activation_class()


In [None]:
# %% Activation functions

# Names of the torch.nn activation classes to compare
activation_funs = ['ReLU6','Hardshrink','LeakyReLU']

# Figure with a golden-ratio width/height proportion
phi = ( 1 + np.sqrt(5) ) / 2
fig = plt.figure(figsize=(6*phi,6))

# NN_output returns a torch.nn module instance, which is then called on x
for act_fun in activation_funs:
    plt.plot(x,NN_output(act_fun)(x),label=act_fun,linewidth=3)

# Dashed grey reference lines: y=0 and y=1 across the x-range, then x=0
plt.plot(x[[0,-1]],[0,0],'--',color=[.7,.7,.7])
plt.plot(x[[0,-1]],[1,1],'--',color=[.7,.7,.7])
plt.plot([0,0],[-1,3],'--',color=[.7,.7,.7])

plt.legend()
plt.xlabel('$x$')
plt.ylabel('$\\sigma(x)$')
plt.title('Various activation functions')
plt.xlim(x[[0,-1]])

# Zoomed-in y-range around zero. Only one ylim call is kept: a second call
# simply overrides the first. Use plt.ylim([-1,3]) instead for the full view
plt.ylim([-.1,.1])

# Save to disk, display, then trigger the Colab download of the saved file
plt.savefig('figure27_activation_functions_pytorch.png')

plt.show()

files.download('figure27_activation_functions_pytorch.png')


In [None]:
# %% ReLU6 more in detail

# Input grid extended up to 9
# NOTE: this rebinds the module-level `x` used by the cells above
x = torch.linspace(-3,9,101)
relu6 = torch.nn.ReLU6()

# Figure with a golden-ratio width/height proportion
phi = ( 1 + np.sqrt(5) ) / 2
fig = plt.figure(figsize=(6*phi,6))

# The curve carries a label so that plt.legend() below has an entry to show
# (calling legend() without any labelled artist only triggers a warning)
plt.plot(x,relu6(x),label='ReLU6',linewidth=3)

# Dashed grey reference lines: y=0 and y=1 across the x-range, then x=0
plt.plot(x[[0,-1]],[0,0],'--',color=[.7,.7,.7])
plt.plot(x[[0,-1]],[1,1],'--',color=[.7,.7,.7])
plt.plot([0,0],[-1,6],'--',color=[.7,.7,.7])

plt.legend()
plt.xlabel('$x$')
plt.ylabel('$\\sigma(x)$')
plt.title('ReLU6')

# Save to disk, display, then trigger the Colab download of the saved file
plt.savefig('figure29_activation_functions_pytorch.png')

plt.show()

files.download('figure29_activation_functions_pytorch.png')


In [None]:
# %% Difference between torch and torch.nn

# Coarse input grid (21 points) so that individual markers stay distinguishable
x = torch.linspace(-3,3,21)

# torch: a plain function, applied directly to the input values
y1 = torch.relu(x)

# torch.nn: a class; instantiate it first, then call the instance on the input
f  = torch.nn.ReLU()
y2 = f(x)

# Plotting
phi = ( 1 + np.sqrt(5) ) / 2
fig = plt.figure(figsize=(6*phi,6))

# Marker-only series for the two results, drawn on the same axes
for values,marker,name in [(y1,'o','torch.relu'),(y2,'x','torch.nn.ReLU')]:
    plt.plot(x,values,marker,label=name,linewidth=3)

# Dashed grey reference lines: y=0 and y=1 across the x-range, then x=0
grey    = [.7,.7,.7]
x_edges = x[[0,-1]]
for level in (0,1):
    plt.plot(x_edges,[level,level],'--',color=grey)
plt.plot([0,0],[-1,3],'--',color=grey)

plt.legend()
plt.xlabel('$x$')
plt.ylabel('$\\sigma(x)$')
plt.title('Torch vs. torch.nn')

# Save to disk, display, then trigger the Colab download of the saved file
fig_name = 'figure30_activation_functions_pytorch.png'
plt.savefig(fig_name)

plt.show()

files.download(fig_name)


In [None]:
# %% Activation functions in PyTorch

# List of activation functions in PyTorch:
#  https://pytorch.org/docs/stable/nn.html#non-linear-activations-weighted-sum-nonlinearity


In [None]:
# %% Exercises

# The goal of these explorations is to help you appreciate the remarkably diverse nonlinear shapes that a node can produce.
# All explorations use the code below.


In [None]:
# %% Code

# Create input  ...
x1 = torch.linspace(-1,1,21)
x2 = torch.abs(x1)

# ... and corresponding weights
w1 = .4
w2 = .6

# Linear combination ...
linpart = x1*w1 + x2*w2

# ... and nonlinear output
y = torch.relu(linpart)

# Plotting
phi = ( 1 + np.sqrt(5) ) / 2
fig = plt.figure(figsize=(6*phi,6))

plt.plot(x1,linpart,'bo-',label='Linear combinantion')
plt.plot(x1,y,'rs',label='Nonlinear output')

# Dashed grey reference lines: y=0 and y=1 across the x-range, then x=0.
# The horizontal lines are anchored to this cell's own input x1 rather than to
# a tensor left over from a previous cell, so the cell does not depend on
# earlier state; with xlim set to [-1,1] below the visible result is the same
plt.plot(x1[[0,-1]],[0,0],'--',color=[.7,.7,.7])
plt.plot(x1[[0,-1]],[1,1],'--',color=[.7,.7,.7])
plt.plot([0,0],[-1,3],'--',color=[.7,.7,.7])

plt.legend()
plt.xlabel('$x$')
plt.ylabel('$\\sigma(x)$')

plt.xlim([-1,1])
plt.ylim([-1,1])

# Save to disk, display, then trigger the Colab download of the saved file
plt.savefig('figure31_activation_functions_pytorch_extra1.png')

plt.show()

files.download('figure31_activation_functions_pytorch_extra1.png')


In [None]:
# %% Exercise 1
#    Look through the code to make sure you understand what it does (linear weighted combination -> nonlinear function).


In [None]:
# %% Exercise 2
#    Set x2=x1**2 and run the code. Then set one of the weights to be negative. Then set the negative weight to be close
#    to zero (e.g., -.01) with the positive weight relatively large (e.g., .8). Then swap the signs


In [None]:
# %% Exercise 3
#    Set x2=x1**2, and set the weights to be .4 and .6. Now set w2=.6 (you might want to zoom in on the y-axis)


In [None]:
# %% Exercise 4
#    Set x2 to be the absolute value of x1 and both weights positive. Then set w2=-.6. Why does w2<0 have such a big impact?
#    More generally, under what conditions are the input and output identical?

# With w1=.4 and w2=-.6 the linear part is non-positive for every x1 (it equals -.2*x1 for x1>0
# and x1 itself for x1<0), so ReLU returns nothing but zeros. More generally, the linear
# combination and the nonlinear output of a ReLU are identical whenever the linear combination
# contains only non-negative values; in that case ReLU returns its input unchanged


In [None]:
# %% Exercise 5
#    Have fun! Spend a few minutes playing around with the code. Also try changing the activation function to tanh or
#    anything else. The goal is to see that really simple input functions with really simple weights can produce really
#    complicated-looking nonlinear outputs
