### Comparison of the effect of using different types of activation function (1/2) - Sequential

In [1]:
from IPython.display import YouTubeVideo

# Embed the recording for the "Sequential" run (YouTube id 'i1wiF0EtNMU') as a
# 960x540 player. It is the last expression of the cell, so the notebook shows it
# as the cell output.
YouTubeVideo('i1wiF0EtNMU', width=960, height=540)

### Comparison of the effect of using different types of activation function (1/2) - Batches of 32

In [2]:
# Embed the recording for the "Batches of 32" run (YouTube id 'pGGjhpGJQgI') as a
# 960x540 player. Relies on `YouTubeVideo` having been imported in the first cell.
YouTubeVideo('pGGjhpGJQgI', width=960, height=540)

### Imports & setup

In [None]:
# Refer to the parent path:
# (adds '../' to the module search path; presumably that is where `utils` lives)
import sys
sys.path.append('../')

# Import the helper scripts:
# NOTE(review): nothing else in this notebook imports `np`, `plt`, `sklearn`,
# `torch`, `nn`, `convert_to_pytorch_dataset` or `generate_animated_plot`, so the
# star import is presumably what brings them into scope - confirm in `utils`.
from utils import *

# Name of the experiment:
# (forwarded as `filename` to `generate_animated_plot` in the last cell)
name="activation_functions1"

%matplotlib inline

### Generate a two moons dataset

In [None]:
# Draw 150 noisy two-moons samples with a fixed seed for reproducibility.
X_moons, y_moons = sklearn.datasets.make_moons(n_samples=150, noise=0.25, random_state=0)

# Relabel class 0 as -1 (other labels are kept), which yields floating-point targets.
y_moons = np.where(y_moons == 0, -1.0, y_moons)

# Plot the two feature columns against each other, coloured by label.
plt.scatter(x=X_moons[:, 0], y=X_moons[:, 1], c=y_moons, s=40)

### Generate a two blobs dataset

In [None]:
# Draw 150 two-dimensional samples around two blob centres with a fixed seed.
X_blobs, y_blobs = sklearn.datasets.make_blobs(
    n_samples=150,
    n_features=2,
    centers=2,
    random_state=0,
)

# Relabel class 0 as -1 (other labels are kept), which yields floating-point targets.
y_blobs = np.where(y_blobs == 0, -1.0, y_blobs)

# Plot the two feature columns against each other, coloured by label.
plt.scatter(x=X_blobs[:, 0], y=X_blobs[:, 1], c=y_blobs, s=40)

### Generate a concentric circles dataset

In [None]:
# Draw 150 noisy samples from two concentric circles with a fixed seed.
X_circles, y_circles = sklearn.datasets.make_circles(
    n_samples=150,
    noise=0.09,
    random_state=0,
)

# Relabel class 0 as -1 (other labels are kept), which yields floating-point targets.
y_circles = np.where(y_circles == 0, -1.0, y_circles)

# Plot the two feature columns against each other, coloured by label.
plt.scatter(x=X_circles[:, 0], y=X_circles[:, 1], c=y_circles, s=40)

### Convert SKLearn datasets into PyTorch datasets

In [None]:
# Wrap every (features, labels) pair with the project helper, in the fixed order
# moons -> blobs -> circles; the per-dataset lists built later follow this order.
datasets = [
    convert_to_pytorch_dataset(features, labels)
    for features, labels in (
        (X_moons, y_moons),
        (X_blobs, y_blobs),
        (X_circles, y_circles),
    )
]

# Keep the individual datasets addressable by name as well.
moons_dataset, blobs_dataset, circles_dataset = datasets

### Create classifier models

In [None]:
# A hyperbolic tangent (Tanh) based neural network:
class TanhNet(nn.Module):
    """Two-hidden-layer perceptron with Tanh activations.

    The input is flattened, passed through two 32-unit hidden layers and
    reduced to a single value; that value is squashed with the error
    function so that predictions lie in [-1, 1].

    Args:
        input_size: Number of features per sample after flattening.
    """

    def __init__(self, input_size):
        super().__init__()
        # The module order fixes the ``layers.<idx>`` state-dict keys.
        stack = [
            nn.Flatten(),
            nn.Linear(input_size, 32),
            nn.Tanh(),
            nn.Linear(32, 32),
            nn.Tanh(),
            nn.Linear(32, 1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return ``erf`` of the stack's raw output for the batch ``x``."""
        raw_output = self.layers(x)
        return torch.erf(raw_output)

    
# A SoftSign based neural network:
class SoftSignNet(nn.Module):
    """Two-hidden-layer perceptron with Softsign activations.

    The input is flattened, passed through two 32-unit hidden layers and
    reduced to a single value; that value is squashed with the error
    function so that predictions lie in [-1, 1].

    Args:
        input_size: Number of features per sample after flattening.
    """

    def __init__(self, input_size):
        super().__init__()
        # The module order fixes the ``layers.<idx>`` state-dict keys.
        stack = [
            nn.Flatten(),
            nn.Linear(input_size, 32),
            nn.Softsign(),
            nn.Linear(32, 32),
            nn.Softsign(),
            nn.Linear(32, 1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return ``erf`` of the stack's raw output for the batch ``x``."""
        raw_output = self.layers(x)
        return torch.erf(raw_output)

    
# An Exponential Linear Unit (ELU) based neural network:
class ELUNet(nn.Module):
    """Two-hidden-layer perceptron with ELU activations.

    The input is flattened, passed through two 32-unit hidden layers and
    reduced to a single value; that value is squashed with the error
    function so that predictions lie in [-1, 1].

    Args:
        input_size: Number of features per sample after flattening.
    """

    def __init__(self, input_size):
        super().__init__()
        # The module order fixes the ``layers.<idx>`` state-dict keys.
        stack = [
            nn.Flatten(),
            nn.Linear(input_size, 32),
            nn.ELU(),
            nn.Linear(32, 32),
            nn.ELU(),
            nn.Linear(32, 1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return ``erf`` of the stack's raw output for the batch ``x``."""
        raw_output = self.layers(x)
        return torch.erf(raw_output)

    
# A Gaussian Error Linear Unit (GELU) based neural network:
class GELUNet(nn.Module):
    """Two-hidden-layer perceptron with GELU activations.

    The input is flattened, passed through two 32-unit hidden layers and
    reduced to a single value; that value is squashed with the error
    function so that predictions lie in [-1, 1].

    Args:
        input_size: Number of features per sample after flattening.
    """

    def __init__(self, input_size):
        super().__init__()
        # The module order fixes the ``layers.<idx>`` state-dict keys.
        stack = [
            nn.Flatten(),
            nn.Linear(input_size, 32),
            nn.GELU(),
            nn.Linear(32, 32),
            nn.GELU(),
            nn.Linear(32, 1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return ``erf`` of the stack's raw output for the batch ``x``."""
        raw_output = self.layers(x)
        return torch.erf(raw_output)
    
    
# A Rectified Linear Unit (ReLU) based neural network:
class ReLUNet(nn.Module):
    """Two-hidden-layer perceptron with ReLU activations.

    The input is flattened, passed through two 32-unit hidden layers and
    reduced to a single value; that value is squashed with the error
    function so that predictions lie in [-1, 1].

    Args:
        input_size: Number of features per sample after flattening.
    """

    def __init__(self, input_size):
        super().__init__()
        # The module order fixes the ``layers.<idx>`` state-dict keys.
        stack = [
            nn.Flatten(),
            nn.Linear(input_size, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Return ``erf`` of the stack's raw output for the batch ``x``."""
        raw_output = self.layers(x)
        return torch.erf(raw_output)


# For every dataset, build one fresh network per activation type
# (each takes 2 input features, matching the 2-D points generated above):
models = [
    {
        'Tanh': TanhNet(2),
        'SoftSign': SoftSignNet(2),
        'ELU': ELUNet(2),
        'GELU': GELUNet(2),
        'ReLU': ReLUNet(2),
    }
    for _ in datasets
]

# Initialise all networks with the same weights and biases:
# the parameters of the very first network are copied into every network.
# This works because the networks differ only in their activation modules,
# so their state-dict keys are identical.
state = next(iter(models[0].values())).state_dict()
for nets_for_dataset in models:
    for net in nets_for_dataset.values():
        net.load_state_dict(state)

### Select the learning parameters

In [None]:
# Hyper-parameters shared by every optimizer:
lr = 0.01    # Learning rate
mm = 0.9     # Momentum
wd = 0.001   # Weight decay

# One SGD optimizer per network, grouped per dataset with the same keys as `models`:
optimizers = [
    {
        label: torch.optim.SGD(net.parameters(), lr=lr, momentum=mm, weight_decay=wd)
        for label, net in nets_for_dataset.items()
    }
    for nets_for_dataset in models
]

# Loss function:
criterion = nn.MSELoss()

# Number of epochs:
epochs = 300

# Sampling method:
batch_size = 32
shuffle = True

### Generate the animated plot

In [None]:
# Run the experiment through the `generate_animated_plot` helper, forwarding the
# datasets, the per-dataset models and optimizers, the loss, the epoch count and
# the sampling settings (batch size and shuffling) chosen in the previous cells.
# NOTE(review): the helper is not defined in this notebook; presumably it comes
# from `utils`, trains each model and uses `filename` to name the saved
# animation - confirm against its definition.
generate_animated_plot(epochs=epochs, 
                       datasets=datasets, 
                       models=models, 
                       optimizers=optimizers, 
                       criterion=criterion, 
                       filename=name, 
                       batch_size=batch_size, 
                       shuffle=shuffle)