In [0]:
import torch
import numpy as np

In [0]:
# Read the random seed from standard input and apply it to both NumPy's and
# PyTorch's global random number generators.
# NOTE(review): input() blocks a non-interactive "Run All"; consider a constant seed.
seed = int(input())
np.random.seed(seed)
# Last expression of the cell: its return value (the torch Generator) is what the cell displays.
torch.manual_seed(seed)

11


<torch._C.Generator at 0x7ff592fe6a30>

In [0]:
# Number of random inputs drawn per activation when averaging the fc1 gradient magnitude.
NUMBER_OF_EXPERIMENTS = 200

In [0]:
class SimpleNet(torch.nn.Module):
  """Three chained 1->1 bias-free linear layers, each followed by the same activation.

  Every linear weight is initialised to 1, so the gradient that reaches ``fc1``
  is shaped by the input and by the activation applied after each layer.

  Args:
    activation: callable (typically a ``torch.nn`` activation module) applied
      to the output of every linear layer.
  """

  def __init__(self, activation):
    super().__init__()
    self.activation = activation
    self.fc1 = self._unit_linear()
    self.fc2 = self._unit_linear()
    self.fc3 = self._unit_linear()

  @staticmethod
  def _unit_linear():
    """Build a bias-free ``Linear(1, 1)`` whose single weight equals 1."""
    layer = torch.nn.Linear(1, 1, bias=False)
    # torch.nn.init fills the parameter in place without going through `.data`.
    torch.nn.init.ones_(layer.weight)
    return layer

  def forward(self, x):
    """Apply ``activation(fc_k(x))`` for k = 1, 2, 3 and return the result."""
    x = self.activation(self.fc1(x))
    x = self.activation(self.fc2(x))
    x = self.activation(self.fc3(x))
    return x

  def get_fc1_grad_abs_value(self):
    """Return ``|fc1.weight.grad|`` as a tensor.

    Raises:
      RuntimeError: if ``fc1.weight`` has no gradient yet, i.e. no backward
        pass has been run since the gradients were last cleared.
    """
    grad = self.fc1.weight.grad
    if grad is None:
      raise RuntimeError(
          'fc1.weight.grad is not populated; call backward() on the network output first')
    return torch.abs(grad)
  
def get_fc1_grad_abs_value(net, x):
  """Run one forward/backward pass and return the magnitude of fc1's weight gradient.

  Args:
    net: module exposing ``get_fc1_grad_abs_value()`` and ``zero_grad()``.
    x: input tensor; the network output must hold a single element so that
      ``backward()`` can be called without an explicit gradient argument.

  Returns:
    The value of ``net.get_fc1_grad_abs_value()`` after the backward pass,
    converted to a Python float.
  """
  # Call the module itself rather than ``forward`` so registered hooks run.
  output = net(x)
  output.backward()

  fc1_grad = net.get_fc1_grad_abs_value().item()
  # Clear the gradients so consecutive calls do not accumulate into each other.
  net.zero_grad()
  return fc1_grad

# Map each activation's class name to a default-constructed instance of that
# torch.nn class. The tuple order is the dict's insertion order and therefore
# the order in which results are reported.
activations = {
    name: getattr(torch.nn, name)()
    for name in (
        'ELU', 'Hardtanh',
        'LeakyReLU', 'LogSigmoid',
        'PReLU', 'ReLU', 'ReLU6',
        'RReLU', 'SELU', 'CELU',
        'Sigmoid', 'Softplus',
        'Softshrink', 'Softsign',
        'Tanh', 'Tanhshrink',
        'Hardshrink',
    )
}

# For every activation, build a fresh network and report the mean |fc1 gradient|
# over NUMBER_OF_EXPERIMENTS inputs drawn with torch.randn.
for name, activation in activations.items():
  net = SimpleNet(activation=activation)
  samples = torch.randn((NUMBER_OF_EXPERIMENTS, 1))
  fc1_grads = [get_fc1_grad_abs_value(net, x) for x in samples]
  print(np.mean(fc1_grads), name)
    

0.4478770935488865 ELU
0.30012892215978354 Hardtanh
0.3886191251013512 LeakyReLU
0.2563534809782868 LogSigmoid
0.41550379729509584 PReLU
0.38283357105799953 ReLU
0.38940384910907594 ReLU6
0.40075818665951374 RReLU
0.5511949503328651 SELU
0.453797201452544 CELU
0.007243898653568976 Sigmoid
0.2564494099796866 Softplus
0.23801627278327941 Softshrink
0.06558239982114174 Softsign
0.17119707921985536 Tanh
0.008037518786748526 Tanhshrink
0.739619148671627 Hardshrink


In [1]:
import torch

class SimpleNet(torch.nn.Module):
    """Four chained 1->1 bias-free linear layers, each followed by the same activation.

    Every linear weight is initialised to 1.

    Args:
        activation: callable (typically a ``torch.nn`` activation module)
            applied to the output of every linear layer.
    """

    def __init__(self, activation):
        super().__init__()

        self.activation = activation
        self.fc1 = self._unit_linear()
        self.fc2 = self._unit_linear()
        self.fc3 = self._unit_linear()
        self.fc4 = self._unit_linear()

    @staticmethod
    def _unit_linear():
        """Build a bias-free ``Linear(1, 1)`` whose single weight equals 1."""
        layer = torch.nn.Linear(1, 1, bias=False)
        # torch.nn.init fills the parameter in place without going through `.data`.
        torch.nn.init.ones_(layer.weight)
        return layer

    def forward(self, x):
        """Apply ``activation(fc_k(x))`` for k = 1..4 and return the result."""
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.activation(self.fc4(x))
        return x

    def grad(self):
        """Return the weight gradients of fc1..fc4 as floats rounded to 3 decimals.

        Returns:
            A list of four floats, ordered fc1, fc2, fc3, fc4.

        Raises:
            RuntimeError: if any of the weights has no gradient yet, i.e. no
                backward pass has been run.
        """
        layers = (self.fc1, self.fc2, self.fc3, self.fc4)
        grads = [layer.weight.grad for layer in layers]
        if any(g is None for g in grads):
            raise RuntimeError(
                'weight gradients are not populated; call backward() on the network output first')
        # Round by formatting with '%.3f' and parsing the text back into a float.
        return [float('%.3f' % float(g)) for g in grads]

# Feed the same input (100) through a Tanh network and a ReLU network and
# collect the per-layer weight gradients of each as text.
answer = []

for activation in [torch.nn.Tanh(), torch.nn.ReLU()]:
    net = SimpleNet(activation)
    x = torch.tensor([100.])
    # Call the module itself rather than ``forward`` so registered hooks run.
    output = net(x)
    output.backward()
    answer.append(str(net.grad()))

# One comma-separated line: the Tanh gradients first, then the ReLU gradients.
print(','.join(answer))

[0.0, 0.168, 0.304, 0.436],[100.0, 100.0, 100.0, 100.0]
