In [1]:
import dgl
from dgl.data.utils import load_graphs
import torch
import torch.nn as nn

Using backend: pytorch


In [2]:
# Load the serialized DGL graph file and keep the first entry of the first
# returned element as the graph used by the cells below.
# NOTE(review): per the DGL docs `load_graphs` returns (graph_list, label_dict),
# so `data[0][0]` should be the first saved graph — confirm.
data = load_graphs('../data/gsi2.bin')
graph = data[0][0]

# My Module

In [None]:
# Three-layer MLP encoder (3 -> 64 -> 64 -> 8) with a ReLU after every linear layer.
encoder = nn.Sequential(
    nn.Linear(3, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 8),
    nn.ReLU()
)
# `nn.Sequential` has no `weight`/`bias` attributes of its own, so the
# initialisation has to be applied to each `nn.Linear` child individually.
for layer in encoder:
    if isinstance(layer, nn.Linear):
        nn.init.xavier_uniform_(layer.weight)
        nn.init.constant_(layer.bias, 0.0)

In [13]:
class Encoder(nn.Module):
    """Three-layer perceptron (3 -> 64 -> 64 -> 8) with a ReLU after each layer.

    Every linear layer has Xavier-uniform weights and zero biases.
    """

    def __init__(self):
        super().__init__()
        # All linear layers are created before any re-initialisation so the
        # order in which random numbers are drawn stays the same.
        self.linear1 = nn.Linear(3, 64)
        self.linear2 = nn.Linear(64, 64)
        self.linear3 = nn.Linear(64, 8)
        self.relu1, self.relu2, self.relu3 = nn.ReLU(), nn.ReLU(), nn.ReLU()

        for fc in (self.linear1, self.linear2, self.linear3):
            torch.nn.init.xavier_uniform_(fc.weight)
            torch.nn.init.constant_(fc.bias, 0.0)

    def forward(self, x):
        """Apply linear -> ReLU three times and return the result."""
        stages = (
            (self.linear1, self.relu1),
            (self.linear2, self.relu2),
            (self.linear3, self.relu3),
        )
        for fc, act in stages:
            x = act(fc(x))
        return x
        

# Original Module

In [4]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import torch.nn.init as init

# Activation names that `get_nn_activation` looks up directly on `torch.nn`.
TORCH_ACTIVATION_LIST = [
    'ReLU', 'Sigmoid', 'SELU',
    'LeakyReLU', 'Softplus', 'Tanh',
]

# Activation names that `get_nn_activation` maps to the modules defined in
# this file; `None` selects `nn.Identity`.
ACTIVATION_LIST = [
    'Mish',
    'Swish',
    'Absolute',
    'IdentityClip',
    None,
]


class Mish(nn.Module):
    """Element-wise Mish activation: ``x * tanh(softplus(x))``."""

    def forward(self, x):
        gate = torch.tanh(F.softplus(x))
        return x * gate


class Swish(nn.Module):
    """Element-wise Swish activation: ``x * sigmoid(x)``."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        # `torch.sigmoid` replaces `F.sigmoid`, which emits a deprecation
        # warning on the PyTorch releases that still flag it.
        return x * torch.sigmoid(x)


class Absolute(nn.Module):
    """Element-wise absolute value used as an activation."""

    def forward(self, x):
        return x.abs()


class IdentityClip(nn.Module):
    """Identity on ``[0, 1]``; values outside that interval are clamped to it."""

    def forward(self, x):
        return torch.clamp(x, min=0.0, max=1.0)


def get_nn_activation(activation: 'str'):
    """Build the activation module identified by ``activation``.

    Args:
        activation: A name from ``TORCH_ACTIVATION_LIST`` (instantiates the
            ``torch.nn`` class of that name with no arguments), a name from
            ``ACTIVATION_LIST`` (instantiates the matching module defined in
            this file), or ``None`` (returns ``nn.Identity``).

    Returns:
        A newly constructed activation module.

    Raises:
        RuntimeError: If ``activation`` appears in neither list.
    """
    supported = TORCH_ACTIVATION_LIST + ACTIVATION_LIST
    if activation not in supported:
        raise RuntimeError("Not implemented activation function!")

    # Names handled by this file take precedence over the torch.nn lookup.
    if activation in ACTIVATION_LIST:
        if activation is None:
            return nn.Identity()
        custom_modules = {
            'Mish': Mish,
            'Swish': Swish,
            'Absolute': Absolute,
            'IdentityClip': IdentityClip,
        }
        return custom_modules[activation]()

    return getattr(torch.nn, activation)()


class NoisyLinear(nn.Linear):
    """Linear layer whose weight and bias are perturbed by scaled noise.

    The effective parameters used in ``forward`` are
    ``weight + sigma_weight * epsilon_weight`` and
    ``bias + sigma_bias * epsilon_bias``. ``sigma_*`` are learnable
    parameters; ``epsilon_*`` are buffers that start at zero (no noise) and
    are replaced by ``sample_noise`` / ``remove_noise``.

    Args:
        in_features: Size of each input sample.
        out_features: Size of each output sample.
        sigma_init: Constant used to initialise ``sigma_weight`` and
            ``sigma_bias``.
    """
    # Adapted from the original source
    # https://github.com/Kaixhin/NoisyNet-A3C/blob/master/model.py

    def __init__(self, in_features, out_features, sigma_init=0.017):
        super(NoisyLinear, self).__init__(in_features, out_features, bias=True)  # TODO: Adapt for no bias
        # µ^w and µ^b reuse self.weight and self.bias
        self.sigma_init = sigma_init
        # `torch.empty` is the explicit spelling of an uninitialised tensor;
        # the values are filled in by `reset_parameters` below.
        self.sigma_weight = Parameter(torch.empty(out_features, in_features))  # σ^w
        self.sigma_bias = Parameter(torch.empty(out_features))  # σ^b
        self.register_buffer('epsilon_weight', torch.zeros(out_features, in_features))
        self.register_buffer('epsilon_bias', torch.zeros(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the mean parameters uniformly and the sigmas to ``sigma_init``."""
        # Only init after all params added (otherwise super().__init__() fails):
        # nn.Linear.__init__ calls this method before sigma_* exist.
        if hasattr(self, 'sigma_weight'):
            bound = math.sqrt(3 / self.in_features)
            init.uniform_(self.weight, -bound, bound)
            init.uniform_(self.bias, -bound, bound)
            init.constant_(self.sigma_weight, self.sigma_init)
            init.constant_(self.sigma_bias, self.sigma_init)

    def forward(self, x):
        """Apply the linear map using the currently stored noise buffers."""
        noisy_weight = self.weight + self.sigma_weight * self.epsilon_weight
        noisy_bias = self.bias + self.sigma_bias * self.epsilon_bias
        return F.linear(x, noisy_weight, noisy_bias)

    def sample_noise(self):
        """Replace the noise buffers with fresh standard-normal samples.

        New tensors are allocated (rather than filled in place) so a graph
        built by an earlier ``forward`` is not invalidated, and ``*_like`` is
        used so the noise follows the module's current device and dtype.
        """
        self.epsilon_weight = torch.randn_like(self.epsilon_weight)
        self.epsilon_bias = torch.randn_like(self.epsilon_bias)

    def remove_noise(self):
        """Reset the noise buffers to zero, keeping their device and dtype."""
        self.epsilon_weight = torch.zeros_like(self.epsilon_weight)
        self.epsilon_bias = torch.zeros_like(self.epsilon_bias)


class LinearModule(nn.Module):
    """One MLP block: linear -> normalisation -> activation -> dropout.

    Args:
        activation: Activation name passed to ``get_nn_activation``.
        norm: ``'batch'``, ``'layer'``, ``'spectral'`` or ``None``.
        dropout_p: Dropout probability; ``0.0`` (or less) disables dropout.
        weight_init: ``'normal'``, ``'kaiming_normal'``, ``'xavier'`` or
            ``None`` to keep the layer's own initialisation.
        use_noisy: Use ``NoisyLinear`` instead of ``torch.nn.Linear``.
        use_residual: Add the block input to its output; only honoured when
            ``in_features == out_features``.
        **linear_kwargs: Forwarded to the linear layer constructor; must
            contain ``in_features`` and ``out_features``.

    Raises:
        RuntimeError: If ``norm`` is not one of the supported values.
        NotImplementedError: If ``weight_init`` is not supported.
    """

    # Activation name -> `nonlinearity` argument of `kaiming_normal_`.
    # The capitalised keys are the names `get_nn_activation` accepts; the
    # lowercase keys keep the previously recognised spellings working.
    _KAIMING_NONLINEARITY = {
        'ReLU': 'relu',
        'LeakyReLU': 'leaky_relu',
        'Sigmoid': 'sigmoid',
        'Tanh': 'tanh',
        'relu': 'relu',
        'leaky_relu': 'leaky_relu',
        'sigmoid': 'sigmoid',
        'tanh': 'tanh',
    }

    def __init__(self,
                 activation: 'str',
                 norm: 'str' = None,
                 dropout_p: 'float' = 0.0,
                 weight_init: 'str' = None,
                 use_noisy: bool = False,
                 use_residual: bool = False,
                 **linear_kwargs):
        super(LinearModule, self).__init__()

        # A residual connection needs matching input/output widths.
        if linear_kwargs['in_features'] == linear_kwargs['out_features'] and use_residual:
            self.use_residual = True
        else:
            self.use_residual = False

        # layers
        if use_noisy:
            linear_layer = NoisyLinear(**linear_kwargs)
        else:
            linear_layer = torch.nn.Linear(**linear_kwargs)

        self.linear_layer = linear_layer
        if dropout_p > 0.0:
            self.dropout_layer = torch.nn.Dropout(dropout_p)
        else:
            self.dropout_layer = torch.nn.Identity()
        self.activation_layer = get_nn_activation(activation)

        self.weight_init = weight_init
        self.activation = activation
        self.norm = norm

        # apply weight initialization methods (before spectral norm wraps the layer)
        self.apply_weight_init(self.linear_layer, self.weight_init)

        if norm == 'batch':
            self.norm_layer = torch.nn.BatchNorm1d(self.linear_layer.out_features)
        elif norm == 'layer':
            self.norm_layer = torch.nn.LayerNorm(self.linear_layer.out_features)
        elif norm == 'spectral':
            self.linear_layer = torch.nn.utils.spectral_norm(self.linear_layer)
            self.norm_layer = torch.nn.Identity()
        elif norm is None:
            self.norm_layer = torch.nn.Identity()
        else:
            raise RuntimeError("Not implemented normalization function!")

    def apply_weight_init(self, tensor, weight_init=None):
        """Initialise ``tensor.weight`` (and zero ``tensor.bias`` if present).

        Args:
            tensor: A layer exposing ``weight`` and optionally ``bias``.
            weight_init: ``'normal'``, ``'kaiming_normal'``, ``'xavier'`` or
                ``None`` (no-op). For ``'kaiming_normal'`` the layer is left
                untouched when ``self.activation`` has no matching
                ``nonlinearity`` in ``_KAIMING_NONLINEARITY``.

        Raises:
            NotImplementedError: For any other ``weight_init`` value.
        """
        if weight_init is None:
            return  # do not apply weight init

        if weight_init == "normal":
            torch.nn.init.normal_(tensor.weight, std=0.3)
        elif weight_init == "kaiming_normal":
            nonlinearity = self._KAIMING_NONLINEARITY.get(self.activation)
            if nonlinearity is None:
                # Best-effort: no gain is defined for this activation.
                return
            # `a` is only used by the 'leaky_relu' gain; take it from the
            # activation module when it exposes a slope.
            slope = getattr(getattr(self, 'activation_layer', None), 'negative_slope', 0)
            torch.nn.init.kaiming_normal_(tensor.weight, a=slope, nonlinearity=nonlinearity)
        elif weight_init == "xavier":
            torch.nn.init.xavier_uniform_(tensor.weight)
        else:
            raise NotImplementedError("MLP initializer {} is not supported".format(weight_init))

        # Layers built with bias=False have `bias is None`.
        if getattr(tensor, 'bias', None) is not None:
            torch.nn.init.constant_(tensor.bias, 0.0)

    def forward(self, x):
        """Run the block on ``x``, adding the input back when residual is enabled."""
        if self.use_residual:
            input_x = x

        x = self.linear_layer(x)
        x = self.norm_layer(x)
        x = self.activation_layer(x)
        x = self.dropout_layer(x)

        if self.use_residual:
            x = input_x + x
        return x


In [5]:
import torch.nn as nn

class MultiLayerPerceptron(nn.Module):
    """Stack of ``LinearModule`` blocks: hidden layers plus one output layer.

    Args:
        input_dimension: Width of the input features.
        output_dimension: Width of the output.
        num_neurons: Hidden layer widths; defaults to ``[64, 64]``.
        activation: Activation name for the hidden layers, or a list with
            one entry per layer (``len(num_neurons) + 1`` entries).
        out_activation: Activation name for the output layer.
        normalization: Normalisation name for the hidden layers, or a list
            of ``len(num_neurons) + 1`` entries. The output layer is never
            normalised.
        weight_init: Weight initialiser name passed to every block.
        dropout_probability: Dropout probability for the hidden layers, or a
            list of ``len(num_neurons) + 1`` entries. The output layer uses
            no dropout.
        use_noisy: Build the blocks with noisy linear layers.
    """

    def __init__(self,
                 input_dimension: int,
                 output_dimension: int,
                 num_neurons: list = None,
                 activation='Mish',
                 out_activation='Mish',
                 normalization=None,
                 weight_init='xavier',
                 dropout_probability=0.0,
                 use_noisy=False):

        super(MultiLayerPerceptron, self).__init__()

        # A fresh list per instance: avoids sharing one mutable default
        # between instances and lets callers pass any sequence (e.g. a tuple).
        num_neurons = [64, 64] if num_neurons is None else list(num_neurons)

        self.input_dim = input_dimension
        self.output_dim = output_dimension
        self.num_neurons = num_neurons
        self.use_noisy = use_noisy

        _list_norm = self.check_input_spec(normalization)
        # NOTE(review): given the length assertion in `check_input_spec`, this
        # can only be True when `num_neurons` is empty, in which case the loop
        # below does not run — confirm the intended behaviour.
        _input_norm = True if _list_norm and len(normalization) == 1 else False
        _list_act = self.check_input_spec(activation)
        _list_drop_p = self.check_input_spec(dropout_probability)

        input_dims = [input_dimension] + num_neurons
        output_dims = num_neurons + [output_dimension]

        # Input -> the last hidden layer
        self.layers = nn.ModuleList()
        for i, (in_dim, out_dim) in enumerate(zip(input_dims[:-1], output_dims[:-1])):
            norm = normalization[i] if _list_norm else normalization
            norm = None if _input_norm and i != 0 else norm
            act = activation[i] if _list_act else activation
            drop_p = dropout_probability[i] if _list_drop_p else dropout_probability

            linear_module = LinearModule(in_features=in_dim, out_features=out_dim, activation=act,
                                         norm=norm, dropout_p=drop_p, weight_init=weight_init, use_noisy=use_noisy)
            self.layers.append(linear_module)

        # Output layer: own activation, no normalisation, no dropout.
        output_layer = LinearModule(in_features=input_dims[-1], out_features=output_dims[-1],
                                    activation=out_activation,
                                    norm=None, dropout_p=0.0, weight_init=weight_init, use_noisy=use_noisy)
        self.layers.append(output_layer)

    def forward(self, x):
        """Feed ``x`` through every block in order and return the result."""
        for layer in self.layers:
            x = layer(x)
        return x

    def check_input_spec(self, input_spec):
        """Return True when ``input_spec`` is a per-layer list, False otherwise.

        Raises:
            AssertionError: If a list is given whose length is not
                ``len(self.num_neurons) + 1``.
        """
        if isinstance(input_spec, list):
            # output layer will not be normalized
            assert len(input_spec) == len(self.num_neurons) + 1, "the length of input_spec list should " \
                                                                 "match with the number of hidden layers + 1"
            _list_type = True
        else:
            _list_type = False

        return _list_type

In [6]:
# Reference MLP: 3 inputs, two hidden layers of 64 units, 8 outputs, ReLU throughout.
model = MultiLayerPerceptron(
    input_dimension=3,
    output_dimension=8,
    num_neurons=[64, 64],
    activation='ReLU',
    out_activation='ReLU',
)

# Test

In [7]:
# Run the `nn.Sequential` encoder and the `MultiLayerPerceptron` on the same node features.
# NOTE(review): both networks are randomly initialised and no seed is set in
# this notebook, so their outputs are not expected to match numerically.
# Assumes graph.ndata['feat'] has 3 features per node — TODO confirm.
result1 = encoder(graph.ndata['feat'])
result2 = model(graph.ndata['feat'])

In [14]:
# Instantiate the hand-written `Encoder` module (randomly initialised weights, zero biases).
model2 = Encoder()

In [16]:
# Run the `Encoder` instance on the same node features used for result1/result2.
result3 = model2(graph.ndata['feat'])

In [8]:
# Output of the `nn.Sequential` encoder.
print(result1)

tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00, 1.7874e-01, 0.0000e+00, 3.8676e-01,
         5.8329e-02, 0.0000e+00],
        [7.8709e-02, 9.4251e-02, 2.8755e-02, 0.0000e+00, 0.0000e+00, 8.0551e-02,
         1.2337e-01, 0.0000e+00],
        [2.7956e-02, 3.5818e-02, 5.9430e-02, 1.0442e-01, 5.7396e-02, 2.2366e-01,
         2.5687e-01, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 1.5778e-01, 0.0000e+00, 1.9637e-01,
         5.4326e-02, 0.0000e+00],
        [7.4405e-02, 1.0214e-01, 4.9250e-02, 1.7544e-02, 0.0000e+00, 9.8163e-02,
         1.6029e-01, 0.0000e+00],
        [1.0880e-02, 0.0000e+00, 0.0000e+00, 1.2912e-01, 1.9527e-02, 1.7270e-01,
         8.1722e-02, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0098e-01, 6.8932e-02, 2.1995e-01,
         2.5112e-02, 0.0000e+00],
        [0.0000e+00, 6.7834e-02, 2.9889e-02, 0.0000e+00, 0.0000e+00, 1.5372e-01,
         2.4137e-03, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 4.8242e-02, 4.6354e-02, 0.0000e+00, 1.1302e-01,

In [9]:
# Output of the `MultiLayerPerceptron` model.
print(result2)

tensor([[0.1100, 0.0000, 0.0387, 0.0735, 0.0000, 0.1325, 0.0732, 0.1426],
        [0.3317, 0.4233, 0.0000, 0.0000, 0.0000, 0.2742, 0.0286, 0.0000],
        [0.2384, 0.0000, 0.3095, 0.5176, 0.0000, 0.0041, 0.2575, 0.0000],
        [0.0340, 0.2966, 0.1094, 0.2645, 0.0000, 0.3023, 0.1576, 0.1016],
        [0.2484, 0.0000, 0.1130, 0.3170, 0.0000, 0.1630, 0.0157, 0.0000],
        [0.0000, 0.2036, 0.1824, 0.2717, 0.0000, 0.1807, 0.3817, 0.0000],
        [0.0138, 0.0320, 0.0448, 0.1571, 0.0000, 0.1795, 0.0000, 0.1605],
        [0.1939, 0.1718, 0.1144, 0.0000, 0.0000, 0.2023, 0.0546, 0.0000],
        [0.1113, 0.1772, 0.0090, 0.0792, 0.0060, 0.0597, 0.0000, 0.0000],
        [0.2256, 0.3170, 0.0000, 0.0000, 0.0000, 0.1989, 0.0251, 0.0000],
        [0.2320, 0.1986, 0.0536, 0.0833, 0.0000, 0.2460, 0.0000, 0.0000],
        [0.0463, 0.0000, 0.0521, 0.1150, 0.0000, 0.1174, 0.0000, 0.1539],
        [0.1663, 0.0151, 0.1143, 0.0000, 0.0000, 0.2423, 0.1131, 0.0175],
        [0.0230, 0.0874, 0.2764, 0.408

In [17]:
# Output of the hand-written `Encoder` module.
print(result3)

tensor([[0.0285, 0.1012, 0.0000, 0.2244, 0.0000, 0.0914, 0.1075, 0.0000],
        [0.3209, 0.1905, 0.0000, 0.8378, 0.0000, 0.7812, 0.3445, 0.0000],
        [0.1009, 0.6995, 0.0000, 0.2589, 0.0000, 0.9120, 0.0015, 0.0000],
        [0.0280, 0.3540, 0.0000, 0.1658, 0.0000, 0.4240, 0.0000, 0.0000],
        [0.2523, 0.3608, 0.0000, 0.4188, 0.0000, 0.6673, 0.2220, 0.0000],
        [0.0386, 0.4898, 0.0000, 0.1965, 0.0000, 0.6452, 0.0000, 0.0000],
        [0.0000, 0.1196, 0.0000, 0.0808, 0.0000, 0.1416, 0.0000, 0.0000],
        [0.2145, 0.0607, 0.0000, 0.5439, 0.0000, 0.3928, 0.2487, 0.0000],
        [0.0000, 0.1787, 0.0000, 0.1597, 0.0000, 0.1607, 0.0234, 0.0777],
        [0.2549, 0.0694, 0.0000, 0.6248, 0.0000, 0.5868, 0.2934, 0.0000],
        [0.2426, 0.3294, 0.0000, 0.6245, 0.0000, 0.7101, 0.2497, 0.0000],
        [0.0000, 0.1087, 0.0000, 0.1078, 0.0000, 0.0871, 0.0321, 0.0000],
        [0.1571, 0.0745, 0.0000, 0.4531, 0.0000, 0.1725, 0.1131, 0.0000],
        [0.1001, 0.5876, 0.0000, 0.219