In [None]:
"""
Deep learning models used in this research:

1. MLP (Multi-Layer Perceptron - Default)

2. Transformer (Encoder block - Description model)

Created by Jaehyeon Park
"""

import torch
import torch.nn as nn

In [3]:
class tox_mlp(nn.Module):
    """Feed-forward network mapping ``input_size`` features to one output value.

    The stack consists of five hidden stages, each
    ``Linear -> BatchNorm1d -> GELU -> Dropout``, whose output widths are
    ``input_size, 256, 128, 64, 32``, followed by a final ``Linear(32, 1)``
    with no activation.

    Args:
        input_size: Number of features per sample (last dimension of the input).
        dropout: Drop probability used by every ``nn.Dropout`` in the stack.
    """

    def __init__(self, input_size, dropout):
        super().__init__()

        # Output width of each hidden stage; the first stage keeps the input width.
        stage_widths = (input_size, 256, 128, 64, 32)

        modules = []
        fan_in = input_size
        for fan_out in stage_widths:
            modules += [
                nn.Linear(fan_in, fan_out),
                nn.BatchNorm1d(fan_out),
                nn.GELU(),
                nn.Dropout(dropout),
            ]
            fan_in = fan_out

        # Final projection to a single raw output value per sample.
        modules.append(nn.Linear(fan_in, 1))

        # Modules are created and appended in stack order, so the Sequential
        # indices (0..20) and the resulting state_dict keys are fixed.
        self.total_layer = nn.Sequential(*modules)

    def forward(self, x):
        """Pass ``x`` through the full layer stack.

        Args:
            x: Input tensor whose last dimension is ``input_size``.
                Presumably shaped (batch, input_size) given the BatchNorm1d
                layers - confirm against callers.

        Returns:
            The output of the final ``Linear(32, 1)`` layer.
        """
        out = self.total_layer(x)
        return out

In [8]:
# Build the MLP for 512 input features with 20% dropout, then print its
# layer layout as a quick structural check.
mlp_model = tox_mlp(
    input_size=512,
    dropout=0.2,
)
print(mlp_model)

tox_mlp(
  (total_layer): Sequential(
    (0): Linear(in_features=512, out_features=512, bias=True)
    (1): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): GELU(approximate='none')
    (3): Dropout(p=0.2, inplace=False)
    (4): Linear(in_features=512, out_features=256, bias=True)
    (5): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): GELU(approximate='none')
    (7): Dropout(p=0.2, inplace=False)
    (8): Linear(in_features=256, out_features=128, bias=True)
    (9): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): GELU(approximate='none')
    (11): Dropout(p=0.2, inplace=False)
    (12): Linear(in_features=128, out_features=64, bias=True)
    (13): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (14): GELU(approximate='none')
    (15): Dropout(p=0.2, inplace=False)
    (16): Linear(in_features=64, out_features=32, bias=Tru

In [4]:
class tox_transformer(nn.Module):
    """Transformer-encoder model producing one output value per input vector.

    Every scalar feature is expanded to two channels by a shared
    ``Linear(1, 2)``. The two channels are then treated as a length-2 token
    sequence, each token being projected from ``feature_dim`` to ``d_model``.
    The encoded tokens are average-pooled and mapped to a single value.

    Args:
        feature_dim: Number of features per sample.
        d_model: Embedding width used inside the encoder.
        nhead: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout probability passed to each encoder layer.
    """

    def __init__(self, feature_dim, d_model, nhead, num_layers, dropout):
        super().__init__()

        # Shared per-feature expansion: each scalar becomes a 2-vector.
        self.linear = nn.Linear(1, 2)

        # Projects each of the two channels (length feature_dim) to d_model.
        self.dim_transform = nn.Linear(feature_dim, d_model)

        # NOTE(review): storing the template layer as an attribute registers its
        # parameters on this module in addition to the per-layer copies held by
        # `transformer_encoder`; forward() only calls `transformer_encoder`.
        # Kept as-is so existing state_dict keys remain loadable.
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,  # feed-forward width is 4x the model width
            dropout=dropout,
            activation="gelu",
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(
            self.encoder_layer,
            num_layers=num_layers,
            enable_nested_tensor=False,
        )

        # Averages over the token axis once it has been moved to the last dim.
        self.avg_pool = nn.AdaptiveAvgPool1d(1)

        self.output_linear = nn.Linear(d_model, 1)

    def forward(self, x):
        """Encode a batch of feature vectors and return one value per sample.

        Args:
            x: Tensor of shape [B, Fd], where Fd equals ``feature_dim``.

        Returns:
            Tensor of shape [B, 1].
        """
        per_feature = self.linear(x.unsqueeze(-1))       # [B, Fd, 1] -> [B, Fd, 2]
        channels = per_feature.permute(0, 2, 1)          # [B, 2, Fd]

        tokens = self.dim_transform(channels)            # [B, 2, d_model]
        encoded = self.transformer_encoder(tokens)       # [B, 2, d_model]

        # Pool over the two tokens: move them to the last axis, average, drop it.
        pooled = self.avg_pool(encoded.permute(0, 2, 1)).squeeze(-1)  # [B, d_model]
        return self.output_linear(pooled)                # [B, 1]

In [9]:
# Build the transformer for 512 input features (d_model=256, 2 heads,
# 2 encoder layers, 20% dropout), then print its module tree as a quick
# structural check.
transformer_model = tox_transformer(
    feature_dim=512,
    d_model=256,
    nhead=2,
    num_layers=2,
    dropout=0.2,
)
print(transformer_model)

tox_transformer(
  (linear): Linear(in_features=1, out_features=2, bias=True)
  (dim_transform): Linear(in_features=512, out_features=256, bias=True)
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
    )
    (linear1): Linear(in_features=256, out_features=1024, bias=True)
    (dropout): Dropout(p=0.2, inplace=False)
    (linear2): Linear(in_features=1024, out_features=256, bias=True)
    (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.2, inplace=False)
    (dropout2): Dropout(p=0.2, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-1): 2 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
