In [1]:
from ToxicMl.trainer import GenericRegressionTrainer
from ToxicMl.metrics import MAE, MSE, MaxError

import torch
from ToxicMl.dataset import LipoDataset

import torch
from pathlib import Path

root = Path("dataset/ogbg_mollipo_custom")
EPOCHS = 8

In [2]:
dataset = LipoDataset(root)
split_idx = dataset.get_idx_split() 
train = dataset[split_idx["train"]]
validation = dataset[split_idx["valid"]]
test = dataset[split_idx["test"]]

train_metrics = [MAE(), MSE(), MaxError()]
validation_metrics = [MAE(), MSE(), MaxError()]
EPOCHS = 10

In [8]:
from ToxicMl.MLmodels.gcn import ChemConvBlock
import torch
import torch.nn.functional as F

from torch.nn import Linear, ReLU
from torch_geometric.data import Data
from torch_geometric.nn.pool import global_max_pool

from torch.nn import BatchNorm1d

class ChemConvDescriptorsReg(torch.nn.Module):
    def __init__(self, embedder_depth, in_channels, hidden_channels, out_channels, in_descriptors):
        super().__init__()
        self.in_descriptors = in_descriptors
        self.encoder = torch.nn.ModuleList([
            ChemConvBlock(in_channels=in_channels, out_channels=hidden_channels)
            if i == 0
            else ChemConvBlock(in_channels=hidden_channels, out_channels=hidden_channels)
            for i in range(embedder_depth)])
        self.norm1 = BatchNorm1d(hidden_channels+in_descriptors)
        self.linear1 = Linear(hidden_channels+in_descriptors, 64)
        self.relu1 = ReLU(inplace=True)
        self.linear2 = Linear(64, 32)
        self.relu2 = ReLU(inplace=True)
        self.linear3 = Linear(32, out_channels)
        self.relu3 = ReLU(inplace=True)
        

    def forward(self, data: Data):
        for module in self.encoder:
            data = module(data)
        x = global_max_pool(data.x, data.batch)


        d = data.descriptors.view(-1, self.in_descriptors)
        x = torch.cat([x,d], dim=1)
        x = self.norm1(x)
        x = self.linear1(x)
        x = self.relu1(x)
        x = self.linear2(x)
        x = self.relu2(x)
        x = self.linear3(x)
        x = self.relu3(x)
        return x
    

In [10]:
from ToxicMl.MLmodels.gcn import GCNReg, ChemGCNReg
model = ChemConvDescriptorsReg(3, 133, 16, 1, 200)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericRegressionTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("LIPO GCN 3-16, custom dataset, descriptor")



  return F.mse_loss(input, target, reduction=self.reduction)
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 157.59it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 237.90it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 138.89it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 232.49it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 149.71it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 225.44it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 132.13it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 232.77it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 142.96it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 253.44it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 142.93it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 194.00it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 137.52it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<0

0,1
train/MAE,█▁▁▁▁▁▁▁▁▁
train/MSE,█▁▁▁▁▁▁▁▁▁
train/Max Error,█▂▃▂▁▂▂▁▁▂
train/loss,█▁▁▂▂▂▂▂▃▃
validation/MAE,▄▄▃▅█▄▁▅▂▂
validation/MSE,▁█▇██▇▆█▆▇
validation/Max Error,▁█▆▇▆▇▆▇▆▇
validation/loss,▁▁▁▁▁▁█▁▁▁

0,1
train/MAE,0.97885
train/MSE,1.48639
train/Max Error,4.72509
train/loss,1.38589
validation/MAE,1.19882
validation/MSE,10.49697
validation/Max Error,60.27538
validation/loss,1.58488


In [11]:
model = ChemConvDescriptorsReg(3, 133, 32, 1, 200)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericRegressionTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("LIPO GCN 3-32, custom dataset, descriptor")



  return F.mse_loss(input, target, reduction=self.reduction)
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 134.65it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 228.56it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 140.02it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 231.07it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 146.04it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 230.93it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 142.26it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 229.94it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 133.27it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 253.19it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 138.97it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 235.60it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 133.00it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<0

0,1
train/MAE,█▁▁▁▁▁▁▁▁▁
train/MSE,█▁▁▁▁▁▁▁▁▁
train/Max Error,█▁▁▂▁▁▁▁▁▂
train/loss,█▂▂▄▂▃▁▁▁▃
validation/MAE,▁▇▅▆▆▇▇▇█▅
validation/MSE,▁██▇▇███▇▇
validation/Max Error,▁██▇▇███▇█
validation/loss,▂▂▃█▂▄▁▄▂▁

0,1
train/MAE,0.99053
train/MSE,1.52832
train/Max Error,4.16707
train/loss,1.43532
validation/MAE,1.55259
validation/MSE,118.19404
validation/Max Error,221.23167
validation/loss,0.54184


In [12]:
model = ChemConvDescriptorsReg(3, 133, 64, 1, 200)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericRegressionTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("LIPO GCN 3-64, custom dataset, descriptor")



  return F.mse_loss(input, target, reduction=self.reduction)
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 109.18it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 206.37it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 112.75it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 214.85it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 114.79it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 268.91it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 138.78it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 211.88it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 123.56it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 242.51it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 130.90it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 258.97it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 136.44it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<0

0,1
train/MAE,█▁▁▁▁▁▁▁▁▁
train/MSE,█▁▁▁▁▁▁▁▁▁
train/Max Error,█▃▂▂▂▃▁▂▂▁
train/loss,█▂▁▂▂▃▁▂▁▂
validation/MAE,▇▄▄▆▄▆▄▁█▄
validation/MSE,▁▇███▆▆▄█▇
validation/Max Error,▁█▇▇▇▅▅▄▄▇
validation/loss,█▃▃▁▄▅▂▃▄█

0,1
train/MAE,0.99044
train/MSE,1.50873
train/Max Error,3.89094
train/loss,1.39478
validation/MAE,1.15028
validation/MSE,4.75795
validation/Max Error,33.74727
validation/loss,3.35059


In [13]:
model = ChemConvDescriptorsReg(5, 133, 16, 1, 200)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericRegressionTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("LIPO GCN 5-16, custom dataset, descriptor")



  return F.mse_loss(input, target, reduction=self.reduction)
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 112.73it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 191.21it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 107.72it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 197.89it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 113.73it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 192.71it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 124.52it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 221.55it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 131.95it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 190.01it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 105.09it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 185.20it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 104.10it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<0

0,1
train/MAE,█▁▁▁▁▁▁▁▁▁
train/MSE,█▁▁▁▁▁▁▁▁▁
train/Max Error,█▁▁▁▁▂▂▂▂▁
train/loss,█▁▄▃▂▄▃▄▃▂
validation/MAE,▆█▁▂▇▅▅▆▁▆
validation/MSE,▂█▃▁▅▄▃▄▃▅
validation/Max Error,▂█▄▁▃▄▂▄▄▅
validation/loss,▁▂▃▁█▅▂▃▄▂

0,1
train/MAE,0.98207
train/MSE,1.49246
train/Max Error,4.52619
train/loss,1.39088
validation/MAE,1.16542
validation/MSE,5.65767
validation/Max Error,39.3958
validation/loss,0.485


In [14]:
model = ChemConvDescriptorsReg(5, 133, 32, 1, 200)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericRegressionTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("LIPO GCN 5-32, custom dataset, descriptor")



  return F.mse_loss(input, target, reduction=self.reduction)
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 105.83it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 179.31it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 103.47it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 184.22it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 124.89it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 219.34it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 111.96it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 185.08it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 114.90it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 185.41it/s]
train epoch: 100%|██████████| 105/105.0 [00:00<00:00, 112.65it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 215.29it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 98.64it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00

0,1
train/MAE,█▁▁▁▁▁▁▁▁▁
train/MSE,█▁▁▁▁▁▁▁▁▁
train/Max Error,█▂▂▂▂▂▁▂▃▂
train/loss,█▁▂▄▂▂▂▃▂▁
validation/MAE,▁▅▅▆▅▆█▅▆▃
validation/MSE,▁▅▅▅▄▅█▄▅▄
validation/Max Error,▁██▇█▇▇█▇▇
validation/loss,▁▁▁▁█▁▁▁▁▁

0,1
train/MAE,0.98279
train/MSE,1.49421
train/Max Error,4.60001
train/loss,1.3899
validation/MAE,1.29059
validation/MSE,15.14099
validation/Max Error,73.50523
validation/loss,0.6377


In [15]:
model = ChemConvDescriptorsReg(5, 133, 64, 1, 200)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.MSELoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericRegressionTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("LIPO GCN 5-64, custom dataset, descriptor")



  return F.mse_loss(input, target, reduction=self.reduction)
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 92.42it/s]
  return F.mse_loss(input, target, reduction=self.reduction)
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 93.33it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 90.60it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 155.97it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 74.55it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 117.77it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 66.95it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 98.45it/s] 
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 64.18it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 121.00it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 63.41it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 121.87it/s]
train epoch: 100%|██████████| 105/105.0 [00:01<00:00, 67.03it/s]
val epoch: 100%|██████████| 14/14.0 [00:00<00:00, 12

0,1
train/MAE,█▁▁▁▁▁▁▁▁▁
train/MSE,█▁▁▁▁▁▁▁▁▁
train/Max Error,█▃▂▁▂▂▃▃▂▃
train/loss,█▁▃▇▄▂▅▁▄▂
validation/MAE,▂▂▂▁▅█▃▃▅▇
validation/MSE,▁█▆▆▆█▇▇▇▆
validation/Max Error,▁█▇▇▇▇▇▇▇▇
validation/loss,▄▃▆█▅▂▁▄▇▇

0,1
train/MAE,0.9834
train/MSE,1.50239
train/Max Error,4.85438
train/loss,1.39243
validation/MAE,1.42404
validation/MSE,20.00425
validation/Max Error,68.46175
validation/loss,1.29894
