In [1]:
from ToxicMl.evaluation import get_hiv_data
from ToxicMl.trainer import GenericClassificationTrainer
from ToxicMl.metrics import Accuracy, F1, Recall, Precision

import torch
from torch_geometric.loader import ImbalancedSampler

In [2]:
EPOCHS = 10

dataset, train, validation, test = get_hiv_data()
train_metrics = [F1(), Accuracy(), Recall(), Precision()]
validation_metrics = [F1(), Accuracy(), Recall(), Precision()]

  self.data, self.slices = torch.load(self.processed_paths[0])


In [3]:
from ToxicMl.MLmodels.gcn import GCN

model = GCN(9, 16, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("HIV base GCN")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mcus-tibor[0m ([33mcus-tibor-none[0m). Use [1m`wandb login --relogin`[0m to force relogin


train epoch: 100%|██████████| 1029/1029.0 [00:04<00:00, 232.10it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 136.82it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:04<00:00, 207.47it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 257.33it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 189.85it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 183.64it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 184.93it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 284.97it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:04<00:00, 227.57it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 269.85it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:04<00:00, 226.38it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 253.39it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 202.74it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 263.70it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:04<00:00, 213.31it/s]
val epoc

0,1
train/Accuracy,▁█████████
train/F1,█▁▁▁▁▁▁▁▁▁
train/Precision,█▁▁▁▁▁▁▁▁▁
train/Recall,█▁▁▁▁▁▁▁▁▁
train/loss,▅▂▂▂▃▃▁▅█▂
validation/Accuracy,▁▁▁▁▁▁▁▁▁▁
validation/F1,▁▁▁▁▁▁▁▁▁▁
validation/Precision,▁▁▁▁▁▁▁▁▁▁
validation/Recall,▁▁▁▁▁▁▁▁▁▁
validation/loss,▁▁▁▁▆▃▁▃█▃

0,1
train/Accuracy,0.9626
train/F1,0.0
train/Precision,0.0
train/Recall,0.0
train/loss,0.31349
validation/Accuracy,0.9684
validation/F1,0.0
validation/Precision,0.0
validation/Recall,0.0
validation/loss,0.37223


In [4]:
from sklearn.utils.class_weight import compute_class_weight

weights = compute_class_weight("balanced", classes=dataset.y.flatten().unique().numpy(), y=dataset.y.flatten().numpy())
weights = torch.tensor(weights, dtype=torch.float)

model = GCN(9, 16, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss(weight=weights)
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    None,
    EPOCHS,
    None,
    32
)

trainer.train("HIV base GCN weighted loss")



train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 193.97it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 261.63it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 176.13it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 200.48it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:06<00:00, 170.12it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 189.30it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 187.50it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 233.82it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:06<00:00, 164.54it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 229.36it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 171.84it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 212.25it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 191.33it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 226.57it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:06<00:00, 171.19it/s]
val epoc

0,1
train/Accuracy,▆▁████████
train/F1,▅█▂▁▂▁▁▂▂▂
train/Precision,▄█▂▁▂▁▁▁▁▁
train/Recall,██▆▁▇▁▃▄▄▅
train/loss,█▂▂▁▂▁▂▂▁▂
validation/Accuracy,▁█████████
validation/F1,█▂▁▁▁▁▁▁▁▁
validation/Precision,█▁▁▁▁▁▁▁▁▁
validation/Recall,█▆▁▁▁▁▁▁▁▁
validation/loss,▃▁██▁▁▁▁▁▇

0,1
train/Accuracy,0.9568
train/F1,0.00975
train/Precision,0.00568
train/Recall,0.03431
train/loss,0.40339
validation/Accuracy,0.9638
validation/F1,0.0
validation/Precision,0.0
validation/Recall,0.0
validation/loss,0.85951


In [5]:
model = GCN(9, 16, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV base GCN with sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 184.18it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 234.69it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 173.98it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 204.24it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:06<00:00, 154.38it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 233.39it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:06<00:00, 166.05it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 222.88it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 179.87it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 200.43it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 179.39it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 261.58it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:05<00:00, 171.72it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 208.66it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:06<00:00, 168.88it/s]
val epoc

0,1
train/Accuracy,▁▆▇█▆▆▇▇▆█
train/F1,█▁▄▇▄▅▄▅▅▇
train/Precision,█▁▂▄▂▃▃▃▃▃
train/Recall,▁▅█▇▇▇▇▆▆█
train/loss,▄▆▂▄▆█▅▆▁▆
validation/Accuracy,█▂▁▁▁▁▁▁▁▁
validation/F1,█▁▂▂▂▂▂▂▂▂
validation/Precision,▁▆████████
validation/Recall,█▁▁▂▁▁▁▁▁▁
validation/loss,▃▆▄▇▃▅▂█▁▆

0,1
train/Accuracy,0.5402
train/F1,0.5351
train/Precision,0.53076
train/Recall,0.53951
train/loss,0.71575
validation/Accuracy,0.5419
validation/F1,0.07647
validation/Precision,0.6
validation/Recall,0.04084
validation/loss,0.70066


In [6]:
from ToxicMl.MLmodels.gcn import ChemGCN
model = ChemGCN(3, 9, 16, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemGCN 3-16, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:07<00:00, 137.10it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 197.73it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 116.50it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 190.25it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 121.36it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 189.00it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 127.03it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 204.90it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 120.05it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 202.85it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 121.06it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 204.84it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 127.87it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 194.90it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:07<00:00, 130.17it/s]
val epoc

0,1
train/Accuracy,▁▇▇▇█▇▇█▇▇
train/F1,▁█████▇█▇█
train/Precision,▁███████▇█
train/Recall,▁▅▆▆▆▆▄▇▆█
train/loss,█▆▂▂▄▆▄▆▆▁
validation/Accuracy,▄█▆▅▅▁▅█▅▇
validation/F1,▁▆█▇▆▂▄▆▄▃
validation/Precision,▃▁▅▆▆█▄▁▄▁
validation/Recall,▁▇█▇▆▂▄▇▄▄
validation/loss,▄█▆▅▆█▇▄▅▁

0,1
train/Accuracy,0.5963
train/F1,0.59964
train/Precision,0.59639
train/Recall,0.60293
train/loss,0.47573
validation/Accuracy,0.7471
validation/F1,0.09408
validation/Precision,0.41538
validation/Recall,0.05305
validation/loss,0.55049


In [7]:
torch.save(model.state_dict(), "../saved_models/ChemGCN_HIV_Sampled")

In [8]:
from ToxicMl.MLmodels.gcn import ChemGCN
model = ChemGCN(3, 9, 32, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemGCN 3-32, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:07<00:00, 128.94it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 163.04it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 117.66it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 187.30it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 114.33it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 144.72it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 114.85it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 153.83it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 114.42it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 172.60it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 117.83it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 155.18it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 114.25it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 163.96it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 112.85it/s]
val epoc

0,1
train/Accuracy,▁▇▇▇█▇▇█▇▇
train/F1,▁████▇████
train/Precision,▁█▇███████
train/Recall,▁▆▇▆█▆▇█▇▇
train/loss,▂▃▁█▅▆▇▅█▅
validation/Accuracy,▁▅▅▇██▅▃▆▅
validation/F1,▁▅▅▅▆█▅▄▄▅
validation/Precision,█▆▅▁▁▂▅█▂▅
validation/Recall,▁▅▅▆▇█▅▄▅▅
validation/loss,█▅▁▃▄▇▇▄▁▅

0,1
train/Accuracy,0.6141
train/F1,0.61768
train/Precision,0.62553
train/Recall,0.61002
train/loss,0.71483
validation/Accuracy,0.7408
validation/F1,0.1087
validation/Precision,0.5
validation/Recall,0.06098
validation/loss,0.60079


In [9]:
from ToxicMl.MLmodels.gcn import ChemGCN
model = ChemGCN(5, 9, 32, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemGCN 5-32, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 94.99it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 142.52it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 87.75it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 130.81it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 87.32it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 143.66it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 90.94it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 149.44it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 84.38it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 117.42it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:13<00:00, 75.94it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 138.62it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 80.41it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 132.35it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 82.52it/s]
val epoch: 100%|

0,1
train/Accuracy,▁▆▇█▇█▇█▇▇
train/F1,▁▆▇████▇▇█
train/Precision,▁▅█████▇▇█
train/Recall,▁▆▅█▇▇▇██▆
train/loss,▃▃▃▅█▃▂▇▁█
validation/Accuracy,▅▁█▂▅▄▅█▄▅
validation/F1,▆▁█▂▇▅▅▆▆▇
validation/Precision,▅▅▄▅█▆▄▁▇▆
validation/Recall,▆▁█▂▇▅▅▇▆▇
validation/loss,▃█▂▅▇▄▂▆▇▁

0,1
train/Accuracy,0.6181
train/F1,0.62155
train/Precision,0.62848
train/Recall,0.61478
train/loss,0.9092
validation/Accuracy,0.6822
validation/F1,0.10295
validation/Precision,0.57692
validation/Recall,0.05652
validation/loss,0.53387


In [10]:
from ToxicMl.MLmodels.gcn import ChemGCN
model = ChemGCN(5, 9, 64, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemGCN 5-64, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 70.97it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 137.61it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 69.94it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 110.00it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:15<00:00, 64.82it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 124.88it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 72.81it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 127.33it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 68.81it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 126.26it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:15<00:00, 65.88it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 125.07it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 71.95it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 116.27it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 70.29it/s]
val epoch: 100%|

0,1
train/Accuracy,▁███████▇█
train/F1,▁█▇▆▇▇▇▆▅▇
train/Precision,▁█▃▃▅▅▄▃▂▄
train/Recall,▁▇████▇█▇█
train/loss,▃▂▃▇▁█▆▄▄▅
validation/Accuracy,▁▄▆▅▅▇▅█▆▅
validation/F1,▃▄▅▁▅▇▅▆█▃
validation/Precision,█▆▅▁▆▅▆▃█▃
validation/Recall,▂▄▅▁▅█▅▇█▃
validation/loss,▄▆▃▂▁▁▆▃█▆

0,1
train/Accuracy,0.612
train/F1,0.59759
train/Precision,0.57557
train/Recall,0.62137
train/loss,0.69116
validation/Accuracy,0.7209
validation/F1,0.0989
validation/Precision,0.48462
validation/Recall,0.05507
validation/loss,0.64854


In [11]:
from ToxicMl.MLmodels.sage import ChemSage
model = ChemSage(3, 9, 16, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemSage 3-16, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 121.83it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 187.33it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 104.73it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 175.29it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:07<00:00, 129.59it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 195.22it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 125.74it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 171.84it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 120.11it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 212.43it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 128.08it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 236.03it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 127.57it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 227.88it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 127.09it/s]
val epoc

0,1
train/Accuracy,▃█▂▅▂▂▁▃▃▁
train/F1,▁▁▁▁▁▁▁▁▁▁
train/Precision,▁▁▁▁▁▁▁▁▁▁
train/Recall,▁▁▁▁▁▁▁▁▁▁
train/loss,▃▃▇▄▅▁█▅▅▅
validation/Accuracy,▁▁▁▁▁▁▁▁▁▁
validation/F1,▁▁▁▁▁▁▁▁▁▁
validation/Precision,▁▁▁▁▁▁▁▁▁▁
validation/Recall,▁▁▁▁▁▁▁▁▁▁
validation/loss,█▆█▄▄▃▄▅▁▄

0,1
train/Accuracy,0.4968
train/F1,0.0
train/Precision,0.0
train/Recall,0.0
train/loss,0.69527
validation/Accuracy,0.9684
validation/F1,0.0
validation/Precision,0.0
validation/Recall,0.0
validation/loss,0.56629


In [12]:
from ToxicMl.MLmodels.sage import ChemSage
model = ChemSage(3, 9, 32, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemSage 3-32, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 127.49it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 181.20it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 107.04it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 195.65it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 110.81it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 213.15it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 108.70it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 183.68it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 105.59it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 184.51it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 112.19it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 194.94it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 112.97it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 207.60it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:08<00:00, 115.55it/s]
val epoc

0,1
train/Accuracy,▁▇▇█▆▆▆▇▇▇
train/F1,█▃▂▂▁▁▁▁▁▁
train/Precision,█▃▂▁▁▁▁▁▁▁
train/Recall,▁▆▇██▇▇▇▇█
train/loss,▁▂▁▄▄█▇▄▂▄
validation/Accuracy,▃▅▆█▁▇▆▄▆▂
validation/F1,▃▅▇█▁▅▇▆▄▃
validation/Precision,▆▅▆▂▆▂▇█▁▇
validation/Recall,▃▅▆█▁▅▇▆▄▃
validation/loss,▆▄▆▁▆▁█▂▃▂

0,1
train/Accuracy,0.6115
train/F1,0.54524
train/Precision,0.46794
train/Recall,0.65314
train/loss,0.70631
validation/Accuracy,0.789
validation/F1,0.12146
validation/Precision,0.46154
validation/Recall,0.06993
validation/loss,0.68929


In [13]:
from ToxicMl.MLmodels.sage import ChemSage
model = ChemSage(5, 9, 32, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemSage 5-32, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 105.18it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 158.44it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 91.70it/s] 
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 177.28it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 94.08it/s] 
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 191.55it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 91.98it/s] 
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 183.21it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 92.20it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 183.82it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 95.99it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 152.16it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 85.11it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 160.27it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 90.02it/s]
val epoch: 1

0,1
train/Accuracy,▁▆█▄▂▁█▆▅▆
train/F1,█▁▁▂▁▁▁▁▁▁
train/Precision,█▁▁▂▁▁▁▁▁▁
train/Recall,▄▁▁█▁▁▁▁▁▁
train/loss,▄▆▃▅▄▂▁▄▄█
validation/Accuracy,▁▁▁▁▁▁▁▁▁▁
validation/F1,▁▁▁▁▁▁▁▁▁▁
validation/Precision,▁▁▁▁▁▁▁▁▁▁
validation/Recall,▁▁▁▁▁▁▁▁▁▁
validation/loss,▄█▅▅█▄▅▁▇█

0,1
train/Accuracy,0.5005
train/F1,0.0
train/Precision,0.0
train/Recall,0.0
train/loss,0.75377
validation/Accuracy,0.9684
validation/F1,0.0
validation/Precision,0.0
validation/Recall,0.0
validation/loss,0.62308


In [14]:
from ToxicMl.MLmodels.sage import ChemSage
model = ChemSage(5, 9, 64, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemSage 5-64, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 82.51it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 145.30it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:13<00:00, 78.43it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 151.74it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:13<00:00, 78.64it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 160.37it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:13<00:00, 74.49it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 136.02it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 71.21it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 147.74it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 70.49it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 155.08it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:13<00:00, 75.10it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 164.64it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 71.96it/s]
val epoch: 100%|

0,1
train/Accuracy,▁█▇▇█▇███▇
train/F1,▁▃▃▁▅▃▄▅█▄
train/Precision,█▁▂▁▂▂▂▂▃▂
train/Recall,▁█▇▇█▇███▇
train/loss,▁▆▄▂▆▄▅▄█▃
validation/Accuracy,█▄▆▄▂▃▁▆▃▃
validation/F1,█▆▃▅▂▃▁█▂▅
validation/Precision,▂▇▁▇▇▇█▆▆█
validation/Recall,█▅▄▅▂▃▁▇▂▄
validation/loss,▂▂▁▆█▅▄▂▃▃

0,1
train/Accuracy,0.6273
train/F1,0.59434
train/Precision,0.54622
train/Recall,0.65177
train/loss,0.59946
validation/Accuracy,0.7622
validation/F1,0.11573
validation/Precision,0.49231
validation/Recall,0.06557
validation/loss,0.59171


In [3]:
from ToxicMl.MLmodels.attention import ChemAttention
model = ChemAttention(3, 9, 16, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemAttention 3-16, sampling")

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mcus-tibor[0m ([33mcus-tibor-none[0m). Use [1m`wandb login --relogin`[0m to force relogin


train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 97.15it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 109.64it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 97.84it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 199.42it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:09<00:00, 104.47it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 199.89it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 99.62it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 184.61it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 102.07it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 155.01it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 86.81it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 172.73it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 84.91it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 146.48it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 85.07it/s]
val epoch: 100

0,1
train/Accuracy,▁▇▇█▇▇▇▇▆▇
train/F1,█▁▄▄▃▃▄▃▂▃
train/Precision,█▁▃▃▂▂▃▂▂▂
train/Recall,▁█▇▇▇▇▇▇▇▇
train/loss,▁▆▆▂▇▃█▅▄▆
validation/Accuracy,█▇▃▇▁▅▂▄█▇
validation/F1,▁▅▃▅▄▄▁▁▇█
validation/Precision,▁▄▆▅█▅▅▃▅▆
validation/Recall,▂▅▃▆▃▄▁▁██
validation/loss,▃▃▃▇▅█▁▂▆▄

0,1
train/Accuracy,0.5912
train/F1,0.54556
train/Precision,0.48668
train/Recall,0.62065
train/loss,0.75885
validation/Accuracy,0.7941
validation/F1,0.1277
validation/Precision,0.47692
validation/Recall,0.07372
validation/loss,0.72007


In [4]:
model = ChemAttention(3, 9, 32, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemAttention 3-32, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:10<00:00, 94.53it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 166.88it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 81.38it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 163.48it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 83.67it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 170.59it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 83.04it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 164.29it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 84.32it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 167.65it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 84.01it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 149.92it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 84.30it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 166.26it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 83.49it/s]
val epoch: 100%|

0,1
train/Accuracy,▄▆█▆▇▅▄▁▆▁
train/F1,█▁▁▁▁▁▁▁▁▁
train/Precision,█▁▁▁▁▁▁▁▁▁
train/Recall,▄▁▅▁██▁▁▁▁
train/loss,▃▄▁█▆▄▅▇▃▆
validation/Accuracy,▁▁▁▁▁▁▁▁▁▁
validation/F1,▁▁▁▁▁▁▁▁▁▁
validation/Precision,▁▁▁▁▁▁▁▁▁▁
validation/Recall,▁▁▁▁▁▁▁▁▁▁
validation/loss,▃▃▅▂▆█▃▃▁▄

0,1
train/Accuracy,0.4925
train/F1,0.0
train/Precision,0.0
train/Recall,0.0
train/loss,0.70694
validation/Accuracy,0.9684
validation/F1,0.0
validation/Precision,0.0
validation/Recall,0.0
validation/loss,0.53523


In [6]:
model = ChemAttention(3, 9, 32, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemAttention 5-32, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 89.28it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 191.30it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 88.24it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 160.28it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:11<00:00, 88.99it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 194.44it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 85.57it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 136.24it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:12<00:00, 80.69it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 160.81it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 71.25it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 161.44it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:13<00:00, 77.14it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 153.28it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:14<00:00, 73.25it/s]
val epoch: 100%|

0,1
train/Accuracy,▁▆█▇▇█▇▇▇▇
train/F1,▁▅▆█▅█▇▆▅▇
train/Precision,█▃▁▅▁▅▄▃▂▅
train/Recall,▁▆█▇▇▇▇▇▇▇
train/loss,█▁▇▅▅▄██▂▂
validation/Accuracy,█▇▄▃▂▅▁▇▂▅
validation/F1,█▄▃▃▄▄▁█▂▇
validation/Precision,▃▁▅▆█▃▆▄▆▆
validation/Recall,█▅▃▃▃▄▁▇▂▆
validation/loss,▁▃▆▅█▂▁▃█▅

0,1
train/Accuracy,0.615
train/F1,0.56874
train/Precision,0.50453
train/Recall,0.65169
train/loss,0.45917
validation/Accuracy,0.8456
validation/F1,0.15446
validation/Precision,0.44615
validation/Recall,0.0934
validation/loss,0.60421


In [7]:
model = ChemAttention(3, 9, 64, 2)
optimizer = torch.optim.Adam(model.parameters())
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = 0.09)
loss_fn = torch.nn.CrossEntropyLoss()
sampler = ImbalancedSampler(train) 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

trainer = GenericClassificationTrainer(
    model,
    optimizer,
    loss_fn,
    lr_scheduler,
    train_metrics,
    validation_metrics,
    train,
    validation,
    test,
    device,
    sampler,
    EPOCHS,
    None,
    32
)

trainer.train("HIV ChemAttention 5-64, sampling")



train epoch: 100%|██████████| 1029/1029.0 [00:18<00:00, 54.75it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 132.13it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:16<00:00, 60.55it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 102.87it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:15<00:00, 64.95it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 126.53it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:16<00:00, 61.55it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 116.88it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:16<00:00, 63.20it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 133.01it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:16<00:00, 63.76it/s]
val epoch: 100%|██████████| 129/129.0 [00:01<00:00, 127.39it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:16<00:00, 63.07it/s]
val epoch: 100%|██████████| 129/129.0 [00:00<00:00, 139.97it/s]
train epoch: 100%|██████████| 1029/1029.0 [00:16<00:00, 62.52it/s]
val epoch: 100%|

0,1
train/Accuracy,▁▆█▇▆█▆▆█▇
train/F1,▁▆▇▇▆█▆▆▇▆
train/Precision,▁█▆▆▆█▅▅▇▅
train/Recall,▁▅█▇▆█▆▆▇▇
train/loss,▇█▃▅▁▂▁▅▇▇
validation/Accuracy,▄█▅▁▃▃▂▅▃▇
validation/F1,▂█▅▁▄▂▂▄▁▆
validation/Precision,▂▄▄▅█▃▅▂▁▂
validation/Recall,▂█▄▁▃▂▂▄▁▆
validation/loss,▃▅▃█▁█▂▂▂▁

0,1
train/Accuracy,0.6472
train/F1,0.61157
train/Precision,0.55401
train/Recall,0.68247
train/loss,0.76952
validation/Accuracy,0.8634
validation/F1,0.15868
validation/Precision,0.40769
validation/Recall,0.09851
validation/loss,0.48131
