### Try with AugLiChem

In [1]:
import sys
from tqdm import tqdm
sys.path.append(sys.path[0][:-8])

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
torch.manual_seed(8)

from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import OneHotEncoder

from rdkit import Chem

In [2]:
from auglichem.molecule.data import MoleculeDatasetWrapper
from auglichem.molecule import RandomAtomMask, RandomBondDelete, Compose
from auglichem.molecule.models import AttentiveFP

from auglichem.molecule.models.AttentiveLayers import Fingerprint
from auglichem.molecule.models.getFeatures import save_smiles_dicts, get_smiles_array

from auglichem.molecule.models import AttentiveFP as AFP

ModuleNotFoundError: No module named 'auglichem.molecule.models.AttentiveLayers'

In [None]:
# --- Task definition -------------------------------------------------------
task_name = "ClinTox"
tasks = ["CT_TOX"]

# Binary classification: two output units (one per class) for every task.
per_task_output_units_num = 2
output_units_num = per_task_output_units_num * len(tasks)

# --- Reproducibility -------------------------------------------------------
# NOTE(review): not referenced by any of the visible cells (the import cell
# seeds torch with 8) -- confirm whether this is still needed.
random_seed = 88

# --- Training schedule -----------------------------------------------------
batch_size = 100
epochs = 800

# Both values are *negative base-10 exponents*: the optimizer cell passes
# 10**-learning_rate and 10**-weight_decay to Adam.
learning_rate = 3.5
weight_decay = 3  # exponent of the L2 regularization lambda

# --- Model architecture ----------------------------------------------------
fingerprint_dim = 200  # passed to the model as hidden_channels
p_dropout = 0.5
radius = 3  # passed to the model as num_layers
T = 3  # passed to the model as num_timesteps

In [None]:
# Augmentation pipeline applied to every molecule: atom masking followed by
# bond deletion.
# NOTE(review): 1.0 and [0.0, 0.2] are presumably the masking / deletion
# fractions -- confirm against the AugLiChem documentation.
transform = Compose(
    [
        RandomAtomMask(1.0),
        RandomBondDelete([0.0, 0.2]),
    ]
)

# NOTE(review): the hyperparameter cell sets task_name to 'ClinTox', but the
# dataset loaded here is 'BACE' -- confirm which one is intended.
dataset = MoleculeDatasetWrapper(
    'BACE',
    transform=transform,
    aug_time=1,
    batch_size=batch_size,
)
print(dataset.batch_size)

### Torch Geometric AFP Implementation

In [None]:
# Ask the dataset wrapper for its data loaders; the result is unpacked as
# (train, validation, test), in that order.
train_loader, val_loader, test_loader = dataset.get_data_loaders()

In [None]:
# Build the AttentiveFP network (imported as AFP) from the hyperparameters.
# NOTE(review): in_channels=2 and edge_dim=2 are hard-coded -- confirm that
# they match the node / edge feature widths produced by the dataset.
model = AFP(
    in_channels=2,
    hidden_channels=fingerprint_dim,
    out_channels=output_units_num,
    edge_dim=2,
    num_layers=radius,
    num_timesteps=T,
    dropout=p_dropout,
)
# Alternative kept for reference (its import fails in the import cell):
#their_model = model = Fingerprint(radius, T, 2, 2,
            #fingerprint_dim, output_units_num, p_dropout)

In [None]:
# Total number of trainable scalars: multiply out the dimensions of every
# parameter tensor that requires a gradient and add the products up.
model_parameters = (p for p in model.parameters() if p.requires_grad)
params = sum(np.prod(p.size()) for p in model_parameters)
print(params)

# Per-parameter listing: index, name and shape of each trainable tensor.
# The index counts all named parameters, including the frozen ones skipped.
for idx, (name, param) in enumerate(model.named_parameters()):
    if not param.requires_grad:
        continue
    print(idx, name, param.data.shape)
# Uncomment to dump the whole module tree:
#print(model)

In [None]:
def evaluate(model, test_loader):
    """Print the cross-entropy loss and ROC-AUC of ``model`` on the test set.

    Every batch of ``test_loader`` is scored (not just the first one) and the
    metrics are computed once over the concatenated predictions.  The model is
    switched to eval mode for the forward passes and put back into whatever
    mode it was in before the call, so calling this between training epochs
    does not silently disable dropout for the rest of training.

    Args:
        model: module called as ``model(x, edge_index, edge_attr, batch)``;
            assumed to return logits of shape (num_graphs, num_classes) with
            class 1 as the positive class -- TODO confirm.
        test_loader: iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr``, ``batch`` and ``y``.

    Raises:
        ValueError: if ``test_loader`` yields no batches.
    """
    was_training = model.training
    model.eval()
    logits, labels = [], []
    try:
        with torch.no_grad():
            for data in test_loader:
                logits.append(
                    model(data.x.float(), data.edge_index, data.edge_attr, data.batch)
                )
                labels.append(data.y.flatten())
    finally:
        # Restore the caller's mode even if a forward pass raised.
        model.train(was_training)

    if not logits:
        raise ValueError("test_loader produced no batches")

    logits = torch.cat(logits)
    labels = torch.cat(labels)

    # Uses the module-level ``criterion`` defined in the optimizer cell.
    loss = criterion(logits, labels)

    # Rank by the class-1 *probability*: with two logits the probability
    # depends on logit1 - logit0, so the raw class-1 logit alone can order
    # samples differently and distort the ROC-AUC.
    positive_prob = torch.softmax(logits, dim=1)[:, 1]
    score = roc_auc_score(labels.cpu().numpy(), positive_prob.cpu().numpy())
    print("TEST LOSS: {0:.3f}, ROC-AUC: {1:.3f}".format(loss.item(), score))
    
def validate(model, val_loader):
    """Print the cross-entropy loss and ROC-AUC of ``model`` on the validation set.

    Every batch of ``val_loader`` is scored (not just the first one) and the
    metrics are computed once over the concatenated predictions.  The model is
    switched to eval mode for the forward passes and put back into whatever
    mode it was in before the call, so calling this between training epochs
    does not silently disable dropout for the rest of training.

    Args:
        model: module called as ``model(x, edge_index, edge_attr, batch)``;
            assumed to return logits of shape (num_graphs, num_classes) with
            class 1 as the positive class -- TODO confirm.
        val_loader: iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr``, ``batch`` and ``y``.

    Raises:
        ValueError: if ``val_loader`` yields no batches.
    """
    was_training = model.training
    model.eval()
    logits, labels = [], []
    try:
        with torch.no_grad():
            for data in val_loader:
                logits.append(
                    model(data.x.float(), data.edge_index, data.edge_attr, data.batch)
                )
                labels.append(data.y.flatten())
    finally:
        # Restore the caller's mode even if a forward pass raised.
        model.train(was_training)

    if not logits:
        raise ValueError("val_loader produced no batches")

    logits = torch.cat(logits)
    labels = torch.cat(labels)

    # Uses the module-level ``criterion`` defined in the optimizer cell.
    loss = criterion(logits, labels)

    # Rank by the class-1 *probability*: with two logits the probability
    # depends on logit1 - logit0, so the raw class-1 logit alone can order
    # samples differently and distort the ROC-AUC.
    positive_prob = torch.softmax(logits, dim=1)[:, 1]
    score = roc_auc_score(labels.cpu().numpy(), positive_prob.cpu().numpy())
    print("VALIDATION LOSS: {0:.3f}, ROC-AUC: {1:.3f}".format(loss.item(), score))

In [None]:
# learning_rate and weight_decay are stored as negative base-10 exponents,
# so Adam receives lr = 10**-learning_rate and an L2 penalty of
# 10**-weight_decay.
optimizer = optim.Adam(
    model.parameters(),
    lr=10 ** -learning_rate,
    weight_decay=10 ** -weight_decay,
)
# Cross-entropy over the class logits; also read as a global by
# evaluate() and validate().
criterion = nn.CrossEntropyLoss()

In [None]:
# Main training loop: one optimizer step per batch, then report validation
# and test metrics at the end of every epoch.
for epoch in range(epochs):
    # validate()/evaluate() put the model into eval mode; switch back to
    # training mode each epoch so dropout is active while fitting.
    model.train()

    # Wrap the loader itself (not enumerate(...)) so tqdm can read its length
    # and show a proper progress bar.
    for data in tqdm(train_loader):
        optimizer.zero_grad()

        # Forward pass on the batched graph (node features cast to float).
        pred = model(data.x.float(), data.edge_index, data.edge_attr, data.batch)

        # Backward pass and parameter update.
        loss = criterion(pred, data.y.flatten())
        loss.backward()
        optimizer.step()

    print("EPOCH:\t{}".format(epoch))
    validate(model, val_loader)
    evaluate(model, test_loader)