# Molecular Property Prediction by ChemProp

In [8]:
import io
import sys
import zipfile

import pandas as pd
import requests
import torch
import torch.nn as nn
from ogb.graphproppred import Evaluator, PygGraphPropPredDataset
from sklearn.metrics import accuracy_score, roc_auc_score
from torch_geometric.loader import DataLoader
from tqdm import tqdm

from pyg_chemprop import DMPNNEncoder, RevIndexedDataset
from pyg_chemprop_utils import FeatureScaler, NoamLR, initialize_weights, smiles2data

In [9]:
data_dir = "./data"

## version check

In [10]:
import torch
torch.__version__

'2.2.0'

In [11]:
import torch_geometric
torch_geometric.__version__

'2.5.0'

In [12]:
import torch_scatter
torch_scatter.__version__

'2.1.2'

In [13]:
import ogb
ogb.__version__

'1.3.6'

## preparation

In [14]:
pyg_dataset = PygGraphPropPredDataset(name="CHNOPS2743-HARMONIC-processed-scaled", root=data_dir)
split_idx = pyg_dataset.get_idx_split()
pyg_dataset.task_type

ValueError: Invalid dataset name CHNOPS2743-HARMONIC-processed-scaled.
Available datasets are as follows:
ogbg-molbace
ogbg-molbbbp
ogbg-molclintox
ogbg-molmuv
ogbg-molpcba
ogbg-molsider
ogbg-moltox21
ogbg-moltoxcast
ogbg-molhiv
ogbg-molesol
ogbg-molfreesolv
ogbg-mollipo
ogbg-molchembl
ogbg-ppa
ogbg-code2

In [8]:
dataset = RevIndexedDataset(pyg_dataset)

100%|██████████| 41127/41127 [01:04<00:00, 638.09it/s]


In [9]:
batch_size = 50

train_data = torch.utils.data.Subset(dataset, split_idx["train"])
valid_data = torch.utils.data.Subset(dataset, split_idx["valid"])
test_data = torch.utils.data.Subset(dataset, split_idx["test"])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [10]:
def train(config, loader, device=torch.device("cpu")):
    criterion = config["loss"]
    model = config["model"]
    optimizer = config["optimizer"]
    scheduler = config["scheduler"]

    model = model.to(device)
    model.train()
    for batch in tqdm(loader, total=len(loader)):
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch)
        loss = criterion(out, batch.y.float())
        loss.backward()
        optimizer.step()
        scheduler.step()

In [11]:
def make_prediction(config, loader, device=torch.device("cpu")):
    model = config["model"]

    model = model.to(device)
    model.eval()
    y_pred = []
    y_true = []
    for batch in tqdm(loader, total=len(loader)):
        batch = batch.to(device)
        with torch.no_grad():
            batch_preds = torch.sigmoid(model(batch))
        y_pred.extend(batch_preds)
        y_true.extend(batch.y)
    return torch.stack(y_pred).cpu(), torch.stack(y_true).cpu()

## run test (cpu)

In [12]:
num_epochs = 3
hidden_size = 300
depth = 3
out_dim = 1

In [13]:
head = nn.Sequential(
    nn.Linear(hidden_size, hidden_size, bias=True),
    nn.ReLU(),
    nn.Linear(hidden_size, out_dim, bias=True),
)
model = nn.Sequential(
    DMPNNEncoder(
        hidden_size,
        dataset.num_node_features,
        dataset.num_edge_features,
        depth,
    ),
    head,
)
initialize_weights(model)

In [14]:
dataset.num_node_features, dataset.num_edge_features

(9, 3)

In [15]:
model

Sequential(
  (0): DMPNNEncoder(
    (act_func): ReLU()
    (W1): Linear(in_features=12, out_features=300, bias=False)
    (W2): Linear(in_features=300, out_features=300, bias=False)
    (W3): Linear(in_features=309, out_features=300, bias=True)
  )
  (1): Sequential(
    (0): Linear(in_features=300, out_features=300, bias=True)
    (1): ReLU()
    (2): Linear(in_features=300, out_features=1, bias=True)
  )
)

In [16]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCEWithLogitsLoss()
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, steps_per_epoch=len(train_loader), epochs=num_epochs
)

In [17]:
config = {
    "loss": criterion,
    "model": model,
    "optimizer": optimizer,
    "scheduler": scheduler,
}

In [18]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}", file=sys.stderr)
    train(config, train_loader)
    y_pred, y_true = make_prediction(config, valid_loader)
    auc = roc_auc_score(y_true, y_pred)
    acc = accuracy_score(y_true, (y_pred > 0.5).int())
    print(f"val auc={auc:.6} acc={acc:.6}", file=sys.stderr)

Epoch 1
100%|██████████| 659/659 [00:23<00:00, 27.75it/s]
100%|██████████| 83/83 [00:01<00:00, 46.22it/s]
val auc=0.735981 acc=0.980306
Epoch 2
100%|██████████| 659/659 [00:23<00:00, 27.64it/s]
100%|██████████| 83/83 [00:02<00:00, 40.58it/s]
val auc=0.724356 acc=0.981036
Epoch 3
100%|██████████| 659/659 [00:24<00:00, 27.11it/s]
100%|██████████| 83/83 [00:01<00:00, 46.28it/s]
val auc=0.730946 acc=0.980306


In [19]:
y_pred, y_true = make_prediction(config, test_loader)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"test auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 83/83 [00:01<00:00, 50.87it/s]
test auc=0.679287 acc=0.968393


## run test (gpu)

In [20]:
seed = 0
torch.manual_seed(seed);

In [21]:
torch.cuda.is_available()

True

In [22]:
cuda = torch.device("cuda")

In [23]:
dev_id = torch.cuda.current_device()
torch.cuda.get_device_name(dev_id)

'Tesla V100-PCIE-32GB'

In [24]:
num_epochs = 30
hidden_size = 300
depth = 3
out_dim = 1

In [25]:
rate = 0.0
head = nn.Sequential(
    nn.Dropout(p=rate, inplace=False),
    nn.Linear(hidden_size, hidden_size, bias=True),
    nn.ReLU(),
    nn.Dropout(p=rate, inplace=False),
    nn.Linear(hidden_size, out_dim, bias=True),
)
model = nn.Sequential(
    DMPNNEncoder(
        hidden_size,
        dataset.num_node_features,
        dataset.num_edge_features,
        depth,
    ),
    head,
)
initialize_weights(model)

In [26]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCEWithLogitsLoss()
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, steps_per_epoch=len(train_loader), epochs=num_epochs
)

In [27]:
config = {
    "loss": criterion,
    "model": model,
    "optimizer": optimizer,
    "scheduler": scheduler,
}

In [28]:
best = {'epoch': -1, 'score': float("-inf")}
outfile = f"{data_dir}/best_snapshot.pth"

In [29]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}", file=sys.stderr)
    train(config, train_loader, device=cuda)
    y_pred, y_true = make_prediction(config, valid_loader, device=cuda)
    auc = roc_auc_score(y_true, y_pred)
    if auc > best['score']:
        print(f"* best-auc {best['score']:.6} ==> {auc:.6}", file=sys.stderr)
        best['score'] = auc
        best['epoch'] = epoch+1
        torch.save(model.state_dict(), outfile)
    acc = accuracy_score(y_true, (y_pred > 0.5).int())
    print(f"val auc={auc:.6} acc={acc:.6}", file=sys.stderr)

Epoch 1
100%|██████████| 659/659 [00:06<00:00, 105.34it/s]
100%|██████████| 83/83 [00:00<00:00, 181.13it/s]
* best-auc -inf ==> 0.647879
val auc=0.647879 acc=0.980306
Epoch 2
100%|██████████| 659/659 [00:05<00:00, 111.52it/s]
100%|██████████| 83/83 [00:00<00:00, 181.64it/s]
* best-auc 0.647879 ==> 0.710235
val auc=0.710235 acc=0.980306
Epoch 3
100%|██████████| 659/659 [00:05<00:00, 111.60it/s]
100%|██████████| 83/83 [00:00<00:00, 181.29it/s]
val auc=0.69596 acc=0.980306
Epoch 4
100%|██████████| 659/659 [00:05<00:00, 111.68it/s]
100%|██████████| 83/83 [00:00<00:00, 174.61it/s]
val auc=0.694016 acc=0.980306
Epoch 5
100%|██████████| 659/659 [00:05<00:00, 111.61it/s]
100%|██████████| 83/83 [00:00<00:00, 180.97it/s]
* best-auc 0.710235 ==> 0.766635
val auc=0.766635 acc=0.980306
Epoch 6
100%|██████████| 659/659 [00:06<00:00, 108.73it/s]
100%|██████████| 83/83 [00:00<00:00, 181.22it/s]
val auc=0.73989 acc=0.980306
Epoch 7
100%|██████████| 659/659 [00:06<00:00, 106.86it/s]
100%|██████████| 83/

In [30]:
print(f"best validation auc = {best['score']} on epoch {best['epoch']}")

best validation auc = 0.7718529541446207 on epoch 27


#### final 

In [31]:
y_pred, y_true = make_prediction(config, test_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"test auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 83/83 [00:00<00:00, 181.16it/s]
test auc=0.73878 acc=0.967177


#### best snapshot

In [32]:
model = nn.Sequential(
    DMPNNEncoder(
        hidden_size,
        dataset.num_node_features,
        dataset.num_edge_features,
        depth,
    ),
    head,
)
model.load_state_dict(torch.load(outfile))

<All keys matched successfully>

In [33]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.BCEWithLogitsLoss()
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, steps_per_epoch=len(train_loader), epochs=num_epochs
)

In [34]:
config = {
    "loss": criterion,
    "model": model,
    "optimizer": optimizer,
    "scheduler": scheduler,
}

In [35]:
y_pred, y_true = make_prediction(config, train_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"train auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 659/659 [00:03<00:00, 176.07it/s]
train auc=0.972638 acc=0.979636


In [36]:
y_pred, y_true = make_prediction(config, valid_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"valid auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 83/83 [00:00<00:00, 179.44it/s]
valid auc=0.771853 acc=0.980793


In [37]:
y_pred, y_true = make_prediction(config, test_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"test auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 83/83 [00:00<00:00, 181.12it/s]
test auc=0.741718 acc=0.966934


# Using ChemProp Atom- and Bond- Features

In [38]:
seed = 0
torch.manual_seed(seed);

In [39]:
def get_dataset(df):
    data_list = []
    print("Convert to PyG Objects...", file=sys.stderr)
    for _, row in tqdm(df.iterrows(), total=len(df)):
        smi = row["smiles"]
        data = smiles2data(smi, explicit_h=True)
        data.y = torch.tensor([[row["HIV_active"]]])
        data_list.append(data)
    print("Convert to RevIndexedDataset...", file=sys.stderr)
    return RevIndexedDataset(data_list)

In [40]:
data_url = "http://snap.stanford.edu/ogb/data/graphproppred/csv_mol_download/hiv.zip"

In [41]:
r = requests.get(data_url)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall(data_dir)

In [42]:
df = pd.read_csv(f"{data_dir}/hiv/mapping/mol.csv.gz")
dataset = get_dataset(df)

Convert to PyG Objects...
100%|██████████| 41127/41127 [01:28<00:00, 463.81it/s]
Convert to RevIndexedDataset...
100%|██████████| 41127/41127 [01:37<00:00, 423.82it/s]


In [43]:
split = {
    "train": pd.read_csv(f"{data_dir}/hiv/split/scaffold/train.csv.gz", header=None),
    "valid": pd.read_csv(f"{data_dir}/hiv/split/scaffold/valid.csv.gz", header=None),
    "test": pd.read_csv(f"{data_dir}/hiv/split/scaffold/test.csv.gz", header=None),
}
train_idx = split["train"].to_numpy().flatten()
valid_idx = split["valid"].to_numpy().flatten()
test_idx = split["test"].to_numpy().flatten()

In [44]:
batch_size = 50

train_raw = torch.utils.data.Subset(dataset, train_idx)
valid_raw = torch.utils.data.Subset(dataset, valid_idx)
test_raw = torch.utils.data.Subset(dataset, test_idx)

scaler = FeatureScaler(targets=["x", "edge_attr"])
train_data = scaler.fit_transform(train_raw)
valid_data = scaler.transform(valid_raw)
test_data = scaler.transform(test_raw)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

100%|██████████| 32901/32901 [00:01<00:00, 26711.67it/s]
100%|██████████| 4113/4113 [00:00<00:00, 24006.44it/s]
100%|██████████| 4113/4113 [00:00<00:00, 26878.06it/s]


In [45]:
cuda = torch.device("cuda")

In [46]:
dev_id = torch.cuda.current_device()
torch.cuda.get_device_name(dev_id)

'Tesla V100-PCIE-32GB'

In [47]:
num_epochs = 30
hidden_size = 300
depth = 3
out_dim = 1

In [48]:
rate = 0.0
head = nn.Sequential(
    nn.Dropout(p=rate, inplace=False),
    nn.Linear(hidden_size, hidden_size, bias=True),
    nn.ReLU(),
    nn.Dropout(p=rate, inplace=False),
    nn.Linear(hidden_size, out_dim, bias=True),
)
model = nn.Sequential(
    DMPNNEncoder(
        hidden_size,
        dataset.num_node_features,
        dataset.num_edge_features,
        depth,
    ),
    head,
)
initialize_weights(model)

In [49]:
model

Sequential(
  (0): DMPNNEncoder(
    (act_func): ReLU()
    (W1): Linear(in_features=147, out_features=300, bias=False)
    (W2): Linear(in_features=300, out_features=300, bias=False)
    (W3): Linear(in_features=433, out_features=300, bias=True)
  )
  (1): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=300, out_features=1, bias=True)
  )
)

In [50]:
sum(p.numel() for p in model.parameters())

354901

In [51]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.BCEWithLogitsLoss()
scheduler = NoamLR(
    optimizer,
    warmup_epochs=[2],
    total_epochs=[num_epochs],
    steps_per_epoch=len(train_loader),
    init_lr=[1e-4],
    max_lr=[1e-3],
    final_lr=[1e-4],
)

In [52]:
dataset.num_node_features, dataset.num_edge_features

(133, 14)

In [53]:
config = {
    "loss": criterion,
    "model": model,
    "optimizer": optimizer,
    "scheduler": scheduler,
}

In [54]:
best = {'epoch': -1, 'score': float("-inf")}
outfile = f"{data_dir}/best_snapshot.pth"

In [55]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}", file=sys.stderr)
    train(config, train_loader, device=cuda)
    y_pred, y_true = make_prediction(config, valid_loader, device=cuda)
    auc = roc_auc_score(y_true, y_pred)
    if auc > best['score']:
        print(f"* best-auc {best['score']:.6} ==> {auc:.6}", file=sys.stderr)
        best['score'] = auc
        best['epoch'] = epoch+1
        torch.save(model.state_dict(), outfile)
    acc = accuracy_score(y_true, (y_pred > 0.5).int())
    print(f"val auc={auc:.6} acc={acc:.6}", file=sys.stderr)

Epoch 1
100%|██████████| 659/659 [00:06<00:00, 102.12it/s]
100%|██████████| 83/83 [00:00<00:00, 162.82it/s]
* best-auc -inf ==> 0.717424
val auc=0.717424 acc=0.980306
Epoch 2
100%|██████████| 659/659 [00:06<00:00, 102.22it/s]
100%|██████████| 83/83 [00:00<00:00, 162.86it/s]
* best-auc 0.717424 ==> 0.760888
val auc=0.760888 acc=0.979577
Epoch 3
100%|██████████| 659/659 [00:06<00:00, 102.92it/s]
100%|██████████| 83/83 [00:00<00:00, 163.20it/s]
val auc=0.719274 acc=0.981522
Epoch 4
100%|██████████| 659/659 [00:06<00:00, 102.91it/s]
100%|██████████| 83/83 [00:00<00:00, 162.74it/s]
val auc=0.743169 acc=0.981279
Epoch 5
100%|██████████| 659/659 [00:06<00:00, 102.94it/s]
100%|██████████| 83/83 [00:00<00:00, 162.82it/s]
* best-auc 0.760888 ==> 0.779459
val auc=0.779459 acc=0.980306
Epoch 6
100%|██████████| 659/659 [00:06<00:00, 100.03it/s]
100%|██████████| 83/83 [00:00<00:00, 149.93it/s]
val auc=0.762064 acc=0.982008
Epoch 7
100%|██████████| 659/659 [00:06<00:00, 99.31it/s] 
100%|██████████| 8

In [56]:
print(f"best validation auc = {best['score']} on epoch {best['epoch']}")

best validation auc = 0.7948939349402312 on epoch 20


#### final 

In [57]:
y_pred, y_true = make_prediction(config, test_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"test auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 83/83 [00:00<00:00, 157.68it/s]
test auc=0.767595 acc=0.962071


#### best snapshot

In [58]:
model = nn.Sequential(
    DMPNNEncoder(
        hidden_size,
        dataset.num_node_features,
        dataset.num_edge_features,
        depth,
    ),
    head,
)
model.load_state_dict(torch.load(outfile))

<All keys matched successfully>

In [59]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.BCEWithLogitsLoss()
scheduler = NoamLR(
    optimizer,
    warmup_epochs=[2],
    total_epochs=[num_epochs],
    steps_per_epoch=len(train_loader),
    init_lr=[1e-4],
    max_lr=[1e-3],
    final_lr=[1e-4],
)

In [60]:
config = {
    "loss": criterion,
    "model": model,
    "optimizer": optimizer,
    "scheduler": scheduler,
}

In [61]:
y_pred, y_true = make_prediction(config, train_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"train auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 659/659 [00:04<00:00, 155.09it/s]
train auc=0.975327 acc=0.978906


In [62]:
y_pred, y_true = make_prediction(config, valid_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"valid auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 83/83 [00:00<00:00, 154.71it/s]
valid auc=0.794894 acc=0.979334


In [63]:
y_pred, y_true = make_prediction(config, test_loader, device=cuda)
auc = roc_auc_score(y_true, y_pred)
acc = accuracy_score(y_true, (y_pred > 0.5).int())
print(f"test auc={auc:.6} acc={acc:.6}", file=sys.stderr)

100%|██████████| 83/83 [00:00<00:00, 157.72it/s]
test auc=0.787761 acc=0.964746


#### performances of other models

https://ogb.stanford.edu/docs/leader_graphprop/#ogbg-molhiv

#### original chemprop

```bash
$ python train.py --data_path ../train_hiv.csv --separate_val_path ../valid_hiv.csv --separate_test_path ../test_hiv.csv --target_columns HIV_active --smiles_columns smiles --dataset_type classification --save_dir tmp --explicit_h --seed 0 --quiet
```

```bash
Seed 0 ==> test auc = 0.794824
Seed 1 ==> test auc = 0.788354
Seed 2 ==> test auc = 0.772676
```

```python
MoleculeModel(
  (sigmoid): Sigmoid()
  (encoder): MPN(
    (encoder): ModuleList(
      (0): MPNEncoder(
        (dropout_layer): Dropout(p=0.0, inplace=False)
        (act_func): ReLU()
        (W_i): Linear(in_features=147, out_features=300, bias=False)
        (W_h): Linear(in_features=300, out_features=300, bias=False)
        (W_o): Linear(in_features=433, out_features=300, bias=True)
      )
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=300, out_features=300, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=300, out_features=1, bias=True)
  )
)
Number of parameters = 355,201
```

**Note: this # of parameters includes "cached_zero_vector" (300)** and 354901 = 355201-300

In [64]:
layers = [
    nn.Dropout(p=0.0, inplace=False),
    nn.ReLU(),
    nn.Linear(in_features=147, out_features=300, bias=False),
    nn.Linear(in_features=300, out_features=300, bias=False),
    nn.Linear(in_features=433, out_features=300, bias=True),
    nn.Dropout(p=0.0, inplace=False),
    nn.Linear(in_features=300, out_features=300, bias=True),
    nn.ReLU(),
    nn.Dropout(p=0.0, inplace=False),
    nn.Linear(in_features=300, out_features=1, bias=True)
]
num_params = 0
for l in layers:
    num_params += sum(p.numel() for p in l.parameters())
num_params

354901

```bash
Model 0 best validation auc = 0.819074 on epoch 4
Loading pretrained parameter "encoder.encoder.0.cached_zero_vector".
Loading pretrained parameter "encoder.encoder.0.W_i.weight".
Loading pretrained parameter "encoder.encoder.0.W_h.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.weight".
Loading pretrained parameter "encoder.encoder.0.W_o.bias".
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Moving model to cuda
Model 0 test auc = 0.794824                                                                                                 
Ensemble test auc = 0.794824
1-fold cross validation
        Seed 0 ==> test auc = 0.794824
Overall test auc = 0.794824 +/- 0.000000
Elapsed time = 0:06:50
```