<a href="https://colab.research.google.com/github/PabloExperimental/OGB/blob/main/ogbg_molhiv.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Adapted from the original Spektral example on [GitHub](https://github.com/danielegrattarola/spektral/blob/master/examples/graph_prediction/ogbg-mol-hiv_ecc.py)

[OGB HIV Overview on Hugging Face](https://huggingface.co/datasets/OGB/ogbg-molhiv)

In [13]:
!pip install tensorflow==2.15.0 spektral ogb



In [25]:
import numpy as np
import tensorflow as tf
from ogb.graphproppred import Evaluator, GraphPropPredDataset
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from spektral.data import DisjointLoader
from spektral.datasets import OGB
from spektral.layers import ECCConv, GlobalAvgPool

################################################################################
# Config
################################################################################
learning_rate = 1e-3  # Learning rate
epochs = 15  # Number of training epochs
batch_size = 16  # Batch size
seed = 0  # Random seed for batch shuffling and weight initialisation

# Seed Python, NumPy and TensorFlow in one call so that repeated runs start
# from the same shuffling order and initial weights.
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(seed)

################################################################################
# Load data
################################################################################
dataset_name = "ogbg-molhiv"
ogb_dataset = GraphPropPredDataset(name=dataset_name)
dataset = OGB(ogb_dataset)  # Wrap the OGB dataset as a Spektral dataset

# Parameters
F = dataset.n_node_features  # Dimension of node features
S = dataset.n_edge_features  # Dimension of edge features
n_out = dataset.n_labels  # Dimension of the target

# Train/validation/test split (indices provided by OGB)
idx = ogb_dataset.get_idx_split()
idx_tr, idx_va, idx_te = idx["train"], idx["valid"], idx["test"]
dataset_tr = dataset[idx_tr]
dataset_va = dataset[idx_va]  # Not consumed by the code below; kept for reuse
dataset_te = dataset[idx_te]

# The training loader iterates over the data `epochs` times; the test loader
# makes a single, unshuffled pass because order is irrelevant for evaluation.
loader_tr = DisjointLoader(dataset_tr, batch_size=batch_size, epochs=epochs)
loader_te = DisjointLoader(
    dataset_te, batch_size=batch_size, epochs=1, shuffle=False
)

################################################################################
# Build model
################################################################################
# Model inputs, in the order they are passed to `Model` below. The sizes F and
# S come from the dataset (node and edge feature dimensions).
X_in = Input(shape=(F,))
A_in = Input(shape=(None,), sparse=True)
E_in = Input(shape=(S,))
I_in = Input(shape=(), dtype=tf.int64)

# Three identical edge-conditioned convolution blocks, each followed by
# dropout. The first block reads the raw node features.
x = X_in
for _block in range(3):
    x = ECCConv(64, activation="relu")([x, A_in, E_in])
    x = Dropout(0.25)(x)

# Pool node representations using the index input, then classify.
x = GlobalAvgPool()([x, I_in])
output = Dense(n_out, activation="sigmoid")(x)

model = Model(inputs=[X_in, A_in, E_in, I_in], outputs=output)
optimizer = Adam(learning_rate)
loss_fn = BinaryCrossentropy()

model.summary()

################################################################################
# Fit model
################################################################################
# `reduce_retracing` is the current name of the deprecated
# `experimental_relax_shapes` flag.
@tf.function(input_signature=loader_tr.tf_signature(), reduce_retracing=True)
def train_step(inputs, target):
    """Run one optimisation step on a single batch.

    Args:
        inputs: Batch inputs as yielded by `loader_tr`, matching its
            `tf_signature()`.
        target: Labels for the batch.

    Returns:
        The batch loss: the binary cross-entropy between `target` and the
        model predictions plus any extra losses registered on the model.
    """
    with tf.GradientTape() as tape:
        predictions = model(inputs, training=True)
        # `model.losses` holds losses added by layers (e.g. regularisers).
        loss = loss_fn(target, predictions) + sum(model.losses)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    return loss


# The loader yields batches for every epoch back to back, so count steps
# manually and report the mean batch loss each time an epoch's worth of
# batches has been consumed.
steps_per_epoch = loader_tr.steps_per_epoch
step = loss = 0
for batch in loader_tr:
    loss += train_step(*batch)
    step += 1
    if step != steps_per_epoch:
        continue
    # End of epoch: print the average loss and reset the accumulators.
    print("Loss: {:.4f}".format(loss / steps_per_epoch))
    step = loss = 0

################################################################################
# Evaluate model
################################################################################
print("Testing model")
evaluator = Evaluator(name=dataset_name)

# Collect the labels and the model's predictions for every test batch.
batch_targets, batch_predictions = [], []
for inputs, target in loader_te:
    batch_predictions.append(model(inputs, training=False).numpy())
    batch_targets.append(target)

# Stack the per-batch arrays row-wise into single arrays.
y_true = np.vstack(batch_targets)
y_pred = np.vstack(batch_predictions)

# Test loss with the training loss function, plus the OGB evaluator's score.
model_loss = loss_fn(y_true, y_pred)
ogb_score = evaluator.eval({"y_true": y_true, "y_pred": y_pred})

print(
    "Done. Test loss: {:.4f}. ROC-AUC: {:.2f}".format(model_loss, ogb_score["rocauc"])
)

  loaded_dict = torch.load(pre_processed_file_path, 'rb')


Model: "model_18"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_77 (InputLayer)       [(None, 9)]                  0         []                            
                                                                                                  
 input_78 (InputLayer)       [(None, None)]               0         []                            
                                                                                                  
 input_79 (InputLayer)       [(None, 3)]                  0         []                            
                                                                                                  
 ecc_conv_53 (ECCConv)       (None, 64)                   2944      ['input_77[0][0]',            
                                                                     'input_78[0][0]',     

  np.random.shuffle(a)


Loss: 0.1674
Loss: 0.1609
Loss: 0.1561
Loss: 0.1551
Loss: 0.1517
Loss: 0.1493
Loss: 0.1488
Loss: 0.1464
Loss: 0.1475
Loss: 0.1451
Loss: 0.1442
Loss: 0.1424
Loss: 0.1417
Loss: 0.1412
Loss: 0.1403
Testing model


  np.random.shuffle(a)


Done. Test loss: 0.1332. ROC-AUC: 0.74
