# GML Final Project Classification on bbbp Dataset

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install dgl -f https://data.dgl.ai/wheels/repo.html

!pip install dglgo -f https://data.dgl.ai/wheels-test/repo.html

Looking in links: https://data.dgl.ai/wheels/repo.html
Collecting dgl
  Downloading dgl-1.1.1-cp310-cp310-manylinux1_x86_64.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m46.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: dgl
Successfully installed dgl-1.1.1
Looking in links: https://data.dgl.ai/wheels-test/repo.html
Collecting dglgo
  Downloading dglgo-0.0.2-py3-none-any.whl (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.5/63.5 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
Collecting isort>=5.10.1 (from dglgo)
  Downloading isort-5.12.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autopep8>=1.6.0 (from dglgo)
  Downloading autopep8-2.0.2-py2.py3-none-any.whl (45 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m4.5 MB/s[0m eta [36m0:00:0

In [3]:
%matplotlib inline
import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import numpy as np
import networkx as nx
import torch
import torch.nn as nn
import dgl.function as fn
import torch.nn.functional as F
import shutil
from torch.utils.data import DataLoader
import cloudpickle
from dgl.nn import GraphConv

#### Set Path

In [4]:
current_dir = "/content/drive/MyDrive/GML Final Project/"
checkpoint_path = current_dir + "save_models/model_checkpoints/" + "checkpoint"
os.makedirs(checkpoint_path, exist_ok=True)

best_model_path = current_dir + "save_models/best_model/"

folder_data_temp = current_dir +"data_temp/"
shutil.rmtree(folder_data_temp, ignore_errors=True)

path_save = current_dir + "Hierarchical_Quotient_type_False_Both_False_Uni_Vert_False_#quotient_2_#layers_1_127_one_hot.zip"
shutil.unpack_archive(path_save, folder_data_temp)

#### Custom PyTorch Datasets

In [5]:
""" Classification Dataset """
class DGLDatasetClass(torch.utils.data.Dataset):
    def __init__(self, address):
            self.address=address+".bin"
            self.list_graphs, train_labels_globals = dgl.load_graphs(self.address)
            num_graphs =len(self.list_graphs)
            self.labels = train_labels_globals["labels"]
            self.globals = train_labels_globals["globals"]

    def __len__(self):
        return len(self.list_graphs)

    def __getitem__(self, idx):
        return  self.list_graphs[idx], self.labels[idx], self.globals[idx]

#### Defining Train, Validation, and Test Set

In [6]:
path_data_temp = folder_data_temp + "scaffold"+"_"+str(2)
train_set = DGLDatasetClass(address=path_data_temp+"_train")
val_set = DGLDatasetClass(address=path_data_temp+"_val")
test_set = DGLDatasetClass(address=path_data_temp+"_test")

print(len(train_set), len(val_set), len(test_set))


1631 203 205


#### Data Loader

In [7]:
def collate(batch):
    # batch is a list of tuples (graphs, labels, globals)
    # Concatenate a sequence of graphs
    graphs = [e[0] for e in batch]
    g = dgl.batch(graphs)

    # Concatenate a sequence of tensors (labels) along a new dimension
    labels = [e[1] for e in batch]
    labels = torch.stack(labels, 0)

    # Concatenate a sequence of tensors (globals) along a new dimension
    globals = [e[2] for e in batch]
    globals = torch.stack(globals, 0)

    return g, labels, globals


def loader(batch_size=64):
    train_dataloader = DataLoader(train_set,
                              batch_size=batch_size,
                              collate_fn=collate,
                              drop_last=True,
                              shuffle=True,
                              num_workers=1)

    val_dataloader =  DataLoader(val_set,
                             batch_size=batch_size,
                             collate_fn=collate,
                             drop_last=True,
                             shuffle=False,
                             num_workers=1)

    test_dataloader = DataLoader(test_set,
                             batch_size=batch_size,
                             collate_fn=collate,
                             drop_last=True,
                             shuffle=False,
                             num_workers=1)
    return train_dataloader, val_dataloader, test_dataloader

In [8]:
train_dataloader, val_dataloader, test_dataloader = loader(batch_size=64)

#### Defining A GNN

##### Some Variables

In [9]:
#Bace dataset has 1 task. Some other datasets may have some more number of tasks, e.g., tox21 has 12 tasks.
num_tasks = 1

# Size of global feature of each graph
global_size = 200

# Number of epochs to train the model
num_epochs = 100

# Number of steps to wait if the model performance on the validation set does not improve
patience = 10

#Configurations to instantiate the model
config = {"node_feature_size":127, "edge_feature_size":12, "hidden_size":100}


# innovated module 1

In [10]:
dgl.use_libxsmm(False)

class CustomModule(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(CustomModule, self).__init__()

        self.linear1 = nn.Linear(in_feat * 2, 256)
        self.linear2 = nn.Linear(256, out_feat)
        self.relu = nn.ReLU()

    def forward(self, g, h):
        with g.local_scope():
            g.ndata["h"] = h
            g.update_all(
                message_func=fn.copy_u('h', 'm'),
                reduce_func=fn.max("m", "A"),
            )
            A = g.ndata["A"]
            concat = torch.cat([h, A], dim=1)
            output = self.linear1(concat)
            output = self.relu(output)
            output = self.linear2(output)
            output = self.relu(output)
            return output


In [11]:
class GNN(nn.Module):
    def __init__(self, config, global_size=200):
        super().__init__()
        self.config = config


        # Node feature size
        self.node_feature_size = self.config.get('node_feature_size', 127)

        # Edge feature size
        self.edge_feature_size = self.config.get('edge_feature_size', 12)

        # Hidden size
        self.hidden_size = self.config.get('hidden_size', 100)

        self.Custom1 = CustomModule(self.node_feature_size, self.hidden_size)
        self.Custom2 = CustomModule(self.hidden_size, self.hidden_size)
        self.Custom3 = CustomModule(self.hidden_size, self.hidden_size)
        self.Custom4 = CustomModule(self.hidden_size, 1)

    def forward(self, mol_dgl_graph, globals):
        mol_dgl_graph.ndata["v"] = mol_dgl_graph.ndata["v"][:, :self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:, :self.edge_feature_size]

        h = mol_dgl_graph.ndata["v"]
        h = self.Custom1(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom2(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom3(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom4(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h
        return dgl.mean_nodes(mol_dgl_graph, "h")


In [12]:
from sklearn.metrics import roc_auc_score

def compute_score(model, data_loader, val_size):
    model.eval()
    metric = roc_auc_score
    with torch.no_grad():
        prediction_all= torch.empty(0)
        labels_all= torch.empty(0)
        for i, (mol_dgl_graph, labels, globals) in enumerate(data_loader):
            prediction = model(mol_dgl_graph, globals)
            prediction = torch.sigmoid(prediction)
            prediction_all = torch.cat((prediction_all, prediction), 0)
            labels_all = torch.cat((labels_all, labels), 0)
        average = torch.tensor([0.])
        for i in range(num_tasks):
            a1 = prediction_all[:, i]
            a2 = labels_all[:, i]
            try:
                t = metric(a2.int().cpu(), a1.cpu()).item()
            except ValueError:
                t = 0
            average += t
    return average.item()

In [13]:
def loss_func(output, label):
    criterion = torch.nn.BCEWithLogitsLoss(reduction='mean')
    loss = criterion(output, label)
    return loss

In [14]:
def train_epoch(train_dataloader, model, optimizer):
    epoch_train_loss = 0
    iterations = 0
    model.train()
    for i, (mol_dgl_graph, labels, globals) in enumerate(train_dataloader):
        prediction = model(mol_dgl_graph, globals)
        loss_train = loss_func(prediction, labels)
        optimizer.zero_grad(set_to_none=True)
        loss_train.backward()
        optimizer.step()
        epoch_train_loss += loss_train.detach().item()
        iterations += 1
    epoch_train_loss /= iterations
    return epoch_train_loss

In [15]:
def train_evaluate():

    model = GNN(config, global_size)
    optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

    best_val = 0
    patience_count = 1
    epoch = 1

    while epoch <= num_epochs:
        if patience_count <= patience:
            model.train()
            loss_train = train_epoch(train_dataloader, model, optimizer)
            model.eval()
            score_val = compute_score(model, val_dataloader, len(val_set))
            if score_val > best_val:
                best_val = score_val
                print("Save checkpoint")
                path = os.path.join(checkpoint_path, 'checkpoint.pth')
                dict_checkpoint = {"score_val": score_val}
                dict_checkpoint.update({"model_state_dict": model.state_dict(), "optimizer_state": optimizer.state_dict()})
                with open(path, "wb") as outputfile:
                    cloudpickle.dump(dict_checkpoint, outputfile)
                patience_count = 1
            else:
                print("Patience", patience_count)
                patience_count += 1

            print("Epoch: {}/{} | Training Loss: {:.3f} | Valid Score: {:.3f}".format(
            epoch, num_epochs, loss_train, score_val))

            print(" ")
            print("Epoch: {}/{} | Best Valid Score Until Now: {:.3f}".format(epoch, num_epochs, best_val), "\n")
        epoch += 1

    # best model save
    shutil.rmtree(best_model_path, ignore_errors=True)
    shutil.copytree(checkpoint_path, best_model_path)

    print("Final results:")
    print("Average Valid Score: {:.3f}".format(np.mean(best_val)), "\n")


In [16]:
def test_evaluate():
    final_model = GNN(config, global_size)
    path = os.path.join(best_model_path, 'checkpoint.pth')
    with open(path, 'rb') as f:
        checkpoint = cloudpickle.load(f)
    final_model.load_state_dict(checkpoint["model_state_dict"])
    final_model.eval()
    test_score = compute_score(final_model, test_dataloader, len(test_set))

    print("Test Score: {:.3f}".format(test_score), "\n")
    print("Execution time: {:.3f} seconds".format(time.time() - start_time))


In [17]:
import time
start_time = time.time()

train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 0.681 | Valid Score: 0.465
 
Epoch: 1/100 | Best Valid Score Until Now: 0.465 

Patience 1
Epoch: 2/100 | Training Loss: 0.641 | Valid Score: 0.446
 
Epoch: 2/100 | Best Valid Score Until Now: 0.465 

Patience 2
Epoch: 3/100 | Training Loss: 0.590 | Valid Score: 0.452
 
Epoch: 3/100 | Best Valid Score Until Now: 0.465 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.575 | Valid Score: 0.492
 
Epoch: 4/100 | Best Valid Score Until Now: 0.492 

Save checkpoint
Epoch: 5/100 | Training Loss: 0.556 | Valid Score: 0.650
 
Epoch: 5/100 | Best Valid Score Until Now: 0.650 

Patience 1
Epoch: 6/100 | Training Loss: 0.532 | Valid Score: 0.622
 
Epoch: 6/100 | Best Valid Score Until Now: 0.650 

Save checkpoint
Epoch: 7/100 | Training Loss: 0.501 | Valid Score: 0.656
 
Epoch: 7/100 | Best Valid Score Until Now: 0.656 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.476 | Valid Score: 0.687
 
Epoch: 8/100 | Best Valid Score Until Now: 0.687 

Save ch

# innovated module 2

In [21]:
class CustomModule(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(CustomModule, self).__init__()

        self.linear1 = nn.Linear(in_feat * 2, 256)
        self.linear2 = nn.Linear(256, out_feat)
        self.relu = nn.ReLU()

    def forward(self, g, h):
        with g.local_scope():
            g.ndata["h"] = h
            g.update_all(
                message_func=fn.v_mul_u('h', 'h', 'm'),
                reduce_func=fn.max("m", "A"),
            )
            A = g.ndata["A"]
            concat = torch.cat([h, A], dim=1)
            output = self.linear1(concat)
            output = self.relu(output)
            output = self.linear2(output)
            output = self.relu(output)
            return output


In [22]:
class GNN(nn.Module):
    def __init__(self, config, global_size=200):
        super().__init__()
        self.config = config


        # Node feature size
        self.node_feature_size = self.config.get('node_feature_size', 127)

        # Edge feature size
        self.edge_feature_size = self.config.get('edge_feature_size', 12)

        # Hidden size
        self.hidden_size = self.config.get('hidden_size', 100)

        self.Custom1 = CustomModule(self.node_feature_size, self.hidden_size)
        self.Custom2 = CustomModule(self.hidden_size, self.hidden_size)
        self.Custom3 = CustomModule(self.hidden_size, self.hidden_size)
        self.Custom4 = CustomModule(self.hidden_size, 1)

    def forward(self, mol_dgl_graph, globals):
        mol_dgl_graph.ndata["v"] = mol_dgl_graph.ndata["v"][:, :self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:, :self.edge_feature_size]

        h = mol_dgl_graph.ndata["v"]
        h = self.Custom1(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom2(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom3(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom4(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h
        return dgl.mean_nodes(mol_dgl_graph, "h")


In [23]:
import time
start_time = time.time()

train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 0.678 | Valid Score: 0.438
 
Epoch: 1/100 | Best Valid Score Until Now: 0.438 

Patience 1
Epoch: 2/100 | Training Loss: 0.664 | Valid Score: 0.425
 
Epoch: 2/100 | Best Valid Score Until Now: 0.438 

Patience 2
Epoch: 3/100 | Training Loss: 0.634 | Valid Score: 0.431
 
Epoch: 3/100 | Best Valid Score Until Now: 0.438 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.552 | Valid Score: 0.594
 
Epoch: 4/100 | Best Valid Score Until Now: 0.594 

Save checkpoint
Epoch: 5/100 | Training Loss: 0.537 | Valid Score: 0.627
 
Epoch: 5/100 | Best Valid Score Until Now: 0.627 

Save checkpoint
Epoch: 6/100 | Training Loss: 0.525 | Valid Score: 0.641
 
Epoch: 6/100 | Best Valid Score Until Now: 0.641 

Patience 1
Epoch: 7/100 | Training Loss: 0.512 | Valid Score: 0.635
 
Epoch: 7/100 | Best Valid Score Until Now: 0.641 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.500 | Valid Score: 0.666
 
Epoch: 8/100 | Best Valid Score Until Now: 0.666 

Save ch

# innovated module 3

In [24]:
class CustomModule(nn.Module):
    def __init__(self, in_feat, out_feat):
        super(CustomModule, self).__init__()

        self.linear1 = nn.Linear(in_feat * 2, 256)
        self.linear2 = nn.Linear(256, out_feat)
        self.relu = nn.ReLU()

    def forward(self, g, h):
        with g.local_scope():
            g.ndata["h"] = h
            g.update_all(
                message_func=fn.v_add_u('h', 'h', 'm'),
                reduce_func=fn.max("m", "A"),
            )
            A = g.ndata["A"]
            concat = torch.cat([h, A], dim=1)
            output = self.linear1(concat)
            output = self.relu(output)
            output = self.linear2(output)
            output = self.relu(output)
            return output


In [25]:
class GNN(nn.Module):
    def __init__(self, config, global_size=200):
        super().__init__()
        self.config = config


        # Node feature size
        self.node_feature_size = self.config.get('node_feature_size', 127)

        # Edge feature size
        self.edge_feature_size = self.config.get('edge_feature_size', 12)

        # Hidden size
        self.hidden_size = self.config.get('hidden_size', 100)

        self.Custom1 = CustomModule(self.node_feature_size, self.hidden_size)
        self.Custom2 = CustomModule(self.hidden_size, self.hidden_size)
        self.Custom3 = CustomModule(self.hidden_size, self.hidden_size)
        self.Custom4 = CustomModule(self.hidden_size, 1)

    def forward(self, mol_dgl_graph, globals):
        mol_dgl_graph.ndata["v"] = mol_dgl_graph.ndata["v"][:, :self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:, :self.edge_feature_size]

        h = mol_dgl_graph.ndata["v"]
        h = self.Custom1(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom2(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom3(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.Custom4(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h
        return dgl.mean_nodes(mol_dgl_graph, "h")


In [26]:
import time
start_time = time.time()

train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 0.658 | Valid Score: 0.451
 
Epoch: 1/100 | Best Valid Score Until Now: 0.451 

Save checkpoint
Epoch: 2/100 | Training Loss: 0.601 | Valid Score: 0.452
 
Epoch: 2/100 | Best Valid Score Until Now: 0.452 

Save checkpoint
Epoch: 3/100 | Training Loss: 0.570 | Valid Score: 0.567
 
Epoch: 3/100 | Best Valid Score Until Now: 0.567 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.538 | Valid Score: 0.641
 
Epoch: 4/100 | Best Valid Score Until Now: 0.641 

Patience 1
Epoch: 5/100 | Training Loss: 0.498 | Valid Score: 0.631
 
Epoch: 5/100 | Best Valid Score Until Now: 0.641 

Save checkpoint
Epoch: 6/100 | Training Loss: 0.475 | Valid Score: 0.697
 
Epoch: 6/100 | Best Valid Score Until Now: 0.697 

Save checkpoint
Epoch: 7/100 | Training Loss: 0.458 | Valid Score: 0.717
 
Epoch: 7/100 | Best Valid Score Until Now: 0.717 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.448 | Valid Score: 0.733
 
Epoch: 8/100 | Best Valid Score Until Now: 0.733

# GCN 4 Layer

In [27]:
class GNN(nn.Module):
    def __init__(self, config, global_size=200, num_tasks=1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        # Node feature size
        self.node_feature_size = self.config.get('node_feature_size', 127)

        # Edge feature size
        self.edge_feature_size = self.config.get('edge_feature_size', 12)

        # Hidden size
        self.hidden_size = self.config.get('hidden_size', 100)

        self.conv1 = GraphConv(self.node_feature_size, self.hidden_size, allow_zero_in_degree=True)
        self.conv2 = GraphConv(self.hidden_size, self.hidden_size, allow_zero_in_degree=True)
        self.conv3 = GraphConv(self.hidden_size, self.hidden_size, allow_zero_in_degree=True)
        self.conv4 = GraphConv(self.hidden_size, self.num_tasks, allow_zero_in_degree=True)

    def forward(self, mol_dgl_graph, globals):
        mol_dgl_graph.ndata["v"] = mol_dgl_graph.ndata["v"][:, :self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:, :self.edge_feature_size]

        h = mol_dgl_graph.ndata["v"]
        h = self.conv1(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.conv2(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.conv3(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.conv4(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h
        return dgl.mean_nodes(mol_dgl_graph, "h")


In [28]:
import time
start_time = time.time()

train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 0.632 | Valid Score: 0.439
 
Epoch: 1/100 | Best Valid Score Until Now: 0.439 

Save checkpoint
Epoch: 2/100 | Training Loss: 0.597 | Valid Score: 0.445
 
Epoch: 2/100 | Best Valid Score Until Now: 0.445 

Save checkpoint
Epoch: 3/100 | Training Loss: 0.586 | Valid Score: 0.459
 
Epoch: 3/100 | Best Valid Score Until Now: 0.459 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.578 | Valid Score: 0.492
 
Epoch: 4/100 | Best Valid Score Until Now: 0.492 

Save checkpoint
Epoch: 5/100 | Training Loss: 0.568 | Valid Score: 0.561
 
Epoch: 5/100 | Best Valid Score Until Now: 0.561 

Save checkpoint
Epoch: 6/100 | Training Loss: 0.560 | Valid Score: 0.592
 
Epoch: 6/100 | Best Valid Score Until Now: 0.592 

Save checkpoint
Epoch: 7/100 | Training Loss: 0.547 | Valid Score: 0.604
 
Epoch: 7/100 | Best Valid Score Until Now: 0.604 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.537 | Valid Score: 0.622
 
Epoch: 8/100 | Best Valid Score Until Now: 

# GCN 2Layer

In [29]:
class GNN(nn.Module):
    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        # Node feature size
        self.node_feature_size = self.config.get('node_feature_size', 127)

        # Edge feature size
        self.edge_feature_size = self.config.get('edge_feature_size', 12)

        # Hidden size
        self.hidden_size = self.config.get('hidden_size', 100)

        self.conv1 = GraphConv(self.node_feature_size, self.hidden_size, allow_zero_in_degree=True)
        self.conv2 = GraphConv(self.hidden_size, self.num_tasks, allow_zero_in_degree=True)

    # def forward(self, g, in_feat):
    def forward(self, mol_dgl_graph, globals):
        mol_dgl_graph.ndata["v"]= mol_dgl_graph.ndata["v"][:,:self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:,:self.edge_feature_size]
        h = self.conv1(mol_dgl_graph, mol_dgl_graph.ndata["v"])
        h = F.relu(h)
        h = self.conv2(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h
        return dgl.mean_nodes(mol_dgl_graph, "h")

In [30]:
import time
start_time = time.time()

train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 0.606 | Valid Score: 0.438
 
Epoch: 1/100 | Best Valid Score Until Now: 0.438 

Patience 1
Epoch: 2/100 | Training Loss: 0.597 | Valid Score: 0.433
 
Epoch: 2/100 | Best Valid Score Until Now: 0.438 

Patience 2
Epoch: 3/100 | Training Loss: 0.591 | Valid Score: 0.436
 
Epoch: 3/100 | Best Valid Score Until Now: 0.438 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.587 | Valid Score: 0.444
 
Epoch: 4/100 | Best Valid Score Until Now: 0.444 

Save checkpoint
Epoch: 5/100 | Training Loss: 0.583 | Valid Score: 0.451
 
Epoch: 5/100 | Best Valid Score Until Now: 0.451 

Save checkpoint
Epoch: 6/100 | Training Loss: 0.580 | Valid Score: 0.460
 
Epoch: 6/100 | Best Valid Score Until Now: 0.460 

Save checkpoint
Epoch: 7/100 | Training Loss: 0.579 | Valid Score: 0.474
 
Epoch: 7/100 | Best Valid Score Until Now: 0.474 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.573 | Valid Score: 0.485
 
Epoch: 8/100 | Best Valid Score Until Now: 0.485 

Sa

# GraphSage 3Layer

In [31]:
from dgl.nn import SAGEConv

In [32]:
class GNN(nn.Module):
    def __init__(self, config, global_size=200, num_tasks=1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        # Node feature size
        self.node_feature_size = self.config.get('node_feature_size', 127)

        # Edge feature size
        self.edge_feature_size = self.config.get('edge_feature_size', 12)

        # Hidden size
        self.hidden_size = self.config.get('hidden_size', 100)

        self.conv1 = SAGEConv(self.node_feature_size, self.hidden_size,aggregator_type='mean')
        self.conv2 = SAGEConv(self.hidden_size, self.hidden_size,aggregator_type='mean')
        self.conv3 = SAGEConv(self.hidden_size, self.num_tasks,aggregator_type='mean')

    def forward(self, mol_dgl_graph, globals):
        mol_dgl_graph.ndata["v"] = mol_dgl_graph.ndata["v"][:, :self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:, :self.edge_feature_size]

        h = self.conv1(mol_dgl_graph, mol_dgl_graph.ndata["v"])
        h = F.relu(h)
        h = self.conv2(mol_dgl_graph, h)
        h = F.relu(h)
        h = self.conv3(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h

        return dgl.mean_nodes(mol_dgl_graph, "h")


In [33]:
import time
start_time = time.time()

train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 1.320 | Valid Score: 0.372
 
Epoch: 1/100 | Best Valid Score Until Now: 0.372 

Patience 1
Epoch: 2/100 | Training Loss: 0.626 | Valid Score: 0.284
 
Epoch: 2/100 | Best Valid Score Until Now: 0.372 

Patience 2
Epoch: 3/100 | Training Loss: 0.597 | Valid Score: 0.323
 
Epoch: 3/100 | Best Valid Score Until Now: 0.372 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.572 | Valid Score: 0.396
 
Epoch: 4/100 | Best Valid Score Until Now: 0.396 

Save checkpoint
Epoch: 5/100 | Training Loss: 0.548 | Valid Score: 0.460
 
Epoch: 5/100 | Best Valid Score Until Now: 0.460 

Save checkpoint
Epoch: 6/100 | Training Loss: 0.530 | Valid Score: 0.505
 
Epoch: 6/100 | Best Valid Score Until Now: 0.505 

Save checkpoint
Epoch: 7/100 | Training Loss: 0.513 | Valid Score: 0.558
 
Epoch: 7/100 | Best Valid Score Until Now: 0.558 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.503 | Valid Score: 0.598
 
Epoch: 8/100 | Best Valid Score Until Now: 0.598 

Sa

# GraphSage 2 Layer

In [34]:
class GNN(nn.Module):
    def __init__(self, config, global_size = 200, num_tasks = 1):
        super().__init__()
        self.config = config
        self.num_tasks = num_tasks

        # Node feature size
        self.node_feature_size = self.config.get('node_feature_size', 127)

        # Edge feature size
        self.edge_feature_size = self.config.get('edge_feature_size', 12)

        # Hidden size
        self.hidden_size = self.config.get('hidden_size', 100)

        self.conv1 = SAGEConv(self.node_feature_size, self.hidden_size,aggregator_type='mean')
        self.conv2 = SAGEConv(self.hidden_size, self.num_tasks,aggregator_type='mean')

    # def forward(self, g, in_feat):
    def forward(self, mol_dgl_graph, globals):
        mol_dgl_graph.ndata["v"]= mol_dgl_graph.ndata["v"][:,:self.node_feature_size]
        mol_dgl_graph.edata["e"] = mol_dgl_graph.edata["e"][:,:self.edge_feature_size]
        h = self.conv1(mol_dgl_graph, mol_dgl_graph.ndata["v"])
        h = F.relu(h)
        h = self.conv2(mol_dgl_graph, h)
        mol_dgl_graph.ndata["h"] = h
        return dgl.mean_nodes(mol_dgl_graph, "h")

In [35]:
import time
start_time = time.time()

train_evaluate()
test_evaluate()

Save checkpoint
Epoch: 1/100 | Training Loss: 0.658 | Valid Score: 0.423
 
Epoch: 1/100 | Best Valid Score Until Now: 0.423 

Save checkpoint
Epoch: 2/100 | Training Loss: 0.617 | Valid Score: 0.437
 
Epoch: 2/100 | Best Valid Score Until Now: 0.437 

Save checkpoint
Epoch: 3/100 | Training Loss: 0.595 | Valid Score: 0.460
 
Epoch: 3/100 | Best Valid Score Until Now: 0.460 

Save checkpoint
Epoch: 4/100 | Training Loss: 0.576 | Valid Score: 0.493
 
Epoch: 4/100 | Best Valid Score Until Now: 0.493 

Save checkpoint
Epoch: 5/100 | Training Loss: 0.560 | Valid Score: 0.534
 
Epoch: 5/100 | Best Valid Score Until Now: 0.534 

Save checkpoint
Epoch: 6/100 | Training Loss: 0.548 | Valid Score: 0.579
 
Epoch: 6/100 | Best Valid Score Until Now: 0.579 

Save checkpoint
Epoch: 7/100 | Training Loss: 0.538 | Valid Score: 0.608
 
Epoch: 7/100 | Best Valid Score Until Now: 0.608 

Save checkpoint
Epoch: 8/100 | Training Loss: 0.527 | Valid Score: 0.628
 
Epoch: 8/100 | Best Valid Score Until Now: 