In [1]:
!pip3 install ogb torch 
!pip3 install dgl-cu101

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.nn import SAGEConv, GraphConv
from torch.nn import Linear
import dgl.function as fn
import pandas as pd
import sklearn.metrics as sk_m
import numpy as np
import scipy.sparse as sp
import itertools
from ogb.graphproppred import DglGraphPropPredDataset
from torch.utils.data import Dataset
from tqdm import tqdm
from collections import Counter
import random
import os
from ogb.graphproppred import Evaluator


Using backend: pytorch


In [3]:
device = torch.device("cuda")

In [4]:
evaluator = Evaluator(name = 'ogbg-molhiv')


In [5]:
def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True
    # Set a fixed value for the hash seed
    os.environ["PYTHONHASHSEED"] = str(seed)
    print(f"Random seed set as {seed}")


In [6]:
def get_by_indx(graphs, idx):
  return [graphs[i] for i in idx]

# Loading and preparing the data

In [7]:
class CustomGraphDataGenerator(Dataset):
    def __init__(self, graphs, labels, device, balanced_sampling=False, batch_size=32):
        self.device = device
        self.balanced_sampling = balanced_sampling
        self.batch_size= batch_size
        self.y = labels
        self.X = graphs
        if balanced_sampling:
          self.__prepare_samples()
          self.__define_batch_size()
          self.neg_indices = np.array(list(range(len(self.X_neg))))
          self.pos_indices = np.array(list(range(len(self.X_pos))))
        self.overall_indices = np.array(list(range(len(self.X))))
        
    @staticmethod
    def floor(x):
        if x<1:
            tmp = 1
        else:
            tmp = int(x)
        return int(np.ceil(x)) if x % tmp >= 0.5 else int(np.floor(x))
       
    
    def __define_batch_size(self):
        counts = Counter(self.y.reshape(-1))
        
        batch_size_neg = (counts[0]/len(self.X))*self.batch_size
        batch_size_neg = self.floor(batch_size_neg)
        
        batch_size_pos = (counts[1]/len(self.X))*self.batch_size
        batch_size_pos = self.floor(batch_size_pos)
        
        self.batch_size_pos = batch_size_pos
        self.batch_size_neg = batch_size_neg
    
    def __prepare_samples(self):
        tmp = np.arange(len(self.X))
        self.X_neg = get_by_indx(self.X, tmp[self.y.reshape(-1)==0])
        self.X_pos = get_by_indx(self.X, tmp[self.y.reshape(-1)==1])
        
        
    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(len(self.X) / self.batch_size))
        
    def __getitem__(self, index):
        if index+1>self.__len__()-1:
            raise StopIteration
        
        if self.balanced_sampling:
            
            indices_neg = self.neg_indices[index*self.batch_size_neg:(index+1)*self.batch_size_neg]
            indices_pos = self.pos_indices[index*self.batch_size_pos:(index+1)*self.batch_size_pos]

            X = get_by_indx(self.X_pos, indices_pos)+get_by_indx(self.X_neg, indices_neg)
            y = np.concatenate((self.y[indices_pos], self.y[indices_neg]))
        
            return dgl.batch(X).to(self.device), torch.Tensor(y).to(self.device)
        
        else:
            
            indices = self.overall_indices[index*self.batch_size:(index+1)*self.batch_size]
            
            return dgl.batch(get_by_indx(self.X, indices)).to(self.device), torch.Tensor(self.y[indices]).to(self.device)
        
    
    def shuffle_indices(self):
        np.random.shuffle(self.neg_indices)
        np.random.shuffle(self.pos_indices)
        np.random.shuffle(self.overall_indices)

In [8]:
dataset = DglGraphPropPredDataset(name = 'ogbg-molhiv') 
split_idx = dataset.get_idx_split()
train_idx, valid_idx, test_idx = split_idx["train"], split_idx["valid"], split_idx["test"]
labels = np.array(dataset.labels)
graphs = dataset.graphs
train_graphs, val_graphs, test_graphs  = get_by_indx(graphs, train_idx), get_by_indx(graphs, valid_idx),  get_by_indx(graphs, test_idx)
train_labels, valid_labels, test_labels = labels[train_idx], labels[valid_idx], labels[test_idx]

In [9]:
test_graphs_batched = dgl.batch(test_graphs)

In [10]:
Counter(train_labels.reshape(-1))

Counter({0: 31669, 1: 1232})

In [11]:
len(dataset), len(train_graphs), len(val_graphs), len(test_graphs)

(41127, 32901, 4113, 4113)

## class weights

In [12]:
from sklearn.utils.class_weight import compute_class_weight

In [13]:
class_weights = compute_class_weight('balanced', y=train_labels.reshape(-1),
                                     classes=np.unique(train_labels.reshape(-1)))


# Training

In [21]:
from copy import deepcopy
def metrics(labels, scores, threshold=0.5):
    labels = np.hstack([i.reshape(-1) for i in labels])
    scores = np.hstack([i.reshape(-1) for i in scores])
    pred = (np.array(scores)>threshold).astype(int)
    f1 = sk_m.f1_score(y_pred=pred, y_true=labels, average='binary')
    roc = sk_m.roc_auc_score(y_score=scores, y_true=labels)
    return f1, roc

def BCELoss_class_weighted(weights):

    def loss(input, target):
        input = torch.clamp(input,min=1e-7,max=1-1e-7)
        bce = - weights[1] * target * torch.log(input) - (1 - target) * weights[0] * torch.log(1 - input)
        return torch.mean(bce)

    return loss

def train(train_generator, val_generator, model, 
          epochs=10, lr=1e-3, weight_decay=1e-5,
          class_weights=None,
          use_edge_feature=False, shuffle=False, early_stopping=None):
    
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    
    loss_func = BCELoss_class_weighted(class_weights)
    
    if not (class_weights is None):
        class_weights = torch.FloatTensor(class_weights)
        loss_func = BCELoss_class_weighted(class_weights)
    else:
        loss_func = F.binary_cross_entropy

    
    for e in range(epochs):
        train_scores = []
        train_labels = []
        loss_train_holder = []
        
        val_scores = []
        val_loss_holder = []
        val_labels = []
        
        for train_graph, label in tqdm(train_generator):
            if use_edge_feature:
                logits_train = model(train_graph, train_graph.ndata['feat'],\
                                     train_graph.edata['feat'])
            else:
                logits_train = model(train_graph, train_graph.ndata['feat'])
            sigmoided_train = F.sigmoid(logits_train)
            loss_train = loss_func(sigmoided_train, label)
            
            # Backward
            optimizer.zero_grad()
            loss_train.backward()
            optimizer.step()
        
            train_labels.append(label.cpu().numpy())
            train_scores.append(sigmoided_train.cpu().detach().numpy())
            loss_train_holder.append(loss_train.cpu().detach().numpy())
            
        
        with torch.no_grad():  
            for val_graph, val_label in val_generator:
                
                if use_edge_feature:
                    logits_val = model(val_graph, val_graph.ndata['feat'], \
                                   val_graph.edata['feat'], train_mode=False)
                else:
                    logits_val = model(val_graph, val_graph.ndata['feat'], train_mode=False)
                    
                sigmoided_val = F.sigmoid(logits_val)
                
                loss_val = loss_func(sigmoided_val, val_label)
                
                
                val_labels.append(val_label.cpu().numpy())
                val_scores.append(sigmoided_val.cpu().detach().numpy())
                val_loss_holder.append(loss_val.cpu())
            
        train_f1, train_roc = metrics(train_labels, train_scores)
        val_f1, val_roc = metrics(val_labels, val_scores)
        

        print('In epoch {}, Train loss: {:.3f}, train roc: {:.3f}, train f1: {:.3f},'
              ' val loss: {:.3f}, val roc: {:.3f}, val f1 : {:.3f}'.format(
            e, np.mean(loss_train_holder), train_roc, train_f1, np.mean(val_loss_holder), \
        val_roc, val_f1))
        
        if early_stopping:
            early_stopping(val_roc, deepcopy(model))
            print('Early stopping extemum : {}'.format(early_stopping.extemum_value))
            if early_stopping.early_stop:
                print('Stopping early')
                model = early_stopping.best_model
                break
        
        if shuffle:
            train_generator.shuffle_indices()
            
            
    return model

In [22]:
import operator

In [23]:
class EarlyStopping:
    def __init__(self, tolerance=5, mode='min'):
        assert mode in ['min','max'], 'Mode should be min or max'
        self.mode = operator.lt if mode=='min' else operator.gt 
        self.tolerance = tolerance
        self.counter = 0
        self.early_stop = False
        self.extemum_value = None
        self.best_model = None
        
        
    def __call__(self, val, model):
        if self.extemum_value is None:
            self.extemum_value = val
            self.best_model = model
        else:
            if not self.mode(val, self.extemum_value):
                self.counter+=1
            else:
                self.extemum_value = val
                self.best_model = model
                self.counter = 0
        
        if self.counter==self.tolerance:
            self.early_stop=True

In [24]:
from dgl.nn import GraphConv
from dgl.nn.pytorch.conv import EdgeWeightNorm
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder

class GraphModel(nn.Module):
    def __init__(self, emb_shape=100):
        set_seed(0)
        super(GraphModel, self).__init__()
        self.emb = AtomEncoder(emb_shape)
        self.node_conv1 = GraphConv(emb_shape, 256, allow_zero_in_degree=True)
        self.node_conv2 = SAGEConv(256, 128, 'lstm')
        self.node_conv3 = SAGEConv(128, 64, 'mean')
        self.node_conv4 = SAGEConv(64, 1, 'mean')
        self.dropout1 = torch.nn.Dropout(0.25)

        
    def forward(self, g, n_data, train_mode=True):
        h_nodes = self.emb(n_data)
        h_nodes = self.node_conv1(g, h_nodes)
        h_nodes = F.relu(h_nodes)
        h_nodes = self.node_conv2(g, h_nodes)
        if train_mode:
          h_nodes = self.dropout1(h_nodes)
        h_nodes = F.relu(h_nodes)
        h_nodes = self.node_conv3(g, h_nodes)
        h_nodes = F.relu(h_nodes)
        h_nodes = self.node_conv4(g, h_nodes)
        g.ndata['h'] = h_nodes
        h_nodes = dgl.mean_nodes(g, 'h')
        return h_nodes


In [25]:
model = GraphModel(100)
model.to(device)
batch_size = 64
balanced_sampling = False
use_edge_feature = False
early_stopping = EarlyStopping(tolerance=10, mode='max')

Random seed set as 0


In [26]:
train_generator = CustomGraphDataGenerator(train_graphs, train_labels,device=device, batch_size=batch_size,
                                          balanced_sampling=balanced_sampling)
valid_generator = CustomGraphDataGenerator(val_graphs, valid_labels, device=device, batch_size=batch_size,
                                          balanced_sampling=balanced_sampling)

In [27]:
model = train(train_generator, valid_generator, model, class_weights=class_weights, weight_decay=1e-5,
             epochs=100, use_edge_feature=use_edge_feature, 
             early_stopping=early_stopping)

100%|█████████▉| 514/515 [00:11<00:00, 46.00it/s]


In epoch 0, Train loss: 0.677, train roc: 0.624, train f1: 0.097, val loss: 0.927, val roc: 0.693, val f1 : 0.039
Early stopping extemum : 0.6929046938179357


100%|█████████▉| 514/515 [00:10<00:00, 49.66it/s]


In epoch 1, Train loss: 0.657, train roc: 0.657, train f1: 0.119, val loss: 0.881, val roc: 0.717, val f1 : 0.039
Early stopping extemum : 0.717445382285565


100%|█████████▉| 514/515 [00:10<00:00, 49.50it/s]


In epoch 2, Train loss: 0.644, train roc: 0.676, train f1: 0.124, val loss: 0.816, val roc: 0.698, val f1 : 0.039
Early stopping extemum : 0.717445382285565


100%|█████████▉| 514/515 [00:10<00:00, 49.44it/s]


In epoch 3, Train loss: 0.640, train roc: 0.684, train f1: 0.129, val loss: 0.773, val roc: 0.720, val f1 : 0.040
Early stopping extemum : 0.7195609058622757


100%|█████████▉| 514/515 [00:10<00:00, 49.87it/s]


In epoch 4, Train loss: 0.624, train roc: 0.703, train f1: 0.131, val loss: 0.779, val roc: 0.723, val f1 : 0.041
Early stopping extemum : 0.7233245698999123


100%|█████████▉| 514/515 [00:10<00:00, 47.74it/s]


In epoch 5, Train loss: 0.613, train roc: 0.718, train f1: 0.141, val loss: 0.759, val roc: 0.731, val f1 : 0.042
Early stopping extemum : 0.731120950755654


100%|█████████▉| 514/515 [00:11<00:00, 45.69it/s]


In epoch 6, Train loss: 0.602, train roc: 0.736, train f1: 0.152, val loss: 0.741, val roc: 0.749, val f1 : 0.043
Early stopping extemum : 0.7490982888243163


100%|█████████▉| 514/515 [00:10<00:00, 49.22it/s]


In epoch 7, Train loss: 0.592, train roc: 0.746, train f1: 0.158, val loss: 0.709, val roc: 0.750, val f1 : 0.046
Early stopping extemum : 0.7495041741617086


100%|█████████▉| 514/515 [00:10<00:00, 49.17it/s]


In epoch 8, Train loss: 0.579, train roc: 0.761, train f1: 0.168, val loss: 0.677, val roc: 0.763, val f1 : 0.046
Early stopping extemum : 0.7632489276324892


100%|█████████▉| 514/515 [00:10<00:00, 46.76it/s]


In epoch 9, Train loss: 0.567, train roc: 0.772, train f1: 0.179, val loss: 0.615, val roc: 0.769, val f1 : 0.049
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 48.96it/s]


In epoch 10, Train loss: 0.558, train roc: 0.779, train f1: 0.187, val loss: 0.629, val roc: 0.758, val f1 : 0.048
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 48.50it/s]


In epoch 11, Train loss: 0.542, train roc: 0.796, train f1: 0.204, val loss: 0.633, val roc: 0.763, val f1 : 0.050
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 48.93it/s]


In epoch 12, Train loss: 0.528, train roc: 0.811, train f1: 0.211, val loss: 0.643, val roc: 0.759, val f1 : 0.050
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 49.92it/s]


In epoch 13, Train loss: 0.508, train roc: 0.828, train f1: 0.225, val loss: 0.611, val roc: 0.756, val f1 : 0.051
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 49.71it/s]


In epoch 14, Train loss: 0.491, train roc: 0.841, train f1: 0.236, val loss: 0.587, val roc: 0.759, val f1 : 0.056
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 48.18it/s]


In epoch 15, Train loss: 0.475, train roc: 0.855, train f1: 0.249, val loss: 0.589, val roc: 0.762, val f1 : 0.056
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 49.36it/s]


In epoch 16, Train loss: 0.459, train roc: 0.866, train f1: 0.258, val loss: 0.534, val roc: 0.762, val f1 : 0.068
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 48.81it/s]


In epoch 17, Train loss: 0.441, train roc: 0.878, train f1: 0.271, val loss: 0.561, val roc: 0.766, val f1 : 0.068
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:11<00:00, 46.51it/s]


In epoch 18, Train loss: 0.425, train roc: 0.888, train f1: 0.278, val loss: 0.562, val roc: 0.751, val f1 : 0.068
Early stopping extemum : 0.7685346616853466


100%|█████████▉| 514/515 [00:10<00:00, 49.73it/s]


In epoch 19, Train loss: 0.407, train roc: 0.899, train f1: 0.291, val loss: 0.539, val roc: 0.769, val f1 : 0.076
Early stopping extemum : 0.7691896130252295


100%|█████████▉| 514/515 [00:10<00:00, 49.51it/s]


In epoch 20, Train loss: 0.388, train roc: 0.909, train f1: 0.305, val loss: 0.525, val roc: 0.759, val f1 : 0.075
Early stopping extemum : 0.7691896130252295


100%|█████████▉| 514/515 [00:11<00:00, 46.66it/s]


In epoch 21, Train loss: 0.381, train roc: 0.914, train f1: 0.305, val loss: 0.498, val roc: 0.760, val f1 : 0.084
Early stopping extemum : 0.7691896130252295


100%|█████████▉| 514/515 [00:10<00:00, 47.85it/s]


In epoch 22, Train loss: 0.357, train roc: 0.924, train f1: 0.327, val loss: 0.491, val roc: 0.757, val f1 : 0.094
Early stopping extemum : 0.7691896130252295


100%|█████████▉| 514/515 [00:10<00:00, 47.63it/s]


In epoch 23, Train loss: 0.330, train roc: 0.936, train f1: 0.350, val loss: 0.512, val roc: 0.771, val f1 : 0.093
Early stopping extemum : 0.7711083437110835


100%|█████████▉| 514/515 [00:10<00:00, 49.44it/s]


In epoch 24, Train loss: 0.313, train roc: 0.943, train f1: 0.362, val loss: 0.541, val roc: 0.764, val f1 : 0.096
Early stopping extemum : 0.7711083437110835


100%|█████████▉| 514/515 [00:10<00:00, 49.89it/s]


In epoch 25, Train loss: 0.308, train roc: 0.945, train f1: 0.364, val loss: 0.526, val roc: 0.777, val f1 : 0.115
Early stopping extemum : 0.7768983595467613


100%|█████████▉| 514/515 [00:10<00:00, 49.84it/s]


In epoch 26, Train loss: 0.292, train roc: 0.951, train f1: 0.373, val loss: 0.563, val roc: 0.758, val f1 : 0.138
Early stopping extemum : 0.7768983595467613


100%|█████████▉| 514/515 [00:11<00:00, 46.68it/s]


In epoch 27, Train loss: 0.276, train roc: 0.956, train f1: 0.386, val loss: 0.559, val roc: 0.753, val f1 : 0.119
Early stopping extemum : 0.7768983595467613


100%|█████████▉| 514/515 [00:10<00:00, 49.36it/s]


In epoch 28, Train loss: 0.256, train roc: 0.962, train f1: 0.412, val loss: 0.551, val roc: 0.755, val f1 : 0.112
Early stopping extemum : 0.7768983595467613


100%|█████████▉| 514/515 [00:10<00:00, 49.15it/s]


In epoch 29, Train loss: 0.257, train roc: 0.962, train f1: 0.408, val loss: 0.561, val roc: 0.775, val f1 : 0.125
Early stopping extemum : 0.7768983595467613


100%|█████████▉| 514/515 [00:10<00:00, 49.81it/s]


In epoch 30, Train loss: 0.248, train roc: 0.965, train f1: 0.422, val loss: 0.581, val roc: 0.767, val f1 : 0.154
Early stopping extemum : 0.7768983595467613


100%|█████████▉| 514/515 [00:10<00:00, 49.51it/s]


In epoch 31, Train loss: 0.252, train roc: 0.963, train f1: 0.413, val loss: 0.534, val roc: 0.781, val f1 : 0.138
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 49.82it/s]


In epoch 32, Train loss: 0.230, train roc: 0.969, train f1: 0.439, val loss: 0.607, val roc: 0.757, val f1 : 0.131
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 50.14it/s]


In epoch 33, Train loss: 0.229, train roc: 0.969, train f1: 0.435, val loss: 0.634, val roc: 0.740, val f1 : 0.106
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 50.08it/s]


In epoch 34, Train loss: 0.221, train roc: 0.970, train f1: 0.443, val loss: 0.639, val roc: 0.728, val f1 : 0.093
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 49.62it/s]


In epoch 35, Train loss: 0.224, train roc: 0.970, train f1: 0.448, val loss: 0.621, val roc: 0.732, val f1 : 0.122
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:11<00:00, 46.59it/s]


In epoch 36, Train loss: 0.204, train roc: 0.975, train f1: 0.467, val loss: 0.624, val roc: 0.738, val f1 : 0.135
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 49.30it/s]


In epoch 37, Train loss: 0.197, train roc: 0.976, train f1: 0.476, val loss: 0.616, val roc: 0.732, val f1 : 0.106
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 48.82it/s]


In epoch 38, Train loss: 0.212, train roc: 0.973, train f1: 0.458, val loss: 0.615, val roc: 0.715, val f1 : 0.107
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 49.74it/s]


In epoch 39, Train loss: 0.186, train roc: 0.978, train f1: 0.494, val loss: 0.614, val roc: 0.733, val f1 : 0.097
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 49.83it/s]


In epoch 40, Train loss: 0.178, train roc: 0.980, train f1: 0.499, val loss: 0.625, val roc: 0.722, val f1 : 0.123
Early stopping extemum : 0.7810494595882723


100%|█████████▉| 514/515 [00:10<00:00, 50.01it/s]


In epoch 41, Train loss: 0.192, train roc: 0.977, train f1: 0.487, val loss: 0.600, val roc: 0.719, val f1 : 0.107
Early stopping extemum : 0.7810494595882723
Stopping early


# Evaluation

In [28]:
predicted_logits = model(test_graphs_batched.to(device), test_graphs_batched.ndata['feat'].to(device), train_mode=False)
predicted = F.sigmoid(predicted_logits).cpu().detach().numpy()

  self.dropout, self.training, self.bidirectional, self.batch_first)


In [29]:
evaluator.eval({"y_true": test_labels, "y_pred": predicted})

{'rocauc': 0.778821529963885}

# Table with results

In [30]:
!ls

dataset  experiments_gcn.csv  sample_data


In [31]:
import pandas as pd
pd.set_option('display.max_colwidth', None)

display(pd.read_csv('experiments_gcn.csv'))

Unnamed: 0,Approach,Val rocauc,Test rocauc
0,GCN and sageconv;\nonly raw node features; \nbatch size: 512;,0.625,0.625
1,GCN and sageconv (mean aggregation);\nonly raw node features; \nbatch size: 512; imbalanced weights;,0.672,0.632
2,GCN only;\nonly raw node features; \nbatch size: 512; imbalanced weights;,0.615,0.614
3,GCN and sageconv (lstm+mean agg);;\nonly raw node features; \nbatch size: 512; imbalanced weights; \nl2 no weight decay,0.61,0.65
4,GCN and sageconv (lstm+mean agg);;\nonly raw node features; \nbatch size: 512; imbalanced weights; l2 weight decay (1e-5),0.6,0.651
5,GCN and sageconv (lstm+mean agg);;\nonly raw node features; \nbatch size: 512; imbalanced weights; l2 weight decay (1e-5) + \nearly stopping (tolerance=3),0.63,0.655
6,GCN and sageconv (lstm+mean agg);;\nonly raw node features; \nbatch size: 64; imbalanced weights; l2 weight decay (1e-5) + \nearly stopping (tolerance=3),0.67,0.66
7,GCN and sageconv (lstm+mean agg);;\nembedded node features; \nbatch size: 64; imbalanced weights; l2 weight decay (1e-5) + \nearly stopping (tolerance=10); bigger number of epochs;,0.781,~0.77
8,GCN and sageconv (lstm+mean agg);;\nembedded node features; embedded edge features as edge_weight\nbatch size: 64; imbalanced weights; l2 weight decay (1e-5) + \nearly stopping (tolerance=10); bigger number of epochs;,0.5,0.5
9,GCN and sageconv (lstm+mean agg);;\nembedded node features; embedded edge features in average + \nconcat and dense\nbatch size: 64; imbalanced weights; l2 weight decay (1e-5) + \nearly stopping (tolerance=10),0.745,0.69


# Conclusions:
1) Using sampling w.r.t distribution in the dataset (balanced_sampling) on average gives worser results; 

2) Using weighted loss improves rocauc score (basically because of the highly imbalanced weights)

3) Adding l2 regularization helps 

4) Using embeddings for node features hepls a lot, no overfitting detected for longer period (basically because of the better representation of categorical features)

5) Using embeddings of edges/raw edge features makes results worser both on the level of message passing (edge_weight) and mere inclusion to the network

Interstingly, current setup of training achievies similar results to ones reported at the bottom of leaderboard (https://ogb.stanford.edu/docs/leader_graphprop/#ogbg-molhiv). A fine-tuning can possibly give better results. 
