This notebook runs prototype versions of the following methods:
- MAP
- MAP Ensemble
- MC-Dropout
- Featurisation + GP
- SWAG-Diag
- SWAG
- MultiSWAG
- SGLD
- BBP
- BBP + reparametrisation

The methods are run with:
- A smaller version of QM9 (50,000 examples)
- A single split of the data (random)
- A D-MPNN optimised through a crude grid search (hidden size 500, depth 5, 3 FFN layers)

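Please re-instantiate `args` (re-run the three cells under **Args**) before running each method: every method cell below modifies `args` in place, so settings from one method would otherwise carry over into the next.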

In [1]:
import csv
import os
import sys
import numpy as np
import torch
import pickle
import copy
import pandas as pd

from logging import Logger
from typing import List
from tqdm import trange

from torch.optim.lr_scheduler import ExponentialLR
from torch_geometric.datasets import QM9

In [2]:
# Change the kernel's working directory to the chempropBayes checkout
# (presumably so the local `chemprop` package imported in the next cell resolves -- confirm).
# NOTE(review): this is a machine-specific absolute path; edit it to match your own checkout.
%cd /Users/georgelamb/Documents/GitHub/chempropBayes

/Users/georgelamb/Documents/GitHub/chempropBayes


In [3]:
# imports
from chemprop.train.run_training import run_training
from chemprop.args import TrainArgs
from chemprop.data.utils import get_class_sizes, get_data, get_task_names, split_data

### Args

In [4]:
# Root of the chempropBayes checkout. The dataset path and the checkpoint
# directory are both derived from it, so only this line needs editing when the
# notebook is run on another machine.
REPO_DIR = '/Users/georgelamb/Documents/GitHub/chempropBayes'

# Instantiate a fresh TrainArgs object and load the task type and dataset
# location from a dict. Re-running this cell discards any attribute changes
# made to a previous `args` object by the method cells further down.
args = TrainArgs()
args.from_dict({
    'dataset_type': 'regression',
    'data_path': REPO_DIR + '/data/QM9.csv'
})

# location for model checkpoints to be saved
args.save_dir = REPO_DIR + '/log'

In [5]:
### args (non-model)

# reproducibility: seed used when loading and splitting the data
args.seed = 0

# dataset: limit on the number of examples; no feature files or feature generators
args.max_data_size = 50000
args.features_path = args.features_generator = None

# train / validation / test partition
args.split_sizes = (0.8, 0.1, 0.1)
args.split_type = 'random'

# metric used for evaluation
args.metric = 'mae'

# logging frequency and number of training epochs
args.log_frequency = 800
args.epochs = 50


In [6]:
### args (model)

# seed for the random initial weights
args.pytorch_seed = 0

# message-passing encoder
args.depth = 5
args.hidden_size = 500
args.bias = False
args.undirected = False
args.atom_messages = False

# feed-forward network (its hidden width is copied from the encoder's hidden size,
# so hidden_size must already be set above)
args.ffn_num_layers = 3
args.ffn_hidden_size = args.hidden_size

# activation shared by the message-passing and FFN parts
args.activation = 'ReLU'


### MAP

In [7]:
# MAP: one model, one sample
args.samples = args.ensemble_size = 1

# train and keep the returned results in the notebook namespace
results_MAP = run_training(args)
#np.savez(args.save_dir+'/results_MAP', results_MAP)

9639it [00:00, 96383.43it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-6a41b4ab-3f98-4bbb-82d2-2b0cc542ed11.json
Args
{'activation': 'ReLU',
 'atom_messages': False,
 'batch_size_gp': 100,
 'batch_size_sgld': 50,
 'bias': False,
 'block': True,
 'burnin_epochs': 10,
 'c_swag': 0,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': False,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0.0,
 'dropout_FFNonly': False,
 'ensemble_size': 1,
 'epochs': 50,
 'epochs_gp': 100,
 'epochs_swag': 0,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'final_lr_gp': 0.0001,
 'folds_file': None,
 'gp': Fa

44390it [00:00, 71271.90it/s]
  0%|          | 0/50000 [00:00<?, ?it/s]ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/Applications/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-887484e67c96>", line 4, in <module>
    results_MAP = run_training(args)
  File "/Users/georgelamb/Documents/GitHub/chempropBayes/chemprop/train/run_training.py", line 60, in run_training
    data = get_data(path=args.data_path, args=args, logger=logger)
  File "/Users/georgelamb/Documents/GitHub/chempropBayes/chemprop/data/utils.py", line 184, in get_data
    total=len(all_smiles))
  File "/Users/georgelamb/Documents/GitHub/chempropBayes/chemprop/data/utils.py", line 183, in <listcomp>
    ) for i, (smiles, targets, row) in tqdm(enumerate(zip(all_smiles, all_targets, all_rows)),
  File "/Users/georgelamb/Documents/GitHub/chempropBayes/chemprop/data/data.py", line 21, in __init__
    def __init__(self,
KeyboardInterrupt

During handling of the above exceptio

KeyboardInterrupt: 

### MAP Ensemble

In [None]:
# MAP ensemble: ten models, one sample each
args.samples = 1
args.ensemble_size = 10

# train and keep the returned results in the notebook namespace
results_ens = run_training(args)
#np.savez(args.save_dir+'/results_ens', results_ens)

### MC-Dropout

In [8]:
# we have dropout in the following places:
# - after the edge update function (following ReLU)
# - after converting to an atomic representation (following ReLU)
# - on the global hidden state representation
# - after every non-final FFN layer (following ReLU)

In [14]:
# dropout probability, plus the test-time dropout switch
# (presumably keeps dropout active when predicting -- confirm in chemprop)
args.dropout = 0.1
args.test_dropout = True

# one model, 100 samples
args.samples = 100
args.ensemble_size = 1

# training length, batch size and logging frequency
args.batch_size = 50
args.epochs = 250
args.log_frequency = 800

# learning-rate settings: initial, maximum and final values
args.init_lr = args.final_lr = 1e-4
args.max_lr = 1e-3

results_MCdrop = run_training(args)
#np.savez(args.save_dir+'/results_MCdrop', results_MCdrop)

0it [00:00, ?it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-832232a1-7bee-48f8-801a-d6a9460319ca.json
Args
{'RMS': False,
 'activation': 'ReLU',
 'atom_messages': False,
 'batch_size': 50,
 'batch_size_sgld': 50,
 'bias': False,
 'block': True,
 'burnin_epochs': 10,
 'c_swag': 0,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': False,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0.1,
 'ensemble_size': 1,
 'epochs': 250,
 'epochs_swag': 0,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'hidden_size': 500,
 'init_log_noise': -2,
 'init_lr': 0.0001,
 'l

43996it [00:00, 50033.88it/s] 
100%|██████████| 50000/50000 [00:00<00:00, 131488.74it/s]
100%|██████████| 50000/50000 [00:03<00:00, 13659.39it/s]


Number of tasks = 12
Splitting data with seed 0
Total size = 50,000 | train size = 40,000 | val size = 5,000 | test size = 5,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0.1, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=False)
      (W_h): Linear(in_features=500, out_features=500, bias=False)
      (W_o): Linear(in_features=633, out_features=500, bias=True)
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.1, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.1, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.1, inplace=False)
    (7): Linear(in_features=500, out_features=12, bias=True)
  )
)
Number of parameters = 1,147,512
Epoch 0
Loss = 7.5858e-03, PNorm = 49.3504, GNorm = 3.0082, lr_0 = 5.5056e-04
Validation mae = 13

Loss = 5.6047e-04, PNorm = 168.5862, GNorm = 0.3261, lr_0 = 5.3186e-04
Validation mae = 3.092782
Epoch 70
Loss = 5.4370e-04, PNorm = 169.2516, GNorm = 0.1202, lr_0 = 5.2695e-04
Validation mae = 3.042189
Epoch 71
Loss = 5.4269e-04, PNorm = 169.9423, GNorm = 0.1563, lr_0 = 5.2208e-04
Validation mae = 3.111689
Epoch 72
Loss = 5.4389e-04, PNorm = 170.5885, GNorm = 0.1389, lr_0 = 5.1725e-04
Validation mae = 2.809120
Epoch 73
Loss = 5.3294e-04, PNorm = 171.2383, GNorm = 0.1105, lr_0 = 5.1247e-04
Validation mae = 2.775399
Epoch 74
Loss = 5.1902e-04, PNorm = 171.8482, GNorm = 0.1105, lr_0 = 5.0774e-04
Validation mae = 2.736141
Epoch 75
Loss = 5.2317e-04, PNorm = 172.5105, GNorm = 0.1700, lr_0 = 5.0305e-04
Validation mae = 2.815205
Epoch 76
Loss = 5.3230e-04, PNorm = 173.1640, GNorm = 0.3042, lr_0 = 4.9840e-04
Validation mae = 2.892644
Epoch 77
Loss = 5.1875e-04, PNorm = 173.7112, GNorm = 0.1585, lr_0 = 4.9379e-04
Validation mae = 2.832198
Epoch 78
Loss = 5.0980e-04, PNorm = 174.2837, GNorm = 0

Loss = 3.7833e-04, PNorm = 198.1248, GNorm = 0.1082, lr_0 = 2.6021e-04
Validation mae = 2.366049
Epoch 147
Loss = 3.7411e-04, PNorm = 198.3311, GNorm = 0.0918, lr_0 = 2.5780e-04
Validation mae = 2.383507
Epoch 148
Loss = 3.7508e-04, PNorm = 198.5238, GNorm = 0.0848, lr_0 = 2.5542e-04
Validation mae = 2.423979
Epoch 149
Loss = 3.7203e-04, PNorm = 198.7144, GNorm = 0.1037, lr_0 = 2.5306e-04
Validation mae = 2.433752
Epoch 150
Loss = 3.6902e-04, PNorm = 198.8876, GNorm = 0.1092, lr_0 = 2.5072e-04
Validation mae = 2.420938
Epoch 151
Loss = 3.7394e-04, PNorm = 199.0645, GNorm = 0.1066, lr_0 = 2.4840e-04
Validation mae = 2.342943
Epoch 152
Loss = 3.6581e-04, PNorm = 199.2369, GNorm = 0.1109, lr_0 = 2.4611e-04
Validation mae = 2.331077
Epoch 153
Loss = 3.7052e-04, PNorm = 199.4257, GNorm = 0.1024, lr_0 = 2.4383e-04
Validation mae = 2.436813
Epoch 154
Loss = 3.7025e-04, PNorm = 199.6050, GNorm = 0.1223, lr_0 = 2.4158e-04
Validation mae = 2.423140
Epoch 155
Loss = 3.6365e-04, PNorm = 199.7720, 

Validation mae = 2.223402
Epoch 223
Loss = 3.1767e-04, PNorm = 207.2461, GNorm = 0.0975, lr_0 = 1.2730e-04
Validation mae = 2.261421
Epoch 224
Loss = 3.1785e-04, PNorm = 207.3153, GNorm = 0.0954, lr_0 = 1.2613e-04
Validation mae = 2.213537
Epoch 225
Loss = 3.1655e-04, PNorm = 207.3830, GNorm = 0.0841, lr_0 = 1.2496e-04
Validation mae = 2.230956
Epoch 226
Loss = 3.1867e-04, PNorm = 207.4441, GNorm = 0.3352, lr_0 = 1.2380e-04
Validation mae = 2.227172
Epoch 227
Loss = 3.1591e-04, PNorm = 207.5038, GNorm = 0.1043, lr_0 = 1.2266e-04
Validation mae = 2.310653
Epoch 228
Loss = 3.1493e-04, PNorm = 207.5676, GNorm = 0.1142, lr_0 = 1.2153e-04
Validation mae = 2.235040
Epoch 229
Loss = 3.1603e-04, PNorm = 207.6281, GNorm = 0.1073, lr_0 = 1.2040e-04
Validation mae = 2.220497
Epoch 230
Loss = 3.1550e-04, PNorm = 207.6886, GNorm = 0.0987, lr_0 = 1.1929e-04
Validation mae = 2.250058
Epoch 231
Loss = 3.1698e-04, PNorm = 207.7503, GNorm = 0.0798, lr_0 = 1.1819e-04
Validation mae = 2.261920
Epoch 232
L

### SWAG-Diag

In [7]:
# one model, 30 samples, dropout switched off
args.samples = 30
args.ensemble_size = 1
args.test_dropout = False
args.dropout = 0

# SWAG switch
args.swag = True
# cov_mat: whether to compute deviations and then covariance
# block:   whether to compute covariances layer by layer
args.cov_mat = args.block = False
# max number of columns of deviations matrix
args.max_num_models = 30

# how frequently to collect a model (in batches), and number of SWAG epochs
args.c_swag = 200
args.epochs_swag = 25

# SWAG momentum, weight decay and learning rate
args.momentum_swag = 0.5
args.wd_swag = 0.001
args.lr_swag = 1e-3

results_swagD = run_training(args)
#np.savez(args.save_dir+'/results_swagD', results_swagD)

0it [00:00, ?it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-eec2b1f7-f2f6-4a68-beac-8aee973255b6.json
Args
{'activation': 'ReLU',
 'atom_messages': False,
 'bias': False,
 'block': False,
 'c_swag': 200,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': False,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0,
 'ensemble_size': 1,
 'epochs': 50,
 'epochs_swag': 25,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'hidden_size': 500,
 'init_log_noise': 0,
 'init_lr': 0.0001,
 'log_frequency': 800,
 'lr_sgld': 0.001,
 'lr_swag': 0.001,
 'max_data_size': 5000

47074it [00:00, 79154.59it/s]
100%|██████████| 50000/50000 [00:00<00:00, 259260.70it/s]
100%|██████████| 50000/50000 [00:03<00:00, 13420.17it/s]


Number of tasks = 12
Splitting data with seed 0
Total size = 50,000 | train size = 40,000 | val size = 5,000 | test size = 5,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=False)
      (W_h): Linear(in_features=500, out_features=500, bias=False)
      (W_o): Linear(in_features=633, out_features=500, bias=True)
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0, inplace=False)
    (7): Linear(in_features=500, out_features=12, bias=True)
  )
)
Number of parameters = 1,147,512
Model 0 best validation mae = inf on epoch 0
Loading pretrained parameter "encoder.encoder.cached_zero_ve

### SWAG

In [11]:
# one model, 30 samples, dropout switched off
args.samples = 30
args.ensemble_size = 1
args.test_dropout = False
args.dropout = 0

# SWAG switch, with deviations and then covariance computed
args.swag = args.cov_mat = True
# whether to compute covariances layer by layer
args.block = False
# max number of columns of deviations matrix
args.max_num_models = 30

# how frequently to collect a model (in batches), and number of SWAG epochs
args.c_swag = 200
args.epochs_swag = 25

# SWAG momentum, weight decay and learning rate
args.momentum_swag = 0.5
args.wd_swag = 0.001
args.lr_swag = 1e-3

results_swag = run_training(args)
#np.savez(args.save_dir+'/results_swag', results_swag)

0it [00:00, ?it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-832232a1-7bee-48f8-801a-d6a9460319ca.json
Args
{'RMS': False,
 'activation': 'ReLU',
 'atom_messages': False,
 'batch_size_sgld': 50,
 'bias': False,
 'block': False,
 'burnin_epochs': 10,
 'c_swag': 200,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': True,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0,
 'ensemble_size': 1,
 'epochs': 50,
 'epochs_swag': 25,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'hidden_size': 500,
 'init_log_noise': -2,
 'init_lr': 0.0001,
 'log_frequency': 800,

41446it [00:00, 69921.23it/s]
100%|██████████| 50000/50000 [00:01<00:00, 28798.80it/s]
100%|██████████| 50000/50000 [00:03<00:00, 13861.14it/s]


Number of tasks = 12
Splitting data with seed 0
Total size = 50,000 | train size = 40,000 | val size = 5,000 | test size = 5,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=False)
      (W_h): Linear(in_features=500, out_features=500, bias=False)
      (W_o): Linear(in_features=633, out_features=500, bias=True)
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0, inplace=False)
    (7): Linear(in_features=500, out_features=12, bias=True)
  )
)
Number of parameters = 1,147,512
Model 0 best validation mae = inf on epoch 0
----------SWAG training----------
SWAG spoch 0
Loss = 5.2671

### MultiSWAG

In [7]:
# ten models, 30 samples, dropout switched off
args.samples = 30
args.ensemble_size = 10
args.test_dropout = False
args.dropout = 0

# SWAG switch, with deviations and then covariance computed
args.swag = args.cov_mat = True
# whether to compute covariances layer by layer
args.block = False
# max number of columns of deviations matrix
args.max_num_models = 30

# how frequently to collect a model (in batches), and number of SWAG epochs
args.c_swag = 200
args.epochs_swag = 25

# SWAG momentum, weight decay and learning rate
args.momentum_swag = 0.5
args.wd_swag = 0.001
args.lr_swag = 1e-3

results_swagM = run_training(args)
#np.savez(args.save_dir+'/results_swagM', results_swagM)

0it [00:00, ?it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-832232a1-7bee-48f8-801a-d6a9460319ca.json
Args
{'RMS': False,
 'activation': 'ReLU',
 'atom_messages': False,
 'batch_size_sgld': 50,
 'bias': False,
 'block': False,
 'burnin_epochs': 10,
 'c_swag': 200,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': True,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0,
 'ensemble_size': 10,
 'epochs': 50,
 'epochs_swag': 25,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'hidden_size': 500,
 'init_log_noise': -2,
 'init_lr': 0.0001,
 'log_frequency': 800

43280it [00:00, 68612.48it/s]
100%|██████████| 50000/50000 [00:00<00:00, 264493.03it/s]
100%|██████████| 50000/50000 [00:03<00:00, 13561.09it/s]


Number of tasks = 12
Splitting data with seed 0
Total size = 50,000 | train size = 40,000 | val size = 5,000 | test size = 5,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=False)
      (W_h): Linear(in_features=500, out_features=500, bias=False)
      (W_o): Linear(in_features=633, out_features=500, bias=True)
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0, inplace=False)
    (7): Linear(in_features=500, out_features=12, bias=True)
  )
)
Number of parameters = 1,147,512
Epoch 0
Loss = 6.6179e-03, PNorm = 49.3502, GNorm = 3.9430, lr_0 = 5.5056e-04
Validation mae = 12.860934


Loss = 5.4815e-05, PNorm = 84.9849, GNorm = 0.0466, lr_0 = 1.0000e-03
Validation mae = 2.131425
SWAG spoch 19
Loss = 5.5210e-05, PNorm = 84.8503, GNorm = 0.0506, lr_0 = 1.0000e-03
Validation mae = 2.132689
SWAG spoch 20
Loss = 5.5627e-05, PNorm = 84.7155, GNorm = 0.0453, lr_0 = 1.0000e-03
Validation mae = 2.133446
SWAG spoch 21
Loss = 5.6054e-05, PNorm = 84.5810, GNorm = 0.0645, lr_0 = 1.0000e-03
Validation mae = 2.134710
SWAG spoch 22
Loss = 5.6513e-05, PNorm = 84.4471, GNorm = 0.1075, lr_0 = 1.0000e-03
Validation mae = 2.135571
SWAG spoch 23
Loss = 5.6928e-05, PNorm = 84.3132, GNorm = 0.0404, lr_0 = 1.0000e-03
Validation mae = 2.137227
SWAG spoch 24
Loss = 5.7423e-05, PNorm = 84.1795, GNorm = 0.0336, lr_0 = 1.0000e-03
Validation mae = 2.138277
Model 0, sample 0 test mae = 2.232634
Model 0, sample 1 test mae = 2.210806
Model 0, sample 2 test mae = 2.221074
Model 0, sample 3 test mae = 2.217181
Model 0, sample 4 test mae = 2.220303
Model 0, sample 5 test mae = 2.214229
Model 0, sample 

Loss = 4.9853e-05, PNorm = 87.4968, GNorm = 0.0342, lr_0 = 1.0000e-03
Validation mae = 2.086651
SWAG spoch 3
Loss = 4.9776e-05, PNorm = 87.3578, GNorm = 0.0386, lr_0 = 1.0000e-03
Validation mae = 2.084148
SWAG spoch 4
Loss = 4.9809e-05, PNorm = 87.2189, GNorm = 0.0363, lr_0 = 1.0000e-03
Validation mae = 2.081669
SWAG spoch 5
Loss = 4.9905e-05, PNorm = 87.0801, GNorm = 0.0370, lr_0 = 1.0000e-03
Validation mae = 2.078882
SWAG spoch 6
Loss = 5.0021e-05, PNorm = 86.9415, GNorm = 0.0307, lr_0 = 1.0000e-03
Validation mae = 2.077686
SWAG spoch 7
Loss = 5.0178e-05, PNorm = 86.8032, GNorm = 0.0373, lr_0 = 1.0000e-03
Validation mae = 2.076961
SWAG spoch 8
Loss = 5.0382e-05, PNorm = 86.6655, GNorm = 0.0333, lr_0 = 1.0000e-03
Validation mae = 2.076798
SWAG spoch 9
Loss = 5.0609e-05, PNorm = 86.5281, GNorm = 0.0303, lr_0 = 1.0000e-03
Validation mae = 2.076380
SWAG spoch 10
Loss = 5.0860e-05, PNorm = 86.3904, GNorm = 0.0377, lr_0 = 1.0000e-03
Validation mae = 2.076620
SWAG spoch 11
Loss = 5.1149e-05

Loss = 1.0237e-04, PNorm = 84.9852, GNorm = 0.0966, lr_0 = 1.8656e-04
Validation mae = 2.265635
Epoch 37
Loss = 9.6911e-05, PNorm = 85.2040, GNorm = 0.0682, lr_0 = 1.7782e-04
Validation mae = 2.385478
Epoch 38
Loss = 9.1579e-05, PNorm = 85.4082, GNorm = 0.0668, lr_0 = 1.6949e-04
Validation mae = 2.344072
Epoch 39
Loss = 8.8306e-05, PNorm = 85.6136, GNorm = 0.0544, lr_0 = 1.6155e-04
Validation mae = 2.269639
Epoch 40
Loss = 8.3100e-05, PNorm = 85.7801, GNorm = 0.0675, lr_0 = 1.5398e-04
Validation mae = 2.273337
Epoch 41
Loss = 7.8568e-05, PNorm = 85.9605, GNorm = 0.0957, lr_0 = 1.4677e-04
Validation mae = 2.283553
Epoch 42
Loss = 7.6853e-05, PNorm = 86.1140, GNorm = 0.0603, lr_0 = 1.3990e-04
Validation mae = 2.237637
Epoch 43
Loss = 7.3190e-05, PNorm = 86.2646, GNorm = 0.0750, lr_0 = 1.3334e-04
Validation mae = 2.325169
Epoch 44
Loss = 7.1064e-05, PNorm = 86.4036, GNorm = 0.0509, lr_0 = 1.2710e-04
Validation mae = 2.213826
Epoch 45
Loss = 6.7626e-05, PNorm = 86.5314, GNorm = 0.0498, lr_

Validation mae = 3.265083
Epoch 20
Loss = 3.1352e-04, PNorm = 78.6282, GNorm = 0.1416, lr_0 = 4.0192e-04
Validation mae = 2.679342
Epoch 21
Loss = 2.7890e-04, PNorm = 79.3054, GNorm = 0.1592, lr_0 = 3.8310e-04
Validation mae = 2.617798
Epoch 22
Loss = 2.6362e-04, PNorm = 79.9578, GNorm = 0.2558, lr_0 = 3.6515e-04
Validation mae = 2.562533
Epoch 23
Loss = 2.5085e-04, PNorm = 80.5989, GNorm = 0.1291, lr_0 = 3.4805e-04
Validation mae = 2.895226
Epoch 24
Loss = 2.5371e-04, PNorm = 81.3100, GNorm = 0.3356, lr_0 = 3.3175e-04
Validation mae = 3.066902
Epoch 25
Loss = 2.1825e-04, PNorm = 81.8059, GNorm = 0.1543, lr_0 = 3.1621e-04
Validation mae = 2.562028
Epoch 26
Loss = 1.9812e-04, PNorm = 82.2636, GNorm = 0.0983, lr_0 = 3.0140e-04
Validation mae = 2.412629
Epoch 27
Loss = 1.7559e-04, PNorm = 82.6920, GNorm = 0.0798, lr_0 = 2.8728e-04
Validation mae = 2.477852
Epoch 28
Loss = 1.6420e-04, PNorm = 83.1166, GNorm = 0.0854, lr_0 = 2.7383e-04
Validation mae = 2.430160
Epoch 29
Loss = 1.5912e-04, P

Validation mae = 8.298347
Epoch 3
Loss = 1.6387e-03, PNorm = 55.9716, GNorm = 0.8944, lr_0 = 9.0846e-04
Validation mae = 6.624432
Epoch 4
Loss = 1.3116e-03, PNorm = 57.5724, GNorm = 0.4368, lr_0 = 8.6591e-04
Validation mae = 5.477179
Epoch 5
Loss = 1.1780e-03, PNorm = 59.4190, GNorm = 0.3029, lr_0 = 8.2535e-04
Validation mae = 5.154437
Epoch 6
Loss = 1.0581e-03, PNorm = 61.2869, GNorm = 0.7567, lr_0 = 7.8670e-04
Validation mae = 5.454299
Epoch 7
Loss = 9.7366e-04, PNorm = 63.0687, GNorm = 0.4572, lr_0 = 7.4985e-04
Validation mae = 4.702783
Epoch 8
Loss = 8.6694e-04, PNorm = 64.7170, GNorm = 0.4127, lr_0 = 7.1473e-04
Validation mae = 5.222343
Epoch 9
Loss = 7.7720e-04, PNorm = 66.2566, GNorm = 0.2193, lr_0 = 6.8125e-04
Validation mae = 4.124288
Epoch 10
Loss = 7.4297e-04, PNorm = 67.8533, GNorm = 0.2003, lr_0 = 6.4934e-04
Validation mae = 4.002275
Epoch 11
Loss = 6.7003e-04, PNorm = 69.2253, GNorm = 0.2530, lr_0 = 6.1893e-04
Validation mae = 3.538144
Epoch 12
Loss = 6.1203e-04, PNorm = 

Model 4, sample 11 test mae = 2.225902
Model 4, sample 12 test mae = 2.239711
Model 4, sample 13 test mae = 2.232309
Model 4, sample 14 test mae = 2.222795
Model 4, sample 15 test mae = 2.228702
Model 4, sample 16 test mae = 2.254737
Model 4, sample 17 test mae = 2.230978
Model 4, sample 18 test mae = 2.272125
Model 4, sample 19 test mae = 2.226917
Model 4, sample 20 test mae = 2.223904
Model 4, sample 21 test mae = 2.233976
Model 4, sample 22 test mae = 2.231906
Model 4, sample 23 test mae = 2.235409
Model 4, sample 24 test mae = 2.233160
Model 4, sample 25 test mae = 2.221562
Model 4, sample 26 test mae = 2.225460
Model 4, sample 27 test mae = 2.254232
Model 4, sample 28 test mae = 2.243817
Model 4, sample 29 test mae = 2.226292
Building model 5
MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=False)
      (W_h): Linear(in_features=500, ou

Validation mae = 2.160096
SWAG spoch 13
Loss = 5.3760e-05, PNorm = 85.1946, GNorm = 0.0336, lr_0 = 1.0000e-03
Validation mae = 2.160510
SWAG spoch 14
Loss = 5.4075e-05, PNorm = 85.0594, GNorm = 0.0866, lr_0 = 1.0000e-03
Validation mae = 2.161310
SWAG spoch 15
Loss = 5.4431e-05, PNorm = 84.9244, GNorm = 0.0318, lr_0 = 1.0000e-03
Validation mae = 2.162271
SWAG spoch 16
Loss = 5.4808e-05, PNorm = 84.7895, GNorm = 0.0344, lr_0 = 1.0000e-03
Validation mae = 2.162906
SWAG spoch 17
Loss = 5.5195e-05, PNorm = 84.6549, GNorm = 0.0380, lr_0 = 1.0000e-03
Validation mae = 2.164039
SWAG spoch 18
Loss = 5.5594e-05, PNorm = 84.5207, GNorm = 0.0339, lr_0 = 1.0000e-03
Validation mae = 2.164914
SWAG spoch 19
Loss = 5.6021e-05, PNorm = 84.3866, GNorm = 0.0497, lr_0 = 1.0000e-03
Validation mae = 2.166355
SWAG spoch 20
Loss = 5.6432e-05, PNorm = 84.2532, GNorm = 0.0443, lr_0 = 1.0000e-03
Validation mae = 2.167605
SWAG spoch 21
Loss = 5.6885e-05, PNorm = 84.1195, GNorm = 0.0861, lr_0 = 1.0000e-03
Validation

Validation mae = 2.201949
Epoch 48
Loss = 6.1872e-05, PNorm = 87.0003, GNorm = 0.0716, lr_0 = 1.0491e-04
Validation mae = 2.210195
Epoch 49
Loss = 5.9750e-05, PNorm = 87.0970, GNorm = 0.0466, lr_0 = 1.0000e-04
Validation mae = 2.259626
Model 6 best validation mae = 2.201949 on epoch 47
----------SWAG training----------
SWAG spoch 0
Loss = 5.5395e-05, PNorm = 86.7541, GNorm = 0.0446, lr_0 = 1.0000e-03
Validation mae = 2.186991
SWAG spoch 1
Loss = 5.4465e-05, PNorm = 86.6162, GNorm = 0.0351, lr_0 = 1.0000e-03
Validation mae = 2.182276
SWAG spoch 2
Loss = 5.4196e-05, PNorm = 86.4785, GNorm = 0.0644, lr_0 = 1.0000e-03
Validation mae = 2.178206
SWAG spoch 3
Loss = 5.4078e-05, PNorm = 86.3412, GNorm = 0.0431, lr_0 = 1.0000e-03
Validation mae = 2.175538
SWAG spoch 4
Loss = 5.4084e-05, PNorm = 86.2038, GNorm = 0.0282, lr_0 = 1.0000e-03
Validation mae = 2.173502
SWAG spoch 5
Loss = 5.4137e-05, PNorm = 86.0666, GNorm = 0.0592, lr_0 = 1.0000e-03
Validation mae = 2.170803
SWAG spoch 6
Loss = 5.425

Loss = 1.4436e-04, PNorm = 83.7784, GNorm = 0.1164, lr_0 = 2.3712e-04
Validation mae = 2.365630
Epoch 32
Loss = 1.4007e-04, PNorm = 84.1282, GNorm = 0.0814, lr_0 = 2.2602e-04
Validation mae = 2.379583
Epoch 33
Loss = 1.2511e-04, PNorm = 84.3855, GNorm = 0.0671, lr_0 = 2.1543e-04
Validation mae = 2.438041
Epoch 34
Loss = 1.1294e-04, PNorm = 84.6184, GNorm = 0.0771, lr_0 = 2.0534e-04
Validation mae = 2.263824
Epoch 35
Loss = 1.0660e-04, PNorm = 84.8467, GNorm = 0.0962, lr_0 = 1.9572e-04
Validation mae = 2.303209
Epoch 36
Loss = 1.0357e-04, PNorm = 85.0828, GNorm = 0.0913, lr_0 = 1.8656e-04
Validation mae = 2.287324
Epoch 37
Loss = 9.9401e-05, PNorm = 85.2989, GNorm = 0.0711, lr_0 = 1.7782e-04
Validation mae = 2.323317
Epoch 38
Loss = 9.3249e-05, PNorm = 85.4841, GNorm = 0.1064, lr_0 = 1.6949e-04
Validation mae = 2.287220
Epoch 39
Loss = 8.6566e-05, PNorm = 85.6530, GNorm = 0.0567, lr_0 = 1.6155e-04
Validation mae = 2.292515
Epoch 40
Loss = 8.8755e-05, PNorm = 85.8408, GNorm = 0.0518, lr_

Validation mae = 4.278009
Epoch 15
Loss = 4.6900e-04, PNorm = 74.0793, GNorm = 0.1828, lr_0 = 5.1087e-04
Validation mae = 3.521656
Epoch 16
Loss = 4.4647e-04, PNorm = 75.1851, GNorm = 0.1861, lr_0 = 4.8694e-04
Validation mae = 3.031135
Epoch 17
Loss = 4.0134e-04, PNorm = 76.1353, GNorm = 0.1530, lr_0 = 4.6413e-04
Validation mae = 3.179458
Epoch 18
Loss = 3.7819e-04, PNorm = 77.0057, GNorm = 0.1488, lr_0 = 4.4239e-04
Validation mae = 2.976755
Epoch 19
Loss = 3.3598e-04, PNorm = 77.7894, GNorm = 0.1773, lr_0 = 4.2167e-04
Validation mae = 3.056293
Epoch 20
Loss = 3.3423e-04, PNorm = 78.6628, GNorm = 0.1405, lr_0 = 4.0192e-04
Validation mae = 2.998115
Epoch 21
Loss = 2.9802e-04, PNorm = 79.3672, GNorm = 0.0897, lr_0 = 3.8310e-04
Validation mae = 2.685602
Epoch 22
Loss = 2.6328e-04, PNorm = 79.9817, GNorm = 0.1191, lr_0 = 3.6515e-04
Validation mae = 2.642038
Epoch 23
Loss = 2.4530e-04, PNorm = 80.5829, GNorm = 0.1545, lr_0 = 3.4805e-04
Validation mae = 2.706590
Epoch 24
Loss = 2.4460e-04, P

Loss = 6.7903e-03, PNorm = 49.4287, GNorm = 2.5941, lr_0 = 5.5056e-04
Validation mae = 11.660155
Epoch 1
Loss = 3.5380e-03, PNorm = 51.8989, GNorm = 0.6957, lr_0 = 9.9994e-04
Validation mae = 10.461161
Epoch 2
Loss = 2.2145e-03, PNorm = 54.4673, GNorm = 0.8444, lr_0 = 9.5310e-04
Validation mae = 7.192851
Epoch 3
Loss = 1.5791e-03, PNorm = 56.3921, GNorm = 0.7354, lr_0 = 9.0846e-04
Validation mae = 5.862951
Epoch 4
Loss = 1.3517e-03, PNorm = 58.2182, GNorm = 0.6728, lr_0 = 8.6591e-04
Validation mae = 5.079397
Epoch 5
Loss = 1.1813e-03, PNorm = 59.9781, GNorm = 0.4320, lr_0 = 8.2535e-04
Validation mae = 5.042920
Epoch 6
Loss = 1.0241e-03, PNorm = 61.5920, GNorm = 0.7003, lr_0 = 7.8670e-04
Validation mae = 5.369423
Epoch 7
Loss = 9.6873e-04, PNorm = 63.3511, GNorm = 0.4516, lr_0 = 7.4985e-04
Validation mae = 4.403677
Epoch 8
Loss = 8.7348e-04, PNorm = 65.0385, GNorm = 0.3686, lr_0 = 7.1473e-04
Validation mae = 4.298822
Epoch 9
Loss = 7.9075e-04, PNorm = 66.5866, GNorm = 0.6391, lr_0 = 6.8

Model 9, sample 4 test mae = 2.208659
Model 9, sample 5 test mae = 2.213815
Model 9, sample 6 test mae = 2.209237
Model 9, sample 7 test mae = 2.224004
Model 9, sample 8 test mae = 2.237787
Model 9, sample 9 test mae = 2.206944
Model 9, sample 10 test mae = 2.229206
Model 9, sample 11 test mae = 2.226884
Model 9, sample 12 test mae = 2.208873
Model 9, sample 13 test mae = 2.212614
Model 9, sample 14 test mae = 2.213330
Model 9, sample 15 test mae = 2.213504
Model 9, sample 16 test mae = 2.217474
Model 9, sample 17 test mae = 2.240712
Model 9, sample 18 test mae = 2.223078
Model 9, sample 19 test mae = 2.212541
Model 9, sample 20 test mae = 2.219615
Model 9, sample 21 test mae = 2.220876
Model 9, sample 22 test mae = 2.226904
Model 9, sample 23 test mae = 2.206670
Model 9, sample 24 test mae = 2.254723
Model 9, sample 25 test mae = 2.218153
Model 9, sample 26 test mae = 2.204825
Model 9, sample 27 test mae = 2.214759
Model 9, sample 28 test mae = 2.247722
Model 9, sample 29 test mae = 2

### SGLD

In [33]:
# one model, 20 samples
args.samples = 20
args.ensemble_size = 1


# sgld

# SGLD switch and initial log-noise value
args.init_log_noise = -2
args.sgld = True

# SGLD weight decay and learning rate
args.weight_decay_sgld = 0.1
args.lr_sgld = 1e-4

# SGLD-specific logging frequency and batch size
args.log_frequency_sgld = 200
args.batch_size_sgld = 200

# mixing and burn-in lengths in epochs (going by the attribute names -- confirm in chemprop)
args.mix_epochs = 1
args.burnin_epochs = 5


results_sgld = run_training(args)
#np.savez(args.save_dir+'/results_sgld', results_sgld)


0it [00:00, ?it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-832232a1-7bee-48f8-801a-d6a9460319ca.json
Args
{'RMS': False,
 'activation': 'ReLU',
 'atom_messages': False,
 'batch_size_sgld': 200,
 'bias': False,
 'block': True,
 'burnin_epochs': 5,
 'c_swag': 0,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': False,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0.0,
 'ensemble_size': 1,
 'epochs': 50,
 'epochs_swag': 0,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'hidden_size': 500,
 'init_log_noise': -2,
 'init_lr': 0.0001,
 'log_frequency': 800,


42730it [00:00, 74453.10it/s]
100%|██████████| 50000/50000 [00:01<00:00, 26714.08it/s]
100%|██████████| 50000/50000 [00:03<00:00, 14011.70it/s]


Number of tasks = 12
Splitting data with seed 0
Total size = 50,000 | train size = 40,000 | val size = 5,000 | test size = 5,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0.0, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=False)
      (W_h): Linear(in_features=500, out_features=500, bias=False)
      (W_o): Linear(in_features=633, out_features=500, bias=True)
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=500, out_features=12, bias=True)
  )
)
Number of parameters = 1,147,524
Model 0 best validation mae = inf on epoch 0
----------SGLD training----------
SGLD spoch 0
Loss 

### GP

In [7]:
# GP run: ensemble of one model, one sample, 50 epochs for the base training
args.ensemble_size, args.samples, args.epochs = 1, 1, 50

# GP-specific settings, written onto args in the order listed
gp_options = {
    'gp': True,
    'num_inducing_points': 800,
    # batching and logging
    'batch_size_gp': 100,
    'log_frequency_gp': 400,
    # schedule, in epochs
    'epochs_gp': 50,
    'warmup_epochs_gp': 2,
    'unfreeze_epoch_gp': 20,
    # learning rates: initial, maximum, final
    'init_lr_gp': 1e-4,
    'max_lr_gp': 1e-3,
    'final_lr_gp': 1e-4,
}
for gp_option, gp_value in gp_options.items():
    setattr(args, gp_option, gp_value)

results_gp = run_training(args)
# Uncomment to write the results file that the collated-results cells load:
#np.savez(args.save_dir+'/results_gp', results_gp)

10108it [00:00, 101075.04it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-6a41b4ab-3f98-4bbb-82d2-2b0cc542ed11.json
Args
{'activation': 'ReLU',
 'atom_messages': False,
 'batch_size_gp': 100,
 'batch_size_sgld': 50,
 'bias': False,
 'block': True,
 'burnin_epochs': 10,
 'c_swag': 0,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': False,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0.0,
 'dropout_FFNonly': False,
 'ensemble_size': 1,
 'epochs': 50,
 'epochs_gp': 50,
 'epochs_swag': 0,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidden_size': 500,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'final_lr_gp': 0.0001,
 'folds_file': None,
 'gp': Tru

46579it [00:00, 78221.22it/s] 
100%|██████████| 50000/50000 [00:00<00:00, 262106.65it/s]
 56%|█████▌    | 28082/50000 [00:01<00:01, 12414.44it/s]

KeyboardInterrupt: 

### BBP

In [9]:
# BBP run: ensemble of one model, one sample, 50 epochs for the base training
args.ensemble_size, args.samples, args.epochs = 1, 1, 50

# BBP settings, written onto args in the order listed
bbp_options = {
    ### BBP arguments ###
    'bbp': True,
    # note: batch_size and log_frequency are the general (non-BBP) options,
    # so these values replace whatever was configured earlier on args
    'batch_size': 200,
    'log_frequency': 10,

    ### PHASE 1 ###
    'init_log_noise_bbp': -2.9,
    'epochs_phase1_bbp': 10,
    'lr_phase1_bbp': 5e-4,

    ### PHASE 2 ###
    'prior_sig_bbp': 0.1,
    'epochs_phase2_bbp': 20,
    'lr_phase2_bbp': 1e-5,

    ### PHASE 3 ###
    'rho_min_bbp': -5,
    'rho_max_bbp': -4,
    'samples_bbp': 5,
    'lr_phase3_bbp': 1e-6,
    'epochs_phase3_bbp': 10,
}
for bbp_option, bbp_value in bbp_options.items():
    setattr(args, bbp_option, bbp_value)

results_bbp = run_training(args)

0it [00:00, ?it/s]

Command line
python /Applications/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py -f /Users/georgelamb/Library/Jupyter/runtime/kernel-6a41b4ab-3f98-4bbb-82d2-2b0cc542ed11.json
Args
{'activation': 'ReLU',
 'atom_messages': False,
 'batch_size': 200,
 'batch_size_gp': 100,
 'batch_size_sgld': 50,
 'bbp': True,
 'bias': False,
 'block': True,
 'burnin_epochs': 10,
 'c_swag': 0,
 'cache_cutoff': 10000,
 'class_balance': False,
 'config_path': None,
 'cov_mat': False,
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'data_path': '/Users/georgelamb/Documents/GitHub/chempropBayes/data/QM9.csv',
 'dataset_type': 'regression',
 'depth': 5,
 'dropout': 0.0,
 'dropout_FFNonly': False,
 'ensemble_size': 1,
 'epochs': 50,
 'epochs_gp': 100,
 'epochs_phase1_bbp': 10,
 'epochs_phase2_bbp': 20,
 'epochs_phase3_bbp': 10,
 'epochs_swag': 0,
 'features_generator': None,
 'features_only': False,
 'features_path': None,
 'features_size': None,
 'ffn_hidd

41995it [00:02, 17967.74it/s]
100%|██████████| 50000/50000 [00:00<00:00, 132661.28it/s]
100%|██████████| 50000/50000 [00:04<00:00, 11373.88it/s]


Number of tasks = 12
Splitting data with seed 0
Total size = 50,000 | train size = 40,000 | val size = 5,000 | test size = 5,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0.0, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=False)
      (W_h): Linear(in_features=500, out_features=500, bias=False)
      (W_o): Linear(in_features=633, out_features=500, bias=True)
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=500, out_features=12, bias=True)
  )
)
Number of parameters = 1,147,512
Model 0 best validation mae = inf on epoch 0
----------BBP training PHASE 1: learning log noise--

Loss = 6.1114e-01, PNorm = 4831.1731, GNorm = 1400.7493, lr_0 = 1.0000e-06
kl term
tensor(58.6356, grad_fn=<DivBackward0>)
data
tensor(94.7497, grad_fn=<DivBackward0>)
Loss = 6.6431e-01, PNorm = 4831.1684, GNorm = 1216.0151, lr_0 = 1.0000e-06
kl term
tensor(58.6352, grad_fn=<DivBackward0>)
data
tensor(67.0776, grad_fn=<DivBackward0>)
Loss = 6.1587e-01, PNorm = 4831.1635, GNorm = 557.8955, lr_0 = 1.0000e-06
kl term
tensor(58.6348, grad_fn=<DivBackward0>)
data
tensor(31.5280, grad_fn=<DivBackward0>)
Loss = 6.4431e-01, PNorm = 4831.1589, GNorm = 1399.5858, lr_0 = 1.0000e-06
kl term
tensor(58.6344, grad_fn=<DivBackward0>)
data
tensor(82.8841, grad_fn=<DivBackward0>)
Loss = 9.8808e-01, PNorm = 4831.1537, GNorm = 1702.9593, lr_0 = 1.0000e-06
kl term
tensor(58.6340, grad_fn=<DivBackward0>)
data
tensor(63.9207, grad_fn=<DivBackward0>)
Loss = 5.8394e-01, PNorm = 4831.1485, GNorm = 785.8672, lr_0 = 1.0000e-06
kl term
tensor(58.6335, grad_fn=<DivBackward0>)
data
tensor(32.7249, grad_fn=<DivBackwa

Loss = 4.6965e-01, PNorm = 4830.9180, GNorm = 474.8772, lr_0 = 1.0000e-06
kl term
tensor(58.6149, grad_fn=<DivBackward0>)
data
tensor(19.7585, grad_fn=<DivBackward0>)
Loss = 4.1240e-01, PNorm = 4830.9131, GNorm = 513.5326, lr_0 = 1.0000e-06
kl term
tensor(58.6144, grad_fn=<DivBackward0>)
data
tensor(18.4350, grad_fn=<DivBackward0>)
Loss = 4.3185e-01, PNorm = 4830.9074, GNorm = 774.7456, lr_0 = 1.0000e-06
kl term
tensor(58.6140, grad_fn=<DivBackward0>)
data
tensor(40.0981, grad_fn=<DivBackward0>)
Loss = 6.7257e-01, PNorm = 4830.9022, GNorm = 432.6186, lr_0 = 1.0000e-06
kl term
tensor(58.6136, grad_fn=<DivBackward0>)
data
tensor(13.6440, grad_fn=<DivBackward0>)
Loss = 4.1393e-01, PNorm = 4830.8962, GNorm = 444.0232, lr_0 = 1.0000e-06
kl term
tensor(58.6132, grad_fn=<DivBackward0>)
data
tensor(17.5653, grad_fn=<DivBackward0>)
Loss = 4.8508e-01, PNorm = 4830.8909, GNorm = 934.5856, lr_0 = 1.0000e-06
kl term
tensor(58.6127, grad_fn=<DivBackward0>)
data
tensor(50.3018, grad_fn=<DivBackward0>

Loss = 3.6848e-01, PNorm = 4830.6573, GNorm = 334.2470, lr_0 = 1.0000e-06
kl term
tensor(58.5942, grad_fn=<DivBackward0>)
data
tensor(9.7208, grad_fn=<DivBackward0>)
Validation mae = 5.061095
BBP epoch 7
Loss = 5.8894e-01, PNorm = 4830.6529, GNorm = 283.5411, lr_0 = 1.0000e-06
kl term
tensor(58.5938, grad_fn=<DivBackward0>)
data
tensor(2.9355, grad_fn=<DivBackward0>)
Loss = 4.2190e-01, PNorm = 4830.6472, GNorm = 1602.2752, lr_0 = 1.0000e-06
kl term
tensor(58.5934, grad_fn=<DivBackward0>)
data
tensor(43.2326, grad_fn=<DivBackward0>)
Loss = 4.5149e-01, PNorm = 4830.6423, GNorm = 516.4380, lr_0 = 1.0000e-06
kl term
tensor(58.5930, grad_fn=<DivBackward0>)
data
tensor(10.8104, grad_fn=<DivBackward0>)
Loss = 3.4991e-01, PNorm = 4830.6365, GNorm = 714.1657, lr_0 = 1.0000e-06
kl term
tensor(58.5926, grad_fn=<DivBackward0>)
data
tensor(19.7591, grad_fn=<DivBackward0>)
Loss = 3.5128e-01, PNorm = 4830.6308, GNorm = 497.1803, lr_0 = 1.0000e-06
kl term
tensor(58.5922, grad_fn=<DivBackward0>)
data
t

Loss = 3.5850e-01, PNorm = 4830.3940, GNorm = 294.9494, lr_0 = 1.0000e-06
kl term
tensor(58.5738, grad_fn=<DivBackward0>)
data
tensor(4.0714, grad_fn=<DivBackward0>)
Loss = 4.4897e-01, PNorm = 4830.3890, GNorm = 294.3955, lr_0 = 1.0000e-06
kl term
tensor(58.5733, grad_fn=<DivBackward0>)
data
tensor(3.1817, grad_fn=<DivBackward0>)
Loss = 3.5669e-01, PNorm = 4830.3836, GNorm = 406.8137, lr_0 = 1.0000e-06
kl term
tensor(58.5729, grad_fn=<DivBackward0>)
data
tensor(41.5458, grad_fn=<DivBackward0>)
Loss = 3.5024e-01, PNorm = 4830.3780, GNorm = 417.2203, lr_0 = 1.0000e-06
kl term
tensor(58.5725, grad_fn=<DivBackward0>)
data
tensor(16.5873, grad_fn=<DivBackward0>)
Loss = 4.5755e-01, PNorm = 4830.3728, GNorm = 267.0359, lr_0 = 1.0000e-06
kl term
tensor(58.5721, grad_fn=<DivBackward0>)
data
tensor(2.4451, grad_fn=<DivBackward0>)
Loss = 3.5439e-01, PNorm = 4830.3674, GNorm = 407.0830, lr_0 = 1.0000e-06
kl term
tensor(58.5717, grad_fn=<DivBackward0>)
data
tensor(53.5416, grad_fn=<DivBackward0>)
L

AttributeError: 'NoneType' object has no attribute 'children'

In [None]:
# Opens the IPython post-mortem debugger on the most recent exception.
# NOTE(review): looks like a leftover debugging cell; it blocks on interactive
# input, so consider removing it before sharing or running the notebook end to end.
%debug

> [0;32m/Users/georgelamb/Documents/GitHub/chempropBayes/chemprop/bayes/bbp.py[0m(75)[0;36mforward[0;34m()[0m
[0;32m     73 [0;31m[0;34m[0m[0m
[0m[0;32m     74 [0;31m            [0mstd_w[0m [0;34m=[0m [0;36m1e-6[0m [0;34m+[0m [0mF[0m[0;34m.[0m[0msoftplus[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mW_p[0m[0;34m,[0m [0mbeta[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m [0mthreshold[0m[0;34m=[0m[0;36m20[0m[0;34m)[0m [0;31m# compute stds for weights[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 75 [0;31m            [0;32massert[0m [0mnp[0m[0;34m.[0m[0mall[0m[0;34m([0m[0mnp[0m[0;34m.[0m[0misfinite[0m[0;34m([0m[0mstd_w[0m[0;34m.[0m[0mdetach[0m[0;34m([0m[0;34m)[0m[0;34m.[0m[0mnumpy[0m[0;34m([0m[0;34m)[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     76 [0;31m            [0;32massert[0m [0mnp[0m[0;34m.[0m[0mall[0m[0;34m([0m[0mnp[0m[0;34m.[0m[0misfinite[0m[0;34m([0m[0mX[0m[0;34m.[0m[

### Collated results

In [21]:
# load npz files
def _load_saved_results(method_tag):
    """Return the array saved for one method.

    Reads ``results_<method_tag>.npz`` from ``args.save_dir`` and returns the
    array stored under ``'arr_0'``, the key ``np.savez`` assigns to a single
    positional array.

    The archive is opened in a ``with`` block so its file handle is closed as
    soon as the array has been read; ``np.load(...)['arr_0']`` on its own
    leaves the handle open until the archive object is garbage-collected.

    Args:
        method_tag: suffix of the results file name, e.g. ``'MAP'`` or ``'sgld'``.

    Returns:
        The numpy array stored under ``'arr_0'`` in the archive.

    Raises:
        Whatever ``np.load`` raises if the file is missing or unreadable, and
        ``KeyError`` if the archive has no ``'arr_0'`` entry.
    """
    archive_path = os.path.join(args.save_dir, 'results_' + method_tag + '.npz')
    with np.load(archive_path) as archive:
        # indexing reads the array into memory, so it stays valid after the close
        return archive['arr_0']


results_MAP = _load_saved_results('MAP')
results_ens = _load_saved_results('ens')
results_MCdrop = _load_saved_results('MCdrop')
results_swagD = _load_saved_results('swagD')
results_swag = _load_saved_results('swag')
results_swagM = _load_saved_results('swagM')
results_sgld = _load_saved_results('sgld')
results_gp = _load_saved_results('gp')

In [22]:
# Table labels.
# Rows: the task names returned by get_task_names for the data file, then a final 'AVG' row.
row_names = [*get_task_names(args.data_path), 'AVG']

# Columns: one label per method; the order has to match the order in which the
# per-method result arrays are combined when the table is built.
col_names = [
    'MAP', 'MAP ens', 'MC-drop',
    'SWAG-D', 'SWAG', 'MultiSWAG',
    'SGLD', 'GP',
]

In [23]:
# Table of absolute MAE: one row per task, one column per method.
per_method_results = (
    results_MAP,
    results_ens,
    results_MCdrop,
    results_swagD,
    results_swag,
    results_swagM,
    results_sgld,
    results_gp,
)

# place each method's per-task vector in its own column
results = np.stack(per_method_results, axis=1)

# per-method mean over all tasks, appended as the last ('AVG') row
averages = results.mean(axis=0)

df = pd.DataFrame(
    data=np.vstack((results, averages)),
    index=row_names,
    columns=col_names,
)
df.round(6)

Unnamed: 0,MAP,MAP ens,MC-drop,SWAG-D,SWAG,MultiSWAG,SGLD,GP
mu,0.387702,0.336056,0.350818,0.383174,0.38332,0.335484,0.384913,0.390236
alpha,0.477786,0.34766,0.514025,0.476499,0.476623,0.362156,0.475074,0.464999
homo,0.003441,0.00278,0.003225,0.003389,0.003388,0.002781,0.003403,0.003471
lumo,0.003418,0.002671,0.003418,0.003375,0.003374,0.00269,0.003373,0.003387
gap,0.004547,0.003631,0.004301,0.004482,0.004478,0.00362,0.004493,0.004519
r2,20.577695,15.85658,20.239128,20.251981,20.245412,16.006372,20.287025,20.406845
zpve,0.001121,0.000599,0.001183,0.001088,0.001085,0.000652,0.001083,0.000993
cv,0.222967,0.156884,0.228869,0.220962,0.220682,0.163157,0.220081,0.226035
u0,1.307139,0.772746,1.404273,1.281087,1.28308,0.852363,1.278532,1.193313
u298,1.304406,0.772038,1.404264,1.278419,1.277628,0.85262,1.276673,1.194174
