In [1]:
import csv
import os
import sys
import numpy as np
import torch
import pickle
import copy

from logging import Logger
from typing import List
from tqdm import trange

from torch.optim.lr_scheduler import ExponentialLR
from torch_geometric.datasets import QM9

In [2]:
# Change the kernel's working directory to the local chempropBayes checkout
# (presumably so the `chemprop` package imported in the next cell resolves).
# NOTE(review): this is an absolute, machine-specific path; it has to be
# edited before the notebook can run on any other machine.
%cd /Users/georgelamb/Documents/GitHub/chempropBayes

/Users/georgelamb/Documents/GitHub/chempropBayes


In [3]:
# import from chempropBayes
from chemprop.train.evaluate import evaluate, evaluate_predictions
from chemprop.train.predict import predict
from chemprop.train.train import train
from chemprop.args import TrainArgs
from chemprop.data import StandardScaler, MoleculeDataLoader
from chemprop.data.utils import get_class_sizes, get_data, get_task_names, split_data
from chemprop.models import MoleculeModel, MoleculeModelBBP
from chemprop.nn_utils import param_count
from chemprop.utils import build_optimizer, build_lr_scheduler, get_loss_func, get_metric_func, load_checkpoint,\
    makedirs, save_checkpoint, save_smiles_splits
from chemprop.bayes import BayesLinear

In [4]:
# Root of the local chempropBayes checkout. The default is the original
# developer path, so behaviour is unchanged unless CHEMPROP_BAYES_ROOT is set;
# exporting that variable lets the notebook run on another machine without
# editing every path below.
repo_root = os.environ.get(
    'CHEMPROP_BAYES_ROOT',
    '/Users/georgelamb/Documents/GitHub/chempropBayes',
)

# instantiate args class and load the required entries from a dict
args = TrainArgs()
args.from_dict({
    'dataset_type': 'regression',
    'data_path': os.path.join(repo_root, 'data', 'QM9.csv')
})

# location for model checkpoints to be saved
args.save_dir = os.path.join(repo_root, 'log')

### args (non-model)

# seed for splitting and loading data
args.seed = 0

# data
args.max_data_size = 50000
args.features_path = None
args.features_generator = None

# splitting data
args.split_type = 'random'
args.split_sizes = (0.8, 0.1, 0.1)

# evaluation metric
args.metric = 'mae'

# epochs and logging
args.epochs = 50
args.log_frequency = 800

### args (model)

# seed for random initial weights
args.pytorch_seed = 0

# message passing
args.atom_messages = False
args.undirected = False
args.bias = True
args.hidden_size = 500
args.depth = 5

# FFN (hidden width is tied to the message-passing hidden size set above)
args.ffn_hidden_size = args.hidden_size
args.ffn_num_layers = 3

# shared
args.activation = 'ReLU'


In [5]:
# Number of prediction targets. Set before the models are constructed; the
# final layer of model_orig reports out_features=12 in its repr, so the
# constructors presumably read this value.
args.num_tasks = 12
# Build both variants from the same args: the standard MoleculeModel (plain
# Linear layers) and MoleculeModelBBP (BayesLinear layers, per their reprs).
model_orig = MoleculeModel(args)
model_bbp = MoleculeModelBBP(args)

In [6]:
# Display the module tree of the MoleculeModel instance.
model_orig

MoleculeModel(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0.0, inplace=False)
      (act_func): ReLU()
      (W_i): Linear(in_features=147, out_features=500, bias=True)
      (W_h): Linear(in_features=500, out_features=500, bias=True)
      (W_o): Linear(in_features=633, out_features=500, bias=True)
    )
  )
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=500, out_features=500, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=500, out_features=500, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=500, out_features=12, bias=True)
  )
)

In [7]:
# Display the module tree of the MoleculeModelBBP instance for comparison.
model_bbp

MoleculeModelBBP(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0.0, inplace=False)
      (act_func): ReLU()
      (W_i): BayesLinear()
      (W_h): BayesLinear()
      (W_o): BayesLinear()
    )
  )
  (dropout_layer): Dropout(p=0.0, inplace=False)
  (act_func): ReLU()
  (layer_in): BayesLinear()
  (layer_hid_1): BayesLinear()
  (layer_out): BayesLinear()
)

In [8]:
# List each parameter of the BayesLinear-based model with its shape.
# The shape is printed twice per row, matching the existing output format.
for name, param in model_bbp.named_parameters():
    shape = param.size()
    print(name, shape, shape)

encoder.encoder.cached_zero_vector torch.Size([500]) torch.Size([500])
encoder.encoder.W_i.W_mu torch.Size([147, 500]) torch.Size([147, 500])
encoder.encoder.W_i.b_mu torch.Size([500]) torch.Size([500])
encoder.encoder.W_h.W_mu torch.Size([500, 500]) torch.Size([500, 500])
encoder.encoder.W_h.b_mu torch.Size([500]) torch.Size([500])
encoder.encoder.W_o.W_mu torch.Size([633, 500]) torch.Size([633, 500])
encoder.encoder.W_o.b_mu torch.Size([500]) torch.Size([500])
layer_in.W_mu torch.Size([500, 500]) torch.Size([500, 500])
layer_in.b_mu torch.Size([500]) torch.Size([500])
layer_hid_1.W_mu torch.Size([500, 500]) torch.Size([500, 500])
layer_hid_1.b_mu torch.Size([500]) torch.Size([500])
layer_out.W_mu torch.Size([500, 12]) torch.Size([500, 12])
layer_out.b_mu torch.Size([12]) torch.Size([12])


In [9]:
# List each parameter of the Linear-based model with its shape, in the same
# two-column format, so the two parameter listings can be compared row by row.
for name, param in model_orig.named_parameters():
    shape = param.size()
    print(name, shape, shape)

encoder.encoder.cached_zero_vector torch.Size([500]) torch.Size([500])
encoder.encoder.W_i.weight torch.Size([500, 147]) torch.Size([500, 147])
encoder.encoder.W_i.bias torch.Size([500]) torch.Size([500])
encoder.encoder.W_h.weight torch.Size([500, 500]) torch.Size([500, 500])
encoder.encoder.W_h.bias torch.Size([500]) torch.Size([500])
encoder.encoder.W_o.weight torch.Size([500, 633]) torch.Size([500, 633])
encoder.encoder.W_o.bias torch.Size([500]) torch.Size([500])
ffn.1.weight torch.Size([500, 500]) torch.Size([500, 500])
ffn.1.bias torch.Size([500]) torch.Size([500])
ffn.4.weight torch.Size([500, 500]) torch.Size([500, 500])
ffn.4.bias torch.Size([500]) torch.Size([500])
ffn.7.weight torch.Size([12, 500]) torch.Size([12, 500])
ffn.7.bias torch.Size([12]) torch.Size([12])


In [10]:
# Copy the weights of model_orig into model_bbp so both models start from the
# same values.
#
# The parameters are paired by position, so this only works while both models
# expose the same number of parameters in the same order. Running it after
# `log_noise` has been registered on model_bbp would shift every pair, hence
# the explicit count and shape checks instead of a silent mis-copy.
#
# Linear weights are stored as (out_features, in_features) while the
# BayesLinear `W_mu` tensors are (in_features, out_features), so 2-D tensors
# are transposed; 1-D tensors (biases, cached_zero_vector) are copied as-is.
params_bbp = list(model_bbp.named_parameters())
params_orig = list(model_orig.named_parameters())
if len(params_bbp) != len(params_orig):
    raise ValueError(
        f'parameter count mismatch: model_bbp has {len(params_bbp)}, '
        f'model_orig has {len(params_orig)}'
    )

# First pass: validate every pair so a mismatch leaves model_bbp untouched.
copy_plan = []
for (name_bbp, param_bbp), (name_orig, param_orig) in zip(params_bbp, params_orig):
    source = param_orig.detach()
    if source.dim() == 2:
        source = source.t()
    if param_bbp.shape != source.shape:
        raise ValueError(
            f'shape mismatch: {name_bbp} {tuple(param_bbp.shape)} vs '
            f'{name_orig} {tuple(source.shape)}'
        )
    copy_plan.append((param_bbp, source))

# Second pass: in-place copy (no autograd tracking); the destination keeps its
# own contiguous storage and shares no memory with model_orig.
with torch.no_grad():
    for param_bbp, source in copy_plan:
        param_bbp.copy_(source)
    

In [11]:
# Print the direct children of model_bbp. children() does not recurse, so the
# encoder's BayesLinear layers show up only inside the MPN repr, not as
# separate entries.
for layer in model_bbp.children():
    print(layer)

MPN(
  (encoder): MPNEncoder(
    (dropout_layer): Dropout(p=0.0, inplace=False)
    (act_func): ReLU()
    (W_i): BayesLinear()
    (W_h): BayesLinear()
    (W_o): BayesLinear()
  )
)
Dropout(p=0.0, inplace=False)
ReLU()
BayesLinear()
BayesLinear()
BayesLinear()


In [12]:
# Initialise rho and the prior sigma on the BayesLinear layers that are direct
# children of model_bbp, using fixed trial values; every other child is skipped.
for layer in model_bbp.children():
    if not isinstance(layer, BayesLinear):
        continue
    layer.init_rho(1, 2)
    layer.init_prior_sig(1)

In [13]:
# Set the prior sigma on the BayesLinear layers nested inside the encoder.
# NOTE(review): unlike the top-level layers, init_rho is not called here;
# confirm whether that omission is intentional.
for layer in model_bbp.encoder.encoder.children():
    if not isinstance(layer, BayesLinear):
        continue
    layer.init_prior_sig(1)

In [14]:
# Register the noise parameter on the model, then freeze every parameter
# except `log_noise`, which is the only one left with requires_grad=True.
model_bbp.create_log_noise(args)
for name, parameter in model_bbp.named_parameters():
    parameter.requires_grad = (name == 'log_noise')

In [15]:
# Print every parameter tensor of model_bbp in registration order
# (names are not shown, only the tensors themselves).
for parameter in model_bbp.parameters():
    print(parameter)

Parameter containing:
tensor([-2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 

In [16]:
# Initialise rho and the prior sigma from the args values on every BayesLinear
# layer: first the direct children of the model, then the layers nested in the
# message-passing encoder (same order as two separate loops).
for container in (model_bbp, model_bbp.encoder.encoder):
    for layer in container.children():
        if not isinstance(layer, BayesLinear):
            continue
        layer.init_rho(args.rho_min_bbp, args.rho_max_bbp)
        layer.init_prior_sig(args.prior_sig_bbp)

In [18]:
# Print every parameter tensor of model_bbp again, after the initialisation
# from args, to inspect the resulting state (tensors only, no names).
for parameter in model_bbp.parameters():
    print(parameter)

Parameter containing:
tensor([-2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       requires_grad=True)
Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 

In [46]:
# Build a fresh MoleculeModelBBP from the same args; the following cells use
# it as a scratch model, leaving model_bbp untouched.
temp = MoleculeModelBBP(args)

In [47]:
# Register the `log_noise` parameter on the scratch model; it is listed first
# by named_parameters() afterwards (12 entries, all -2., in the printed dump).
temp.create_log_noise(args)

In [48]:
# Display the module tree of the scratch model.
temp

MoleculeModelBBP(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0.0, inplace=False)
      (act_func): ReLU()
      (W_i): BayesLinear()
      (W_h): BayesLinear()
      (W_o): BayesLinear()
    )
  )
  (dropout_layer): Dropout(p=0.0, inplace=False)
  (act_func): ReLU()
  (layer_in): BayesLinear()
  (layer_hid_1): BayesLinear()
  (layer_out): BayesLinear()
)

In [71]:
# Print each parameter of the scratch model prefixed by its qualified name.
for name, parameter in temp.named_parameters():
    print(f"{name} {parameter!s}")

log_noise Parameter containing:
tensor([-2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2., -2.])
encoder.encoder.cached_zero_vector Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0.

In [50]:
# Create rho on every BayesLinear layer of the scratch model, using the range
# from args: direct children of the model first, then the layers nested in the
# message-passing encoder.
for container in (temp, temp.encoder.encoder):
    for layer in container.children():
        if not isinstance(layer, BayesLinear):
            continue
        layer.init_rho(args.rho_min_bbp, args.rho_max_bbp)

In [37]:
# Display the module tree of the scratch model again.
temp

MoleculeModelBBP(
  (encoder): MPN(
    (encoder): MPNEncoder(
      (dropout_layer): Dropout(p=0.0, inplace=False)
      (act_func): ReLU()
      (W_i): BayesLinear()
      (W_h): BayesLinear()
      (W_o): BayesLinear()
    )
  )
  (dropout_layer): Dropout(p=0.0, inplace=False)
  (act_func): ReLU()
  (layer_in): BayesLinear()
  (layer_hid_1): BayesLinear()
  (layer_out): BayesLinear()
)

In [70]:
# Leave only parameters whose name contains 'W_p' or 'b_p' trainable and
# freeze all the others (this includes `log_noise` and the `*_mu` tensors).
# NOTE(review): 'W_p'/'b_p' are presumably the tensors created by init_rho;
# confirm against BayesLinear.
trainable_tags = ('W_p', 'b_p')
for name, parameter in temp.named_parameters():
    parameter.requires_grad = any(tag in name for tag in trainable_tags)