In [11]:
import argparse
import torch
import time 
import numpy as np 
from rdkit import RDLogger
lg = RDLogger.logger()
lg.setLevel(RDLogger.CRITICAL)

from torch.nn import MSELoss
from bondnet.data.dataset import ReactionNetworkDataset
from bondnet.data.dataloader import DataLoaderReactionNetwork
from bondnet.data.featurizer import AtomFeaturizerMinimum, AtomFeaturizerFull, BondAsNodeFeaturizerMinimum, GlobalFeaturizer, BondAsNodeFeaturizerFull
from bondnet.data.grapher import HeteroMoleculeGraph
from bondnet.data.dataset import train_validation_test_split
from bondnet.model.gated_reaction_network import GatedGCNReactionNetwork
from bondnet.scripts.create_label_file import read_input_files
from bondnet.model.metric import WeightedL1Loss
from bondnet.prediction.load_model import load_dataset, load_model
from bondnet.utils import seed_torch
from torchsummary import summary


def train(optimizer, model, nodes, data_loader, loss_fn, metric_fn):

    model.train()

    epoch_loss = 0.0
    accuracy = 0.0
    count = 0.0

    for it, (batched_graph, label) in enumerate(data_loader):
        feats = {nt: batched_graph.nodes[nt].data["feat"] for nt in nodes}
        target = label["value"]
        stdev = label["scaler_stdev"]

        pred = model(batched_graph, feats, label["reaction"])
        pred = pred.view(-1)

        loss = loss_fn(pred, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step() # here is the actual optimizer step

        epoch_loss += loss.detach().item()
        accuracy += metric_fn(pred, target, stdev).detach().item()
        count += len(target)
    
    epoch_loss /= it + 1
    accuracy /= count

    return epoch_loss, accuracy

def evaluate(model, nodes, data_loader, metric_fn):
    model.eval()

    with torch.no_grad():
        accuracy = 0.0
        count = 0.0

        for batched_graph, label in data_loader:
            feats = {nt: batched_graph.nodes[nt].data["feat"] for nt in nodes}
            target = label["value"]
            stdev = label["scaler_stdev"]

            pred = model(batched_graph, feats, label["reaction"])
            pred = pred.view(-1)

            accuracy += metric_fn(pred, target, stdev).detach().item()
            count += len(target)

    return accuracy / count

def get_grapher():
    atom_featurizer = AtomFeaturizerMinimum()
    bond_featurizer = BondAsNodeFeaturizerMinimum()
    #bond_featurizer = BondAsNodeFeaturizerFull()
    # our example dataset contains molecules of charges -1, 0, and 1
    global_featurizer = GlobalFeaturizer(allowed_charges=[-2, -1, 0, 1, 2])

    grapher = HeteroMoleculeGraph(atom_featurizer, bond_featurizer, global_featurizer)
    
    return grapher

def parse_settings(file = "./input_files/input_1.txt"):

    #some default values that get written over if in the file
    test = True
    epochs = 10
    embedding_size = 24
    
    fc_hidden_size = [128, 64]
    fc_layers = -1
    fc_activation = "ReLU"
    fc_batch_norm = 0
    fc_dropout = 0.0

    gated_hidden_size = [64, 64, 64]
    gated_layers = -1
    gated_batch_norm = 0
    gated_graph_norm = 0
    gated_dropout = 0.0
    gated_activation = 'ReLU'

    num_lstm_layers = 3
    num_lstm_iters = 5

    
    

    with open(file) as f:
        lines =  f.readlines()
    
        for i in lines: 
            if(len(i.split()) > 1):
                if(i.split()[0] == 'test'):
                    test = bool(i.split()[1])
                if(i.split()[0] == 'epochs'):
                    epochs = int(i.split()[1])
                if(i.split()[0] == 'embedding_size'):
                    embedding_size = int(i.split()[1])

                if(i.split()[0] == 'gated_hidden_size'):
                    gated_hidden_size = [int(j) for j in i.split()[1:]]
                if(i.split()[0] == 'gated_layers'):
                    gated_layers = int(i.split()[1])
                if(i.split()[0] == 'gated_dropout'):
                    gated_dropout = float(i.split()[1])
                if(i.split()[0] == 'gated_graph_norm'):
                    gated_graph_norm = int(i.split()[1])
                if(i.split()[0] == 'gated_batch_norm'):
                    gated_batch_norm = int(i.split()[1])
                if(i.split()[0] == 'gated_activation'):
                    gated_activation = str(i.split()[1])

                if(i.split()[0] == 'fc_hidden_size'):
                    fc_hidden_size = [int(j) for j in i.split()[1:]]
                if(i.split()[0] == 'fc_layers'):
                    fc_layers = int(i.split()[1])
                if(i.split()[0] == 'fc_activation'):
                    fc_activation = str(i.split()[1])
                if(i.split()[0] == 'fc_batch_norm'):
                    fc_batch_norm = int(i.split()[1])
                if(i.split()[0] == 'fc_dropout'):
                    fc_dropout = float(i.split()[1])

                if(i.split()[0] == 'num_lstm_iters'):
                    num_lstm_iters = int(i.split()[1])
                if(i.split()[0] == 'num_lstm_layers'):
                    num_lstm_layers = int(i.split()[1])

        if(gated_layers == -1):
            gated_layers = len(gated_hidden_size)
        if(fc_layers == -1):
            fc_layers = len(fc_hidden_size)

        print("using the following settings:")
        print("--" * 20)

        print("epochs: {:1d}".format(epochs))
        print("Small Dataset?: " + str(test))
        print("embedding size: {:1d}".format(embedding_size))
        
        print("fc layers: {:1d}".format(fc_layers))
        print("fc hidden layer: " + str(fc_hidden_size))
        print("fc activation: " + str(fc_activation))
        print("fc batch norm: " + str(fc_batch_norm))
        print("fc dropout: {:.2f}".format(fc_dropout))

        print("gated layers: {:1d}".format(gated_layers))
        print("gated hidden layers: " + str(gated_hidden_size))
        print("gated activation: " + str(gated_activation))
        print("gated dropout: {:.2f}".format(gated_dropout))
        print("gated batch norm: " + str(gated_batch_norm))
        print("gated graph norm: " + str(gated_graph_norm))

        print("num lstm iters: " + str(num_lstm_iters))
        print("num lstm layer: " + str(num_lstm_layers))
        print("--" * 20)

        dict_ret = {}
        dict_ret["test"] = test
        dict_ret["epochs"] = epochs
        dict_ret["embedding_size"] = embedding_size
        
        dict_ret["fc_hidden_size"] = fc_hidden_size
        dict_ret["fc_layers"] = fc_layers
        dict_ret['fc_dropout'] = fc_dropout
        dict_ret['fc_batch_norm'] = fc_batch_norm
        dict_ret['fc_activation'] = fc_activation

        dict_ret["gated_hidden_size"] = gated_hidden_size
        dict_ret["gated_layers"] = gated_layers
        dict_ret["gated_activation"] = gated_activation
        dict_ret["gated_graph_norm"] = gated_graph_norm
        dict_ret["gated_batch_norm"] = gated_batch_norm
        dict_ret['gated_dropout'] = gated_dropout
        
        dict_ret["num_lstm_iters"] = num_lstm_iters
        dict_ret["num_lstm_layers"] = num_lstm_layers
        
        return dict_ret 


In [13]:
from bondnet.utils import parse_settings
dict_ret = parse_settings(file='./input_files/input_2.txt')
model = load_model(dict_ret['model_path'])
model.gated_layers

using the following settings:
----------------------------------------
restore: True
distributed: False
batch size: 100
on gpu: False
epochs: 1000
embedding size: 24
fc layers: 2
fc hidden layer: [384, 192]
gated layers: 4
gated hidden layers: [192, 192, 192, 192]
gated fc layers: 2
num lstm iters: 6
num lstm layer: 3
num gpu: 1
hyperparam save file: ./hyper.pkl
dataset state dict: home/santiagovargas/Documents/Dataset/mg/dataset_state_dict.pkl
model dir/home/santiagovargas/Documents/Dataset/mg/
Small Dataset?: False
lr: 0.001000
weight decay: 0.000
fc activation: ReLU
fc batch norm: 0
fc dropout: 0.00
gated activation: ReLU
gated dropout: 0.10
gated batch norm: True
gated graph norm: 0
gated resid: True
----------------------------------------


ModuleList(
  (0): GatedGCNConv(
    (activation): ReLU()
    (A): LinearN(
      (fc_layers): ModuleList(
        (0): Linear(in_features=24, out_features=192, bias=True)
        (1): ReLU()
        (2): Linear(in_features=192, out_features=192, bias=True)
        (3): Identity()
      )
    )
    (B): LinearN(
      (fc_layers): ModuleList(
        (0): Linear(in_features=24, out_features=192, bias=True)
        (1): ReLU()
        (2): Linear(in_features=192, out_features=192, bias=True)
        (3): Identity()
      )
    )
    (C): LinearN(
      (fc_layers): ModuleList(
        (0): Linear(in_features=24, out_features=192, bias=True)
        (1): ReLU()
        (2): Linear(in_features=192, out_features=192, bias=True)
        (3): Identity()
      )
    )
    (D): LinearN(
      (fc_layers): ModuleList(
        (0): Linear(in_features=24, out_features=192, bias=True)
        (1): ReLU()
        (2): Linear(in_features=192, out_features=192, bias=True)
        (3): Identity()
    

In [9]:
if(bool(dict_ret["test"])):
    mols, attrs, labels = read_input_files(
        'examples/train/molecules.sdf', 
        'examples/train/molecule_attributes.yaml', 
        'examples/train/reactions.yaml', 
    )
else:
    # todo 
    #mols, attrs, labels = read_input_files(
    #    'examples/train/molecules_libe.sdf', 
    #    'examples/train/molecule_attributes_libe.yaml', 
    #    'examples/train/reactions_libe.yaml', 
    #)
        
    # todo
    #mols_mg, attrs_mg, labels_mg = read_input_files(
    #    '../train/molecules_libe.sdf', 
    #    '../train/train/molecule_attributes_libe.yaml', 
    #    '../train/train/reactions_libe.yaml', 
    #)

    mols, attrs, labels = read_input_files(
        'examples/train/molecules.sdf', 
        'examples/train/molecule_attributes.yaml', 
        'examples/train/reactions.yaml', 
    )
    
    print("sheesh")
    #mols, attrs , labels = read_input_files()


#dataset = ReactionNetworkDataset(
#    grapher=get_grapher(),
#    molecules=mols,
#    labels=labels,
#    extra_features=attrs
#)
#def load_dataset(model_path, molecules, labels, extra_features):
model = load_model(dict_ret['model_path'])
dataset = load_dataset(
    dict_ret["model_path"],
    molecules=mols,
    labels=labels,
    extra_features=attrs)


trainset, valset, testset = train_validation_test_split(dataset, validation=0.1, test=0.1)

# we train with a batch size of 100
train_loader = DataLoaderReactionNetwork(trainset, batch_size=100,shuffle=True)
val_loader = DataLoaderReactionNetwork(valset, batch_size=len(valset), shuffle=False)
test_loader = DataLoaderReactionNetwork(testset, batch_size=len(testset), shuffle=False)


sheesh


ValueError: Model trained with a dataset having species: C,F,H,Mg,N,O,S; Cannot make predictions for molecule containing species: Li. Note that two models trained on different datasets are provided: the `pubchem` supports C, H, O, N and the `bdncm` supports C, H, O, F, Li. You may want to switch the model if you see this message.

In [None]:
print("required_features: 15, 7, 9")
print(dataset.feature_size)

required_features: 15, 7, 9
{'atom': 13, 'bond': 7, 'global': 8}


In [16]:
model = GatedGCNReactionNetwork(
    in_feats=dataset.feature_size, 
    embedding_size=dict_ret["embedding_size"],
    gated_num_layers=dict_ret["gated_num_layers"],
    gated_hidden_size=dict_ret["gated_hidden_size"],
    gated_activation=dict_ret["gated_activation"],
    gated_dropout=float(dict_ret["gated_dropout"]),
    gated_graph_norm=int(dict_ret["gated_graph_norm"]),
    gated_batch_norm=int(dict_ret["gated_batch_norm"]),
    fc_num_layers=dict_ret["fc_layers"],
    fc_hidden_size=dict_ret["fc_hidden_size"],
    fc_activation=dict_ret["fc_activation"],
    fc_dropout=float(dict_ret["fc_dropout"]),
    fc_batch_norm=int(dict_ret["fc_batch_norm"]),
    num_lstm_iters=dict_ret["num_lstm_iters"],
    num_lstm_layers=dict_ret["num_lstm_layers"],
    conv="GatedGCNConv"
)

# optimizer, loss function and metric function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_func = MSELoss(reduction="mean")
metric = WeightedL1Loss(reduction="sum")
feature_names = ["atom", "bond", "global"]
best = 1e10

# main training loop
print("# Epoch     Loss         TrainAcc        ValAcc")
t1 = time.time()

for epoch in range(dict_ret["epochs"]):
    if(epoch % 5 == 0):

        # train on training set 
        loss, train_acc = train( optimizer, model, feature_names, train_loader, loss_func, metric)

        # evaluate on validation set
        val_acc = evaluate(model, feature_names, val_loader, metric)

        # save checkpoint for best performing model 
        if (val_acc < best):
            best = val_acc
            torch.save(model.state_dict(), 'checkpoint.pkl')
        
        print("{:5d}   {:12.6e}   {:12.6e}   {:12.6e}".format(epoch, loss, train_acc, val_acc))
t2 = time.time()


# load best performing model and test it's performance on the test set
checkpoint = torch.load("checkpoint.pkl")
model.load_state_dict(checkpoint)
test_acc = evaluate(model, feature_names, test_loader, metric)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])    

print("TestAcc: {:12.6e}".format(test_acc))
print("Time to Train: {:5.1f} seconds".format(float(t2 - t1)))
print("Number of Trainable Model Params: {}".format(params))


model = GatedGCNReactionNetwork(
    in_feats=dataset.feature_size,
    embedding_size=dict_ret["embedding_size"],
    gated_num_layers=dict_ret["gated_num_layers"],
    gated_hidden_size=dict_ret["gated_hidden_size"],
    gated_activation=dict_ret["gated_activation"],
    gated_dropout=float(dict_ret["gated_dropout"]),
    gated_graph_norm=int(dict_ret["gated_graph_norm"]),
    gated_batch_norm=int(dict_ret["gated_batch_norm"]),
    gated_residual=dict_ret["gated_residual"],
    gated_num_fc_layers=dict_ret["gated_num_fc_layers"],
    fc_num_layers=dict_ret["fc_layers"],
    fc_hidden_size=dict_ret["fc_hidden_size"],
    fc_activation=dict_ret["fc_activation"],
    fc_dropout=float(dict_ret["fc_dropout"]),
    fc_batch_norm=int(dict_ret["fc_batch_norm"]),
    num_lstm_iters=dict_ret["num_lstm_iters"],
    num_lstm_layers=dict_ret["num_lstm_layers"],
    conv="GatedGCNConv",
)

print("-" * 20 + "now disabling gradients" + "-" * 20)
model.gated_layers.requires_grad_(False)

#model.fc_layers.requires_grad_(False)
#model.readout_layer.requires_grad_(False)


best = 1e10

# main training loop
print("# Epoch     Loss         TrainAcc        ValAcc")
t1 = time.time()

for epoch in range(dict_ret["epochs"]):
    if(epoch % 5 == 0):
        # train on training set 
        loss, train_acc = train( optimizer, model, feature_names, train_loader, loss_func, metric)

        # evaluate on validation set
        val_acc = evaluate(model, feature_names, val_loader, metric)

        # save checkpoint for best performing model 
        
        if (val_acc < best):
            best = val_acc
            torch.save(model.state_dict(), 'checkpoint.pkl')
            
        print("{:5d}   {:12.6e}   {:12.6e}   {:12.6e}".format(epoch, loss, train_acc, val_acc))

t2 = time.time()



feature_names = ["atom", "bond", "global"]

# load best performing model and test it's performance on the test set
from os.path import expanduser
#home = expanduser("~")
#model_directory = home+"/Documents/Dataset/mg/checkpoint.pkl" 

#checkpoint = torch.load(model_directory)
#model.load_state_dict(checkpoint['model'])
#test_acc = evaluate(model, feature_names, test_loader, metric)
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])    

print("TestAcc: {:12.6e}".format(test_acc))
print("Time to Train: {:5.1f} seconds".format(float(t2 - t1)))
print("Number of Trainable Model Params: {}".format(params))


# Epoch     Loss         TrainAcc        ValAcc
    0   8.354622e-01   2.457747e+00   2.090321e+00
    5   5.997426e-01   2.131326e+00   1.854207e+00
   10   3.939752e-01   1.736316e+00   1.741784e+00
   15   4.004955e-01   1.777406e+00   1.947630e+00
   20   3.577470e-01   1.632960e+00   1.456190e+00
   25   2.316015e-01   1.361877e+00   1.209444e+00
   30   2.086340e-01   1.235057e+00   1.274223e+00
   35   2.070761e-01   1.257603e+00   1.237815e+00
   40   1.872679e-01   1.174746e+00   1.171825e+00
   45   1.694346e-01   1.121845e+00   1.209740e+00
   50   1.505942e-01   1.087599e+00   1.227573e+00
   55   1.248520e-01   1.002119e+00   1.212870e+00
   60   1.600399e-01   1.060676e+00   1.172067e+00
   65   1.364062e-01   1.022181e+00   1.232711e+00
   70   1.377577e-01   1.037374e+00   1.191099e+00
   75   1.240184e-01   9.555707e-01   1.293485e+00
   80   1.012025e-01   8.473748e-01   1.273484e+00
   85   9.228709e-02   8.663695e-01   1.098703e+00
   90   7.441843e-02   7.834238e-0

KeyboardInterrupt: 

In [16]:
from os.path import expanduser

model = GatedGCNReactionNetwork(
    in_feats=dataset.feature_size,
    embedding_size=dict_ret["embedding_size"],
    gated_num_layers=dict_ret["gated_num_layers"],
    gated_hidden_size=dict_ret["gated_hidden_size"],
    gated_activation=dict_ret["gated_activation"],
    gated_dropout=float(dict_ret["gated_dropout"]),
    gated_graph_norm=int(dict_ret["gated_graph_norm"]),
    gated_batch_norm=int(dict_ret["gated_batch_norm"]),
    gated_residual=dict_ret["gated_residual"],
    gated_num_fc_layers=dict_ret["gated_num_fc_layers"],
    fc_num_layers=dict_ret["fc_layers"],
    fc_hidden_size=dict_ret["fc_hidden_size"],
    fc_activation=dict_ret["fc_activation"],
    fc_dropout=float(dict_ret["fc_dropout"]),
    fc_batch_norm=int(dict_ret["fc_batch_norm"]),
    num_lstm_iters=dict_ret["num_lstm_iters"],
    num_lstm_layers=dict_ret["num_lstm_layers"],
    conv="GatedGCNConv",
)

print("-" * 20 + "now disabling gradients" + "-" * 20)
model.gated_layers.requires_grad_(False)


checkpoint = torch.load("checkpoint.pkl")
model.load_state_dict(checkpoint)

--------------------now disabling gradients--------------------


RuntimeError: Error(s) in loading state_dict for GatedGCNReactionNetwork:
	Missing key(s) in state_dict: "gated_layers.3.A.fc_layers.0.weight", "gated_layers.3.A.fc_layers.0.bias", "gated_layers.3.A.fc_layers.2.weight", "gated_layers.3.A.fc_layers.2.bias", "gated_layers.3.B.fc_layers.0.weight", "gated_layers.3.B.fc_layers.0.bias", "gated_layers.3.B.fc_layers.2.weight", "gated_layers.3.B.fc_layers.2.bias", "gated_layers.3.C.fc_layers.0.weight", "gated_layers.3.C.fc_layers.0.bias", "gated_layers.3.C.fc_layers.2.weight", "gated_layers.3.C.fc_layers.2.bias", "gated_layers.3.D.fc_layers.0.weight", "gated_layers.3.D.fc_layers.0.bias", "gated_layers.3.D.fc_layers.2.weight", "gated_layers.3.D.fc_layers.2.bias", "gated_layers.3.E.fc_layers.0.weight", "gated_layers.3.E.fc_layers.0.bias", "gated_layers.3.E.fc_layers.2.weight", "gated_layers.3.E.fc_layers.2.bias", "gated_layers.3.F.fc_layers.0.weight", "gated_layers.3.F.fc_layers.0.bias", "gated_layers.3.F.fc_layers.2.weight", "gated_layers.3.F.fc_layers.2.bias", "gated_layers.3.G.fc_layers.0.weight", "gated_layers.3.G.fc_layers.0.bias", "gated_layers.3.G.fc_layers.2.weight", "gated_layers.3.G.fc_layers.2.bias", "gated_layers.3.H.fc_layers.0.weight", "gated_layers.3.H.fc_layers.0.bias", "gated_layers.3.H.fc_layers.2.weight", "gated_layers.3.H.fc_layers.2.bias", "gated_layers.3.I.fc_layers.0.weight", "gated_layers.3.I.fc_layers.0.bias", "gated_layers.3.I.fc_layers.2.weight", "gated_layers.3.I.fc_layers.2.bias", "gated_layers.3.bn_node_h.weight", "gated_layers.3.bn_node_h.bias", "gated_layers.3.bn_node_h.running_mean", "gated_layers.3.bn_node_h.running_var", "gated_layers.3.bn_node_e.weight", "gated_layers.3.bn_node_e.bias", "gated_layers.3.bn_node_e.running_mean", "gated_layers.3.bn_node_e.running_var", "gated_layers.3.bn_node_u.weight", "gated_layers.3.bn_node_u.bias", "gated_layers.3.bn_node_u.running_mean", "gated_layers.3.bn_node_u.running_var", "fc_layers.2.weight", "fc_layers.2.bias". 
	Unexpected key(s) in state_dict: "readout_layer.layers.atom.lstm.weight_ih_l3", "readout_layer.layers.atom.lstm.weight_hh_l3", "readout_layer.layers.atom.lstm.bias_ih_l3", "readout_layer.layers.atom.lstm.bias_hh_l3", "readout_layer.layers.atom.lstm.weight_ih_l4", "readout_layer.layers.atom.lstm.weight_hh_l4", "readout_layer.layers.atom.lstm.bias_ih_l4", "readout_layer.layers.atom.lstm.bias_hh_l4", "readout_layer.layers.bond.lstm.weight_ih_l3", "readout_layer.layers.bond.lstm.weight_hh_l3", "readout_layer.layers.bond.lstm.bias_ih_l3", "readout_layer.layers.bond.lstm.bias_hh_l3", "readout_layer.layers.bond.lstm.weight_ih_l4", "readout_layer.layers.bond.lstm.weight_hh_l4", "readout_layer.layers.bond.lstm.bias_ih_l4", "readout_layer.layers.bond.lstm.bias_hh_l4", "fc_layers.6.weight", "fc_layers.6.bias", "fc_layers.1.weight", "fc_layers.1.bias", "fc_layers.1.running_mean", "fc_layers.1.running_var", "fc_layers.1.num_batches_tracked", "fc_layers.3.weight", "fc_layers.3.bias", "fc_layers.4.running_mean", "fc_layers.4.running_var", "fc_layers.4.num_batches_tracked". 
	size mismatch for gated_layers.0.A.fc_layers.0.weight: copying a param with shape torch.Size([64, 24]) from checkpoint, the shape in current model is torch.Size([192, 24]).
	size mismatch for gated_layers.0.A.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.A.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.A.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.B.fc_layers.0.weight: copying a param with shape torch.Size([64, 24]) from checkpoint, the shape in current model is torch.Size([192, 24]).
	size mismatch for gated_layers.0.B.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.B.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.B.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.C.fc_layers.0.weight: copying a param with shape torch.Size([64, 24]) from checkpoint, the shape in current model is torch.Size([192, 24]).
	size mismatch for gated_layers.0.C.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.C.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.C.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.D.fc_layers.0.weight: copying a param with shape torch.Size([64, 24]) from checkpoint, the shape in current model is torch.Size([192, 24]).
	size mismatch for gated_layers.0.D.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.D.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.D.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.E.fc_layers.0.weight: copying a param with shape torch.Size([64, 24]) from checkpoint, the shape in current model is torch.Size([192, 24]).
	size mismatch for gated_layers.0.E.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.E.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.E.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.F.fc_layers.0.weight: copying a param with shape torch.Size([64, 24]) from checkpoint, the shape in current model is torch.Size([192, 24]).
	size mismatch for gated_layers.0.F.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.F.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.F.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.G.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.G.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.G.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.G.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.H.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.H.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.H.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.H.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.I.fc_layers.0.weight: copying a param with shape torch.Size([64, 24]) from checkpoint, the shape in current model is torch.Size([192, 24]).
	size mismatch for gated_layers.0.I.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.I.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.0.I.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_h.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_h.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_h.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_h.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_e.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_e.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_e.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_e.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_u.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_u.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_u.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.0.bn_node_u.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.A.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.A.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.A.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.A.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.B.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.B.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.B.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.B.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.C.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.C.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.C.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.C.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.D.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.D.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.D.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.D.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.E.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.E.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.E.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.E.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.F.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.F.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.F.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.F.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.G.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.G.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.G.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.G.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.H.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.H.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.H.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.H.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.I.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.I.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.I.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.1.I.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_h.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_h.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_h.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_h.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_e.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_e.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_e.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_e.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_u.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_u.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_u.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.1.bn_node_u.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.A.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.A.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.A.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.A.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.B.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.B.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.B.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.B.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.C.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.C.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.C.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.C.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.D.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.D.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.D.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.D.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.E.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.E.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.E.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.E.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.F.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.F.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.F.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.F.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.G.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.G.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.G.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.G.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.H.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.H.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.H.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.H.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.I.fc_layers.0.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.I.fc_layers.0.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.I.fc_layers.2.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for gated_layers.2.I.fc_layers.2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_h.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_h.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_h.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_h.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_e.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_e.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_e.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_e.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_u.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_u.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_u.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for gated_layers.2.bn_node_u.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for readout_layer.layers.atom.lstm.weight_ih_l0: copying a param with shape torch.Size([256, 128]) from checkpoint, the shape in current model is torch.Size([768, 384]).
	size mismatch for readout_layer.layers.atom.lstm.weight_hh_l0: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.atom.lstm.bias_ih_l0: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.atom.lstm.bias_hh_l0: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.atom.lstm.weight_ih_l1: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.atom.lstm.weight_hh_l1: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.atom.lstm.bias_ih_l1: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.atom.lstm.bias_hh_l1: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.atom.lstm.weight_ih_l2: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.atom.lstm.weight_hh_l2: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.atom.lstm.bias_ih_l2: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.atom.lstm.bias_hh_l2: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.bond.lstm.weight_ih_l0: copying a param with shape torch.Size([256, 128]) from checkpoint, the shape in current model is torch.Size([768, 384]).
	size mismatch for readout_layer.layers.bond.lstm.weight_hh_l0: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.bond.lstm.bias_ih_l0: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.bond.lstm.bias_hh_l0: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.bond.lstm.weight_ih_l1: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.bond.lstm.weight_hh_l1: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.bond.lstm.bias_ih_l1: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.bond.lstm.bias_hh_l1: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.bond.lstm.weight_ih_l2: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.bond.lstm.weight_hh_l2: copying a param with shape torch.Size([256, 64]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for readout_layer.layers.bond.lstm.bias_ih_l2: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for readout_layer.layers.bond.lstm.bias_hh_l2: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for fc_layers.0.weight: copying a param with shape torch.Size([32, 320]) from checkpoint, the shape in current model is torch.Size([384, 960]).
	size mismatch for fc_layers.0.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for fc_layers.4.weight: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([1, 192]).
	size mismatch for fc_layers.4.bias: copying a param with shape torch.Size([32]) from checkpoint, the shape in current model is torch.Size([1]).

In [18]:
model = GatedGCNReactionNetwork(
    in_feats=dataset.feature_size,
    embedding_size=dict_ret["embedding_size"],
    gated_num_layers=dict_ret["gated_num_layers"],
    gated_hidden_size=dict_ret["gated_hidden_size"],
    gated_activation=dict_ret["gated_activation"],
    gated_dropout=float(dict_ret["gated_dropout"]),
    gated_graph_norm=int(dict_ret["gated_graph_norm"]),
    gated_batch_norm=int(dict_ret["gated_batch_norm"]),
    gated_residual=dict_ret["gated_residual"],
    gated_num_fc_layers=dict_ret["gated_num_fc_layers"],
    fc_num_layers=dict_ret["fc_layers"],
    fc_hidden_size=dict_ret["fc_hidden_size"],
    fc_activation=dict_ret["fc_activation"],
    fc_dropout=float(dict_ret["fc_dropout"]),
    fc_batch_norm=int(dict_ret["fc_batch_norm"]),
    num_lstm_iters=dict_ret["num_lstm_iters"],
    num_lstm_layers=dict_ret["num_lstm_layers"],
    conv="GatedGCNConv",
)

from os.path import expanduser
home = expanduser("~")
model_ref = home+"/Documents/Dataset/mg/"

#checkpoint = torch.load(model_ref + "checkpoint.pkl")
#model.load_state_dict(checkpoint["model"])


TypeError: 'method' object is not iterable

In [42]:
import pandas as pd 
import networkx as nx 

path_mg_data = "/home/santiagovargas/Documents/Dataset/mg_dataset/"
path_json = path_mg_data + "20220613_reaction_data.json"
mg_df = pd.read_json(path_json)

In [104]:
from bondnet.core.molwrapper import create_wrapper_mol_from_atoms_and_bonds
from bondnet.core.reaction import Reaction

error = 0 
two_product_count = 0
three_prouct_count = 0

for index, row in mg_df.iterrows():

    # handle reactant 
    species = [i['name'] for i in row['reactant_molecule_graph']["molecule"]["sites"]]
    coords = [i["xyz"] for i in row['reactant_molecule_graph']["molecule"]["sites"]]
    bonds = row['reactant_bonds']
    charge = row['charge']
    
    reactant = create_wrapper_mol_from_atoms_and_bonds(
        species, coords, bonds, charge=charge
        )
    reactant_list = [reactant]

    # handle products
    # check subgraphs first
    product_list = []
    num_nodes = 0
    for i in row["composition"].items():
        num_nodes += int(i[-1])
    G = nx.Graph()
    G.add_nodes_from([int(i) for i in range(num_nodes)])
    for i in row["product_bonds"]: G.add_edge(i[0], i[1])
    sub_graphs = [G.subgraph(c) for c in nx.connected_components(G)]

    # still no handling for rxns A --> B + C +....
    if(len(sub_graphs) > 2): three_prouct_count += 1
    # handle A --> B + C
    elif(len(sub_graphs) == 2):
        product_list = []
        for sg in sub_graphs:  
            two_product_count += 1
            nodes = list(sg.nodes())
            bonds = list(sg.edges())
            bond_reindex_list = []
            species = [row['product_molecule_graph']["molecule"]["sites"][sub_ind]['name'] for sub_ind in list(sg.nodes())]
            coords = [row['product_molecule_graph']["molecule"]["sites"][sub_ind]["xyz"] for sub_ind in list(sg.nodes())]
            charge = row['charge']
            for origin_bond_ind in row["product_bonds"]:
                check = any(item in origin_bond_ind for item in nodes)
                if(check): 
                    bond_orig = nodes.index(origin_bond_ind[0])
                    bond_targ = nodes.index(origin_bond_ind[1])
                    bond_reindex_list.append([bond_orig, bond_targ])
            
                product = create_wrapper_mol_from_atoms_and_bonds(
                species, coords, bond_reindex_list, charge=charge
                )
            product_list.append(product)

    #handle A --> B 
    else: 
        species = [i['name'] for i in row['product_molecule_graph']["molecule"]["sites"]]
        coords = [i["xyz"] for i in row['product_molecule_graph']["molecule"]["sites"]]
        bonds = row['product_bonds']
        charge = row['charge']
        free_energy = row['product_free_energy']
        product = create_wrapper_mol_from_atoms_and_bonds(
            species = species, 
            coords = coords, 
            bonds = bonds, 
            charge=charge
            )
        product_list = [product]
    try: 
        rxn = Reaction(
            reactants = reactant_list, 
            products=product_list, 
            broken_bond = row["bonds_broken"],
            free_energy= row["dE_barrier"],
            identifier = row["reaction_id"]
            )
    except: 
        print(product_list)
        error += 1
print("number of rxn failed: " + str(error))
print("number of reactions with two products: " + str(two_product_count))
print("number of reactions with three products: " + str(three_prouct_count))

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
[]
number of rxn failed: 141
number of reactions with two products: 1326
number of reactions with three products: 141


In [83]:

#for i, sg in enumerate(sub_graphs):pass
#print(list(sg.nodes()))
#mg_df.iloc[1]['product_molecule_graph']["molecule"]["sites"][18]
#mg_df.iloc[1]['product_molecule_graph']["molecule"]["sites"][2]['name']

0


In [2]:
import torch

yhat = torch.Tensor([[0.5, 1.5, 0.1], [2.2, 1.3, 1.7]])
print(yhat)
print(yhat.shape)
# tensor([[0.5000, 1.5000, 0.1000],
#         [2.2000, 1.3000, 1.7000]])

y = torch.Tensor([1, 2]).to(torch.long)
print(y)
print(y.shape)
# tensor([1, 2])

loss = torch.nn.CrossEntropyLoss()
cel = loss(input=yhat, target=y)
print(cel)
# tensor(0.8393)

tensor([[0.5000, 1.5000, 0.1000],
        [2.2000, 1.3000, 1.7000]])
torch.Size([2, 3])
tensor([1, 2])
torch.Size([2])
tensor(0.8393)


# Aug 10 

In [1]:
import time, wandb, torch
import numpy as np 
from tqdm import tqdm

from torchmetrics import R2Score
from torch.optim.lr_scheduler import ReduceLROnPlateau

from bondnet.model.metric import EarlyStopping
from bondnet.data.dataset import ReactionNetworkDatasetGraphs
from bondnet.data.dataloader import DataLoaderReactionNetwork
from bondnet.data.featurizer import (
    AtomFeaturizerGraph,
    BondAsNodeGraphFeaturizer,
    GlobalFeaturizerGraph,
)
from bondnet.data.grapher import (
    HeteroCompleteGraphFromDGLAndPandas,
)
from bondnet.data.dataset import train_validation_test_split
#from bondnet.scripts.create_label_file import read_input_files
#from bondnet.model.metric import WeightedL1Loss, WeightedMSELoss
from bondnet.utils import seed_torch, pickle_dump, parse_settings
from bondnet.model.training_utils import (
    evaluate, 
    evaluate_classifier, 
    train, 
    train_classifier, 
    load_model
)
seed_torch()


def evaluate_r2(model, nodes, data_loader):
    model.eval()
    with torch.no_grad():
        for batched_graph, label in data_loader:
            feats = {nt: batched_graph.nodes[nt].data["feat"] for nt in nodes}
            target = label["value"]
            
            pred = model(batched_graph, feats, label["reaction"])
            pred = pred.view(-1)
            target = target.view(-1)

    r2_call = R2Score()
    r2 = r2_call(pred, target)
    return r2


def get_grapher():

    atom_featurizer = AtomFeaturizerGraph()
    bond_featurizer = BondAsNodeGraphFeaturizer()
    global_featurizer = GlobalFeaturizerGraph(allowed_charges=[-2, -1, 0, 1])
    grapher = HeteroCompleteGraphFromDGLAndPandas(
        atom_featurizer, bond_featurizer, global_featurizer
    )
    return grapher


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
best = 1e10
feature_names = ["atom", "bond", "global"]
path_mg_data = "../dataset/mg_dataset/"
dict_train = parse_settings(file="./training_runs/1/settings.txt")
path_mg_data = "../../../dataset/mg_dataset/20220613_reaction_data.json"
    

using the following settings:
----------------------------------------
Small Dataset?: True
restore: True
distributed: False
on gpu: True
num gpu: 1
hyperparam save file: ./hyper.pkl
dataset state dict: home/santiagovargas/Documents/Dataset/mg/dataset_state_dict.pkl
model dir /home/santiagovargas/Documents/Dataset/mg/
classifier False
batch size: 128
epochs: 500
lr: 0.000100
weight decay: 0.000
early_stop: True
scheduler: False
transfer_epochs: 250
transfer: True
loss: False
categories: 5
embedding size: 24
fc layers: 2
fc hidden layer: [128, 64]
gated layers: 3
gated hidden layers: [64, 64, 64]
num lstm iters: 6
num lstm layer: 3
gated fc layers: 2
fc activation: ReLU
fc batch norm: 0
fc dropout: 0.00
gated activation: ReLU
gated dropout: 0.10
gated batch norm: True
gated graph norm: 0
gated resid: True
----------------------------------------


In [3]:
if(dict_train["classifier"]):
    classif_categories = 5 # update this later
else:
    classif_categories = None

if dict_train["on_gpu"]:
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dict_train["gpu"] = device
else:
    device = torch.device("cpu")
    dict_train["gpu"] = "cpu"

print("train on device: {}".format(dict_train["gpu"]))

train on device: cuda


In [4]:
path_mg_data = "../dataset/mg_dataset/20220613_reaction_data.json"

dataset = ReactionNetworkDatasetGraphs(
    grapher=get_grapher(), 
    file=path_mg_data, 
    out_file="./", 
    target = 'ts', 
    classifier = dict_train["classifier"], 
    classif_categories=classif_categories, 
    debug = dict_train["debug"],
    device = device
)

dataset_transfer = ReactionNetworkDatasetGraphs(
    grapher=get_grapher(), file=path_mg_data, out_file="./", 
    target = 'diff', 
    classifier = dict_train["classifier"], 
    classif_categories=classif_categories, 
    debug = dict_train["debug"],
    device = device 
)


reading file from: ../dataset/mg_dataset/20220613_reaction_data.json
rxn raw len: 250
Program finished in 1.1201213479998842 seconds
.............failures.............
reactions len: 26
valid ind len: 26
bond break fail count: 		0
default fail count: 		224
sdf map fail count: 		0
product bond fail count: 	0
about to group and organize
number of grouped reactions: 26
features: 66
labels: 26
molecules: 66
constructing graphs & features....
number of graphs valid: 66
number of graphs: 66
reading file from: ../dataset/mg_dataset/20220613_reaction_data.json
rxn raw len: 250
Program finished in 1.393154549999963 seconds
.............failures.............
reactions len: 26
valid ind len: 26
bond break fail count: 		0
default fail count: 		224
sdf map fail count: 		0
product bond fail count: 	0
about to group and organize
number of grouped reactions: 26
features: 66
labels: 26
molecules: 66
constructing graphs & features....
number of graphs valid: 66
number of graphs: 66


In [8]:
for it, (bg, label) in enumerate(train_loader):
    print(bg.device)


cuda:0


In [6]:
import numpy as np 
import torch, wandb
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torchmetrics import F1Score

from bondnet.model.metric import WeightedL1Loss, WeightedMSELoss
from bondnet.model.gated_reaction_network import GatedGCNReactionNetwork
from bondnet.model.gated_reaction_network_classifier import GatedGCNReactionNetworkClassifier


def train(model, nodes, data_loader, optimizer, device=None):
    """
    basic loop for training a classifier. Gets loss and accuracy
        
    Args:
        model(pytorch model): pytorch model
        nodes(dict): node feature dictionary
        data_loader(loader obj): loader object with data to eval
        device(str): cpu/gpu
    Returns: 
        accuracy (float): accuracy
        loss (float): MSE
    """

    loss_fn = WeightedMSELoss(reduction="sum")
    metric_fn = WeightedL1Loss(reduction="mean")

    model.train()

    epoch_loss = 0.0
    accuracy = 0.0
    count = 0.0

    for it, (batched_graph, label) in enumerate(data_loader):
        feats = {nt: batched_graph.nodes[nt].data["feat"] for nt in nodes}
        target = label["value"]
        norm_atom = label["norm_atom"]
        norm_bond = label["norm_bond"]
        stdev = label["scaler_stdev"]

        if device is not None:
            feats = {k: v.to(device) for k, v in feats.items()}
            target = target.to(device)
            norm_atom = norm_atom.to(device)
            norm_bond = norm_bond.to(device)
            stdev = stdev.to(device)

        try:
            pred = model(batched_graph, feats, label["reaction"], norm_atom, norm_bond)
        except:
            pred = model(batched_graph, feats, label["reaction"])


        # pred = pred.view(-1)
        target_new_shape = (len(target), 1)
        target = target.view(target_new_shape)
        pred_new_shape = (len(pred), 1)
        pred = pred.view(pred_new_shape)


        try:
            loss = loss_fn(pred, target, stdev)
        except:
            loss = loss_fn(pred, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()  # here is the actual optimizer step

        epoch_loss += loss.detach().item()
        accuracy += metric_fn(pred, target, stdev).detach().item()
        count += len(target)

    epoch_loss /= it + 1
    accuracy /= count

    return epoch_loss, accuracy

In [7]:
dict_train['in_feats'] = dataset.feature_size
model, optimizer = load_model(dict_train)
model.to(device)

trainset, valset, testset = train_validation_test_split(
    dataset, validation=0.15, test=0.15
)

train_loader = DataLoaderReactionNetwork(trainset, batch_size=dict_train['batch_size'], 
shuffle=True)
val_loader = DataLoaderReactionNetwork(
    valset, batch_size=len(valset), shuffle=False
)
test_loader = DataLoaderReactionNetwork(
    testset, batch_size=len(testset), shuffle=False
)

scheduler = ReduceLROnPlateau(
    optimizer, mode="min", factor=0.4, patience=25, verbose=True)
stopper = EarlyStopping(patience=150)
stopper_transfer = EarlyStopping(patience=150)

if(dict_train['transfer']):

    trainset_transfer, valset_tranfer, _ = train_validation_test_split(
    dataset_transfer, validation=0.15, test=0.01
    )
    dataset_transfer_loader = DataLoaderReactionNetwork(
        trainset_transfer, batch_size=dict_train['batch_size'], 
    shuffle=True)
    dataset_transfer_loader_val = DataLoaderReactionNetwork(
        valset_tranfer, batch_size=dict_train['batch_size'], 
    shuffle=True)

    print("Initiating Training w/ transfer...")
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print("Number of Trainable Model Params: {}".format(params))
    
    
    for epoch in tqdm(range(dict_train['transfer_epochs'])):
        if(dict_train["classifier"]):
            _, _ = train_classifier(
                model, 
                feature_names, 
                dataset_transfer_loader,
                optimizer, 
                device = dict_train["gpu"], 
                categories = classif_categories
            )
            val_acc_transfer, f1_score = evaluate_classifier(
                model, 
                feature_names, 
                dataset_transfer_loader_val, 
                device = dict_train["gpu"],
                categories = classif_categories
            )
        else:
            _, _ = train(
                model, 
                feature_names, 
                dataset_transfer_loader, 
                optimizer, 
                device = dict_train["gpu"]
            )
            val_acc_transfer = evaluate(
                model, 
                feature_names, 
                dataset_transfer_loader_val, 
                device = dict_train["gpu"]
            )

        if stopper_transfer.step(val_acc_transfer):
            break

    # freeze model layers but fc
    model.gated_layers.requires_grad_(False)
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print("Freezing Gated Layers....")
    print("Number of Trainable Model Params: {}".format(params))

t1 = time.time()
# optimizer, loss function and metric function
# main training loop
if(dict_train["classifier"]):
    print("# Epoch     Loss         TrainAcc        ValAcc        ValF1")
else: 
    print("# Epoch     Loss         TrainAcc        ValAcc        ValR2")        

for epoch in range(dict_train['epochs']):
    # train on training set
    if(dict_train["classifier"]):
        loss, train_acc = train_classifier(
            model, 
            feature_names, 
            train_loader, 
            optimizer, 
            device = dict_train["gpu"],
            categories = classif_categories
        )

        # evaluate on validation set
        val_acc, f1_score = evaluate_classifier(
            model, 
            feature_names, 
            val_loader, 
            device = dict_train["gpu"],
            categories = classif_categories
        )

        wandb.log({"acc validation": val_acc})
        wandb.log({"f1 validation": f1_score})
        print(
            "{:5d}   {:12.6e}   {:12.2e}   {:12.6e}   {:.2f}".format(
                epoch, loss, train_acc, val_acc, f1_score
            )
        )
        
    else: 
        loss, train_acc = train(
        model, 
        feature_names, 
        train_loader, 
        optimizer, 
        dict_train["gpu"]
        )
        # evaluate on validation set
        val_acc = evaluate(model, feature_names, val_loader, dict_train["gpu"])
        val_r2 = evaluate_r2(model, feature_names, val_loader)
    

        print(
            "{:5d}   {:12.6e}   {:12.2e}   {:12.6e}   {:.2f}".format(
                epoch, loss, train_acc, val_acc, val_r2
            )
        )

    # save checkpoint for best performing model
    is_best = val_acc < best
    if is_best:
        best = val_acc
        torch.save(model.state_dict(), "checkpoint.pkl")

    if(dict_train["early_stop"]):
        if stopper.step(val_acc):
            pickle_dump(
                best, dict_train["save_hyper_params"]
            )  # save results for hyperparam tune
            break
    scheduler.step(val_acc)

checkpoint = torch.load("checkpoint.pkl")
model.load_state_dict(checkpoint)

if(dict_train["classifier"]):
    test_acc, test_f1 = evaluate_classifier(
        model, 
        feature_names, 
        test_loader, 
        device = dict_train["gpu"],
        categories = classif_categories
    )
    print("Test Acc: {:12.6e}".format(test_acc))
    print("Test F1: {:12.6e}".format(test_f1))


else: 
    test_acc = evaluate(model, feature_names, test_loader)
    print("TestAcc: {:12.6e}".format(test_acc))

t2 = time.time()
print("Time to Training: {:5.1f} seconds".format(float(t2 - t1)))



Initiating Training w/ transfer...
Number of Trainable Model Params: 378129


 17%|█▋        | 43/250 [00:13<01:07,  3.08it/s]


KeyboardInterrupt: 