In [None]:
import sys

# Make the parent directory importable so the project-local `logics_pack`
# package can be imported by the cells below.
# Guarded so that re-running this cell does not keep appending duplicate
# entries to sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [None]:
from logics_pack import global_settings, chemistry, logics, predictor, analysis, smiles_vocab, smiles_lstm
import pandas as pd
import numpy as np
import json
import torch

# Build the project's path table relative to the parent directory, then load
# the experiment settings from the JSON file that the table points to.
project_paths = global_settings.build_project_paths(project_dir='../')
settings_json = project_paths['EXPERIMENT_SETTINGS_JSON']
expset_obj = global_settings.ExperimentSettings(settings_json)

Perform LOGICS fine-tuning to build the agent generator

In [None]:
# LOGICS fine-tuning config
config = global_settings.Object()
project_dir = project_paths['PROJECT_DIR']  # shared prefix of every artifact path below

# No ablation: we will use the full LOGICS model.
config.ablation = None

# Token file, pretraining settings JSON and the pretrained (prior) checkpoint.
config.tokens_path = project_paths['SMILES_TOKENS_PATH']
config.pretrain_setting_path = project_paths['PRETRAIN_SETTING_JSON']
config.pretrained_model_path = project_dir + 'model-prior/prior_e10.ckpt'

# Predictor: the pickle file name is selected by the "kor-pred-best-cv"
# experiment setting.
config.featurizer = predictor.featurizer
best_cv = expset_obj.get_setting("kor-pred-best-cv")
config.predictor_path = project_dir + ("model-kor/predictor/kor_rfr_cv%s.pkl" % best_cv)

# Training length and output path templates; the %d placeholder is filled with
# an epoch number (see how save_ckpt_fmt is used when the agent is loaded).
config.max_epoch = 200
config.save_period = 4
config.save_ckpt_fmt = project_dir + 'model-kor/logics/kor_logics_e%d.ckpt'
config.sample_fmt = project_dir + 'model-kor/logics/kor_logics_e%d.txt'
config.memory_fmt = project_dir + 'model-kor/logics/kor_logics_mem_e%d.csv'

# Sizes: gen_size and exp_size are both tied to save_size.
config.memory_size = 100000
config.save_size = 20000
config.gen_size = config.exp_size = config.save_size

# Learning rate and batch sizes for fine-tuning and sampling.
config.finetune_lr = 1e-4
config.finetune_bs = 32
config.sampling_bs = 256

config.device_name = 'cpu'

In [None]:
# perform fine-tuning
# Runs the LOGICS fine-tuning routine with the settings collected in `config`.
# NOTE(review): presumably long-running (config.max_epoch = 200 on 'cpu') and
# presumably writes checkpoint / sample / memory files to the config.*_fmt
# paths every config.save_period epochs — confirm in logics.LOGICS_training.
logics.LOGICS_training(config)

Load the LOGICS agent generator and sample some examples

In [None]:
# Rebuild the vocabulary and tokenizer from the token file named in the config.
vocab_obj = smiles_vocab.Vocabulary(init_from_file=config.tokens_path)
smtk = smiles_vocab.SmilesTokenizer(vocab_obj)

# The embedding size and hidden-unit setting are read from the pretraining
# settings JSON.
with open(config.pretrain_setting_path, 'r') as setting_file:
    model_setting = json.load(setting_file)

# load agent model (epoch=100)
agent_ckpt_path = config.save_ckpt_fmt % 100
agent_ckpt = torch.load(agent_ckpt_path, map_location='cpu')
lstm_agent = smiles_lstm.SmilesLSTMGenerator(
    vocab_obj,
    model_setting['emb_size'],
    model_setting['hidden_units'],
    device_name='cpu',
)
# Restore the fine-tuned weights stored under 'model_state_dict' in the checkpoint.
lstm_agent.lstm.load_state_dict(agent_ckpt['model_state_dict'])

In [None]:
# sampling
# Wrap the loaded agent in a SafeSampler and show a small batch of its output.
# NOTE(review): presumably 50 is the number of samples requested and maxlen=150
# caps the generated sequence length — confirm in analysis.SafeSampler.sample_clean.
ssplr = analysis.SafeSampler(lstm_agent, batch_size=16)
generated_smiles = ssplr.sample_clean(50, maxlen=150)
display(generated_smiles)