In [1]:
import pandas as pd
import numpy as np
from chemprop import data, featurizers, models, utils
from lightning import pytorch as pl
import torch
from preprocess import calc_terminal_features

In [None]:
# Restore the trained MPNN from its saved Lightning checkpoint.
# The path is relative to the notebook's working directory.
model = models.MPNN.load_from_checkpoint(
    './Eint_H2O_1203.ckpt'
)

In [5]:
# Read the candidate table (first spreadsheet column becomes the index)
# and pull out the SMILES column that drives the prediction loop.
df_candidates = pd.read_excel(
    'candidate_database_20250124.xlsx',
    index_col=0,
)
smis = df_candidates.loc[:, 'SMILES']

In [6]:
# Molecule -> graph featurizer built with its default settings; it is handed
# to the MoleculeDataset when the prediction dataset is assembled.
featurizer = (
    featurizers.SimpleMoleculeMolGraphFeaturizer()
)

In [7]:
# Build two datapoints per candidate molecule, one for each molecular end.
# The model therefore predicts an Eint_H2O value per end; those two values are
# post-processed later (based on the Eint_BP values of the two ends) to obtain
# the final Eint_H2O of the molecule.
test_data = []
for smi in smis:
    # Indexed as [end][0] below.
    # NOTE(review): presumably one entry per end whose first item is the
    # per-atom descriptor array expected by `V_d` -- confirm in preprocess.py.
    terminal_features = calc_terminal_features(smi)
    for end_idx in (0, 1):
        # A fresh mol object is parsed for every datapoint, so the two ends
        # never share an RDKit molecule instance.
        test_data.append(
            data.MoleculeDatapoint(
                utils.make_mol(smi, keep_h=False, add_h=False),
                V_d=terminal_features[end_idx][0],
            )
        )

# Rows 2*i and 2*i + 1 of the dataset belong to molecule i; shuffle=False keeps
# that order intact so predictions can be mapped back by position.
test_dset = data.MoleculeDataset(test_data, featurizer=featurizer)
test_loader = data.build_dataloader(test_dset, shuffle=False)

In [None]:
# Run inference over the candidate loader on a single device.
# `test_preds` holds whatever `trainer.predict` returns (expected: one
# prediction batch per dataloader step, in loader order).
with torch.inference_mode():
    trainer = pl.Trainer(
        # logger=None does NOT disable logging: Lightning treats None as
        # "use the default logger", which creates a lightning_logs/ directory.
        # False is the documented way to switch logging off for inference.
        logger=False,
        enable_progress_bar=True,
        accelerator="auto",
        devices=1
    )
    test_preds = trainer.predict(model, test_loader)

In [9]:
# Stack the per-batch predictions into one array along axis 0 (one row per
# datapoint, in loader order).
# The isinstance guard keeps this cell idempotent: re-running it on the
# already-concatenated 2-D array would otherwise flatten it to 1-D and
# silently shift every row index used afterwards.
if not isinstance(test_preds, np.ndarray):
    test_preds = np.concatenate(test_preds, axis=0)

In [None]:
# Predicted Eint_H2O values for the two ends of the third molecule in `smis`
# (0-based position 2): every molecule contributes two consecutive rows, so
# rows 4 and 5 are its first and second end respectively.
(test_preds[4], test_preds[5])
