In [2]:
###############################################################################
# This notebook demonstrates the application of the RhNet2 model to aqueous
# solubility prediction.
#
# The RhNet2 model presented here (RhNet2SC2) was trained to predict intrinsic
# aqueous solubility from one-electron density matrices (obtained from DFT
# calculations) on a set of 985 drug-like molecules from Wiki-pS0 and was
# tested on the Second Solubility Challenge (2019, SC-2) test sets.
#
# The details of the model architecture are described in the corresponding paper:
# https://doi.org/10.26434/chemrxiv-2024-k2k3l
#
# Implementation:
# https://github.com/Shorku/rhnet2
#
# Second Solubility Challenge (2019, SC-2):
# https://doi.org/10.1021/acs.jcim.9b00345
# https://doi.org/10.1021/acs.jcim.0c00701
###############################################################################

In [10]:
import os

import tensorflow as tf

from data_utils import served_input_from_orca

###############################################################################
# paths and preprocessing settings
###############################################################################
# Root folder with the example ORCA outputs; it is joined with a subset folder
# and an archive name when the path of the input file is built.
orca_out_path = 'data_example'
# Passed as `overlap_thresh` to served_input_from_orca.
# NOTE(review): presumably a cutoff applied to orbital overlaps when the input
# tensors are built -- confirm against data_utils.
overlap_threshold = 0.035
# Location of the saved model read by tf.saved_model.load.
model_path = 'model/RhNet2SC2'

In [11]:
###############################################################################
# Example: predict the water solubility of clofazimine, a test compound the
# model has not seen, from its electronic structure. The results of the DFT
# calculation are available in the data_example/ folder. The first step is to
# convert the output of the quantum chemical package (ORCA) into the set of
# tensors expected by the saved model:

subset_folder = 'set2'
orca_out_file = 'clofazimine.zip'
orca_out = os.path.join(orca_out_path, subset_folder, orca_out_file)
qchem_mat = served_input_from_orca(
    out_file=orca_out,
    overlap_thresh=overlap_threshold,
)

In [12]:
###############################################################################
# Load the saved model from model_path and pick its default serving signature;
# `infer` is the callable used for every prediction in this notebook.

model = tf.saved_model.load(model_path)
# Called later as infer(**qchem_mat); the mapping it returns is indexed with
# the keys 'target', 'melt' and 'logk'.
infer = model.signatures['serving_default']

In [13]:
###############################################################################
# Predict the intrinsic aqueous solubility of clofazimine and compare it with
# the reported value.

# Reported value the prediction is compared against, in the same units as the
# printed label (log μM). Kept in one place so that the printed "Reported"
# figure and the error calculation cannot drift apart.
clofazimine_reported_solubility = -3.05

# The serving signature takes the input tensors as keyword arguments and
# returns a mapping of named outputs; 'target' holds the solubility.
clofazimine_prediction = infer(**qchem_mat)
# NOTE(review): [0] and .numpy()[0] presumably unwrap a batch of one molecule
# with a single output value -- confirm against the model's output shape.
clofazimine_solubility = clofazimine_prediction['target'][0].numpy()[0]
# Signed error: positive means the prediction is above the reported value.
clofazimine_solubility_error = (clofazimine_solubility
                                - clofazimine_reported_solubility)

print(f'Clofazimine intrinsic aqueous solubility (log \u03BCM):\n'
      f'Predicted: {clofazimine_solubility:.2f}, '
      f'Reported: {clofazimine_reported_solubility:.2f}, '
      f'Error: {clofazimine_solubility_error:.2f}')

Clofazimine intrinsic aqueous solubility (log μM):
Predicted: -2.54, Reported: -3.05, Error: 0.51


In [17]:
###############################################################################
# Alongside solubility, the model also predicts the melting point and Kow.
# These outputs were introduced primarily for regularization purposes, and the
# models were not optimized to perform well for anything but solubility.
# Still, let's look at the predicted values:

# Reported reference values; each is used both for the printed "Reported"
# figure and for the error, so it is defined exactly once.
clofazimine_reported_melt = 211.0
clofazimine_reported_kow = 7.66
# Factor applied to the raw 'melt' output before it is reported in degrees
# Celsius.
# NOTE(review): presumably the model was trained on melting points divided by
# 100 -- confirm against the training code.
melt_output_scale = 100

clofazimine_melt = (clofazimine_prediction['melt'][0].numpy()[0]
                    * melt_output_scale)
clofazimine_kow = clofazimine_prediction['logk'][0].numpy()[0]

# Absolute errors are reported for both properties.
clofazimine_melt_error = abs(clofazimine_melt - clofazimine_reported_melt)
clofazimine_kow_error = abs(clofazimine_kow - clofazimine_reported_kow)

# \u00B0 is the DEGREE SIGN (not \u2070, SUPERSCRIPT ZERO, which only looks
# similar).
print(f'Clofazimine melting point (\u00B0C):\n'
      f'Predicted: {clofazimine_melt:.1f}, '
      f'Reported: {clofazimine_reported_melt:.1f}, '
      f'Error: {clofazimine_melt_error:.1f}\n\n'
      f'Clofazimine logK_ow:\n'
      f'Predicted: {clofazimine_kow:.2f}, '
      f'Reported: {clofazimine_reported_kow:.2f}, '
      f'Error: {clofazimine_kow_error:.2f}')

Clofazimine melting point (⁰C):
Predicted: 196.7, Reported: 211.0, Error: 14.3

Clofazimine logK_ow:
Predicted: 7.32, Reported: 7.66, Error: 0.34
