In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import pandas as pd
import numpy as np
from rdkit import Chem
import sascorer #cf. J. Cheminform. 1, 1–10 (2009).
import os
import warnings
# Suppress every warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Notebook-wide matplotlib defaults, applied in one call.
mpl.rcParams.update({
    # Font type 42 (TrueType) in vector output, per the matplotlib rcParams docs.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    # Render resolution of figures.
    "figure.dpi": 300,
    # Use Arial as the sans-serif face for all text.
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],
})

In [None]:
# Load the ligand / decoy SMILES of every target folder under `root_dir`
# and compute a synthetic-accessibility (SA) score for each molecule.
root_dir = "datasets_int_val"


def _collect_csv_paths(base_dir, relative_csv):
    """Return ``<sub-directory>/<relative_csv>`` for each sub-directory of *base_dir*.

    Sub-directories are visited in sorted order so the row order of the
    concatenated tables is reproducible; ``os.scandir`` on its own yields
    entries in arbitrary order. The scandir iterator is closed explicitly
    via the ``with`` block.
    """
    with os.scandir(base_dir) as entries:
        target_dirs = sorted(entry.path for entry in entries if entry.is_dir())
    return [os.path.join(target_dir, relative_csv) for target_dir in target_dirs]


def _load_table(csv_paths):
    """Read every CSV in *csv_paths* and stack them row-wise into one DataFrame."""
    return pd.concat([pd.read_csv(path) for path in csv_paths], axis=0)


def _sa_scores(smiles_list):
    """Return ``sascorer.calculateScore`` for each SMILES string in *smiles_list*.

    Raises:
        ValueError: if RDKit cannot parse a SMILES string
            (``Chem.MolFromSmiles`` returns ``None`` in that case); the
            offending string is included in the message.
    """
    scores = []
    for smi in smiles_list:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            raise ValueError(f"RDKit could not parse SMILES: {smi!r}")
        scores.append(sascorer.calculateScore(mol))
    return scores


# One CSV path per target folder.
# NOTE(review): ligands are read from the MUBDsyn folder only; presumably the
# same ligand set is shared with MUBDreal. Confirm.
MUBD_ligand_dir = _collect_csv_paths(root_dir, "MUBDsyn/Diverse_ligands_PS.csv")
MUBDreal_decoy_dir = _collect_csv_paths(root_dir, "MUBDreal/Final_decoys.csv")
MUBDsyn_decoy_dir = _collect_csv_paths(root_dir, "MUBDsyn/Final_decoys.csv")

# Concatenated tables and their SMILES columns.
df_ligand = _load_table(MUBD_ligand_dir)
ligand_smis = list(df_ligand["SMILES"])

df_decoy_real = _load_table(MUBDreal_decoy_dir)
decoy_real_smis = list(df_decoy_real["SMILES"])

df_decoy_syn = _load_table(MUBDsyn_decoy_dir)
decoy_syn_smis = list(df_decoy_syn["SMILES"])

# SA score per molecule, paired with its SMILES for plotting.
ligand_sa = _sa_scores(ligand_smis)
df_ligand_sa = pd.DataFrame({"SMILES": ligand_smis, "SAscore": ligand_sa})

decoy_real_sa = _sa_scores(decoy_real_smis)
df_decoy_real_sa = pd.DataFrame({"SMILES": decoy_real_smis, "SAscore": decoy_real_sa})

decoy_syn_sa = _sa_scores(decoy_syn_smis)
df_decoy_syn_sa = pd.DataFrame({"SMILES": decoy_syn_smis, "SAscore": decoy_syn_sa})

In [None]:
# Figure: kernel-density estimate of the SA-score distribution for the
# ligands and both decoy sets, saved as "SA.pdf".
label_size = 26
fig, ax1 = plt.subplots(1, 1, figsize=(28, 14))

# (scores, colour) pairs, in the same order as the legend labels below.
kde_series = [
    (df_ligand_sa["SAscore"], "lime"),
    (df_decoy_real_sa["SAscore"], "mediumblue"),
    (df_decoy_syn_sa["SAscore"], "firebrick"),
]
for scores, colour in kde_series:
    # `fill=True` replaces the deprecated `shade=True` alias.
    sns.kdeplot(scores, bw_adjust=3, clip=(1, 10), ax=ax1, fill=True, color=colour, lw=4)

ax1.xaxis.set_tick_params(labelsize=label_size)
ax1.yaxis.set_tick_params(labelsize=label_size)

# Hide the top and right frame lines.
ax1.spines['right'].set_color('none')
ax1.spines['top'].set_color('none')
ax1.set_ylabel("Density", size=label_size)
ax1.set_xlabel("SA score", size=label_size)
ax1.set_xticks(np.arange(1, 11))

# Raw strings keep the backslash of the mathtext command without relying on
# Python passing through the invalid "\m" escape sequence.
ax1.legend(["MUBD ligands", r"$\mathdefault{MUBD^{real}}$ decoys", r"$\mathdefault{MUBD^{syn}}$ decoys"])

# chr(956) is the Greek letter mu, chr(963) is sigma.
# NOTE(review): these values and their positions are hard-coded, not computed
# here; presumably they were copied from the output of the statistics cell at
# the end of the notebook. Confirm they match the current data.
ax1.text(3.2, 0.23, f"{chr(956)} = 3.65\n{chr(963)} = 1.80", fontsize=label_size)
ax1.text(4.2, 0.35, f"{chr(956)} = 3.58\n{chr(963)} = 1.07", fontsize=label_size)
ax1.text(1.9, 0.35, f"{chr(956)} = 3.44\n{chr(963)} = 1.25", fontsize=label_size)

# Put the legend in one row above the axes.
sns.move_legend(ax1, "upper center", bbox_to_anchor=(.5, 1.1), ncol=3,
                title=None, frameon=False, prop={"size": label_size})
ax1.set_xlim(left=1.0)
ax1.set_ylim(bottom=0.0)
fig.tight_layout()
fig.savefig("SA.pdf", transparent=True, bbox_inches='tight')

In [None]:
# Print the mean and standard deviation of each SA-score KDE curve.
#
# The curves are drawn as plain lines on a throwaway axes, so this cell no
# longer needs the plotting cell to be edited (fill/shade switched off) and
# re-run first: a filled KDE plot exposes no Line2D objects to read back.
# The KDE settings below must be kept in sync with the plotting cell.
names = ["MUBD ligands", "MUBDreal decoys", "MUBDsyn decoys"]
score_columns = [
    df_ligand_sa["SAscore"],
    df_decoy_real_sa["SAscore"],
    df_decoy_syn_sa["SAscore"],
]

scratch_fig, scratch_ax = plt.subplots()
for scores in score_columns:
    sns.kdeplot(scores, bw_adjust=3, clip=(1, 10), ax=scratch_ax, fill=False)
kde_curves = [line.get_data() for line in scratch_ax.get_lines()]
plt.close(scratch_fig)  # the scratch figure is never meant to be displayed

for name, (x, y) in zip(names, kde_curves):
    # Moments of the density curve, weighting each grid point x by its height y.
    weight_sum = y.sum()
    mean = np.dot(y, x) / weight_sum
    variance = np.dot(y, x**2) / weight_sum - mean**2
    # Sigma is the square root of the variance; clamp at zero so floating-point
    # round-off cannot produce a NaN.
    std = np.sqrt(max(variance, 0.0))
    print(name)
    print(f"{chr(956)}: {mean:.2f}", f"{chr(963)}: {std:.2f}")

# NOTE(review): the sigma annotations hard-coded in the plotting cell should be
# re-checked against this output.