In [None]:
import pandas as pd
import os.path as osp
from glob import glob

from rdkit.Chem import Draw
from copy import deepcopy
from PIL import Image
import io

from rdkit import DataStructs
from utils import compute_sims, generalize, find_match, sort_lists_by_first_list, imgs2singlePDF, read_sdf, write_sdf
import argparse

In [None]:
# --- Input / output paths ---------------------------------------------------
# SDF file holding every generated molecule to be ranked and drawn.
all_mols = './tmp/delete_gen_all_mols.sdf'
# SDF file holding the truncated fragments; used to highlight the matching
# substructure in each generated molecule.
query_mols = './tmp/query_mols.sdf'
# SDF file holding the original molecule; used as the similarity reference.
ori_mol = './tmp/KRD4.sdf'
# Destination of the merged grid images. This path is handed to
# imgs2singlePDF, so it carries a .pdf extension rather than .sdf.
out_pdf = './delete_gen_sim_sorted.pdf'

In [None]:
# --- Load structures --------------------------------------------------------
# The loaded molecules get their own names instead of overwriting the path
# variables (all_mols / query_mols / ori_mol) from the configuration cell, so
# this cell can be re-run without re-running the configuration cell first.
all_mol_list = read_sdf(all_mols)
query_mol_list = read_sdf(query_mols)
ref_mol = read_sdf(ori_mol)[0]  # only the first entry of the file is used

# --- Rank generated molecules by similarity to the reference ----------------
# A single reference is passed, so only row 0 of the result is read.
# NOTE(review): presumably compute_sims returns one row per reference
# molecule -- confirm against utils.compute_sims.
sim_mat = compute_sims([ref_mol], all_mol_list)
sim_metric = sim_mat[0].tolist()
sims, show_mols = sort_lists_by_first_list(
    [sim_metric, all_mol_list], ascending=False
)  # sort by similarity, descending
print('show mols:', len(show_mols))

# --- Prepare drawing inputs -------------------------------------------------
# Work on copies so that removing conformers does not alter the loaded
# molecules.
show_mols = deepcopy(show_mols)
for mol in show_mols:
    mol.RemoveAllConformers()  # remove conformers before drawing

legends = [f'No.{rank}; Sim: {sim:.3f};' for rank, sim in enumerate(sims)]
# Atoms to highlight in each molecule, looked up from the query fragments.
highlight_list = [find_match(mol, query_mol_list) for mol in show_mols]

# --- Group molecules so that each group becomes one grid image --------------
group_size = 52
group_starts = range(0, len(show_mols), group_size)
show_mols_group = [show_mols[start:start + group_size] for start in group_starts]
highlight_list_group = [highlight_list[start:start + group_size] for start in group_starts]
legends_group = [legends[start:start + group_size] for start in group_starts]

# --- Draw one grid image per group and merge them into a single PDF ---------
# The loop variables are deliberately distinct from `show_mols`, so the full
# ranked list is still available after this cell has run.
imgs = [
    Draw.MolsToGridImage(
        mol_group,
        molsPerRow=4,
        subImgSize=(500, 500),
        legends=legend_group,
        highlightAtomLists=highlight_group,
        maxMols=group_size,
    )
    for mol_group, legend_group, highlight_group in zip(
        show_mols_group, legends_group, highlight_list_group
    )
]

imgs2singlePDF(imgs, out_pdf)