In [193]:
# Import modules for our exercise
import numpy as np     # numerical package
import pandas as pd    # data organization package
import matplotlib.pyplot as plt # plotting package
import os              # operational system interface package
%matplotlib inline
from PIL import Image, ImageChops, ImageDraw, ImageFont



# Import modules for dealing with chemical information
from rdkit import Chem as Chem
from rdkit.Chem import AllChem as Chem2
from rdkit.Chem import Descriptors as Desc
from rdkit.Chem import rdmolfiles as RDFile
from rdkit.Chem import fmcs, rdFMCS, Draw, rdMolDescriptors
from rdkit.Chem.Draw import DrawingOptions

import math

import os.path as op


#some initial variables
filename="./molecules.mol2" #multimol2 with molecules to be plotted
footprint_dir="./temp" #directory with footprint images
zinc_codes_filename="./zinc_codes.txt" #file with zinc codes to be used
out_dir="./zzz.compiled_images"

#creates the output directory if needed; exist_ok avoids the check-then-create
#race and still raises if the path exists but is not a directory
os.makedirs(out_dir, exist_ok=True)

#font paths
#NOTE(review): these are absolute, machine-specific paths; they must be edited
#before the notebook can run on another machine
bold_font_path="/Users/Garn0123/Desktop/Grad_School/rizzo_research/2020.08.footprint_combiner/SwanseaBold-D0ox.ttf"
normal_font_path="/Users/Garn0123/Desktop/Grad_School/rizzo_research/2020.08.footprint_combiner/Swansea-q3pd.ttf"

#drawing options - makes the bond lines thicker and the atom names bigger
DrawingOptions.bondLineWidth = 3.0
DrawingOptions.atomLabelFontSize = 19

#descriptors written onto each compiled image, in display order.
#each entry must be a key of the per-molecule descriptor dictionary.
#("DOCK_rot_bonds" used to be listed twice, which printed the same row twice)
output_descriptors=["Name_DOCK",
                    "Name_MOE",
                    "Cluster",
                    "Cluster_size",
                    "Molecular_weight",
                    "DOCK_rot_bonds",
                    "TotalScore_(FPS+DCE)",
                    "Continuous_Score",
                    "Continuous_vdw_energy",
                    "Continuous_es_energy",
                    "Footprint_Similarity_Score",
                    "Hungarian_Matching_Similarity_Score",
                    "FPS_vdw_fps",
                    "FPS_es_fps",
                    "Internal_energy_repulsive",
                    "Num_chiral_centers",
                    "Num_H-bonds",
                    "Pharmacophore_Score",
                    "Property_Volume_Score",
                    "Tanimoto_Score",
                    "Lipinski_donors",
                    "Lipinski_acceptors",
                    "Lipinski_violations",
                    "Formal_charge",
                    "SlogP",]

#####Descriptors not passed to final output
#'FPS_hb_fps': '2.236', 
#'FPS_vdw_fp_numres': '805', 
#'FPS_es_fp_numres': '805', 
#'FPS_hb_fp_numres': '805', 
#'Descriptor_Score': '-13.207468', 
#'MOE_rot_bonds': '13', 
#'Lipinski_druglike': '0', 
#'logS': '-6.9725299', 
#'Ligand_efficiency': '0.096', 
#'SMILES': 'ClCC(=O)N(C(C(=O)NC1CCCCC1)c1ccc(OCCCC)cc1)CC1Oc2c(OC1)cccc2'},


## Functions

In [196]:
# Function that reads multi-molecule MOL2 files. Adapted from:
# https://chem-workflows.com/articles/2019/07/18/building-a-multi-molecule-mol2-reader-for-rdkit/
# further adapted from function written by Guilherme Duarte, Rizzo Lab
def mol2_mol_supplier_loop(file):
    ''' This function extracts all the molecules in the multi-molecule
        MOL2 file `file`, together with the names and the "###"-prefixed
        header descriptors found in front of each molecule.

        A molecule starts at the line containing "Name_DOCK" and ends at
        the first following line containing "ROOT". Header lines are
        expected to look like `########## <key>: <value>`; header lines
        that do not fit this shape are not stored as descriptors.

        Variable                I/O    dtype                   default value?
        ---------------------------------------------------------------------
        file                     I     string                  None
        mols                     O     list                    N/A
        mols[i]                  O     rdkit.Chem.rdchem.mol   N/A
        mol_names                O     list of string          N/A
        compiled_descriptors     O     list of dict            N/A

        Returns the tuple (mols, mol_names, compiled_descriptors).
        `mols` and `compiled_descriptors` are appended together, so they
        always have the same length and order. `mols[i]` is whatever
        Chem.MolFromMol2Block returned for that block.
        NOTE(review): RDKit is documented to return None when a block
        cannot be parsed - callers should check.
    '''
    mols=[]
    mol_names=[]
    mol_descriptors={}
    compiled_descriptors=[]
    recording=False
    mol=[]
    with open(file, 'r') as f:
        for line in f:

            # The "Name_DOCK" header line marks the start of each molecule.
            # Sets the recording variable to True, which is the boolean
            # to write each molecule.
            if "Name_DOCK" in line:
                # maxsplit=1 keeps names that themselves contain ":"
                mol_names.append(line.split(":", 1)[1].strip())
                recording=True
                mol = []

            if recording and "###" in line:
                # Header line: the key is the second whitespace-separated
                # token before the first ":", the value is everything after
                # it (values such as SMILES may contain ":" themselves).
                key_part, colon, value = line.partition(":")
                key_tokens = key_part.split()
                if colon and len(key_tokens) > 1:
                    mol_descriptors[key_tokens[1]] = value.strip()

            # "ROOT" in line marks the end of each molecule. Records the
            # line and sets the recording variable to false. Only honoured
            # while recording, so a stray "ROOT" cannot emit a second copy
            # of the previous molecule.
            elif recording and "ROOT" in line:
                mol.append(line)
                recording=False

                # The collected lines already end in newlines, so plain
                # concatenation rebuilds the MOL2 text of a single molecule
                # without touching any commas present in the data.
                block = "".join(mol)

                # Converts the data of a single molecule to a
                # rdkit.Chem.rdchem.mol object.
                m=Chem.MolFromMol2Block(block,
                                        sanitize=False,
                                        cleanupSubstructures=False)
                mols.append(m)
                compiled_descriptors.append(mol_descriptors)
                mol_descriptors={}
                continue

            # Header lines are kept in the block too, as before.
            # NOTE(review): presumably RDKit ignores them - confirm.
            if recording:
                mol.append(line)

    return(mols,mol_names,compiled_descriptors)
                
##For image trimming
def trim(im):
    """Crop `im` to the region that differs from its top-left pixel colour.

    Returns the cropped image, or None when no bounding box is found.
    """
    corner_colour = im.getpixel((0, 0))
    background = Image.new(im.mode, im.size, corner_colour)
    delta = ImageChops.difference(im, background)
    # Adds the difference image to itself with scale 2.0 and offset -100.
    delta = ImageChops.add(delta, delta, 2.0, -100)
    # getbbox gives a (left, upper, right, lower) tuple, or None when the
    # image is completely empty.
    bounds = delta.getbbox()
    if bounds is None:
        return None
    return im.crop(bounds)
    
#for extracting all zinc codes
def extract_zinc_codes(filename):
    """Read one code per line from the text file `filename`.

    Surrounding whitespace is stripped from every line and blank lines
    are skipped.

    Variable         I/O          dtype           default value?
    ------------------------------------------------------------
    filename          I           string                  None
    zinc_code_list    O           list of string          N/A
    """
    # Opens the path passed in (previously an undefined name was opened).
    with open(filename,"r") as f:
        zinc_code_list=[code.strip() for code in f if code.strip()]
    return(zinc_code_list)

#function for generating the final compiled image
def concatenate_image_h_text(im1, im2, mol_name,descriptors):
    ''' Builds one image with the footprint (im1) on the left, the 2D
        molecule (im2) on the right and a two-column descriptor table
        (bold labels, normal values) under the molecule, then saves it as
        "<mol_name>_compiled.png" inside `out_dir`.

        Reads the module-level variables `output_descriptors`,
        `normal_font_path`, `bold_font_path` and `out_dir`.

        Variable         I/O          dtype           default value?
        ------------------------------------------------------------
        im1               I           PIL image               None
        im2               I           PIL image               None
        mol_name          I           string                  None
        descriptors       I           dict                    None

        Raises KeyError if `descriptors` lacks an entry listed in
        `output_descriptors`. Returns nothing.
    '''
    #open new image, and paste the footprint (im1) and 2D molecule (im2) onto it
    concat_image = Image.new('RGB',(im1.width + im2.width, im1.height),(255,255,255))
    #upper left corner paste
    concat_image.paste(im1, (0, 0))
    #paste that runs over a bit onto the footprint, since footprints have a lot of white space
    right_panel_x = im1.width-75
    concat_image.paste(im2, (right_panel_x, 0))
    #make a drawer and initialize fonts for it
    d = ImageDraw.Draw(concat_image)
    font = ImageFont.truetype(normal_font_path,size=20)
    font_bold=ImageFont.truetype(bold_font_path,size=20)

    ###runs through all of the descriptors specified in the first cell and
    ###pulls their values from the molecule's descriptor list;
    ###one line per descriptor, labels and values in separate columns
    output_string_bold = "".join("%s:\n" % (entry,) for entry in output_descriptors)
    output_string = "".join("%s\n" % (descriptors[entry],) for entry in output_descriptors)

    #width of the label column; ImageDraw.textsize was removed in Pillow 10,
    #so textbbox is used when available
    if hasattr(d, "textbbox"):
        #right edge of the text when anchored at x=0, i.e. its drawn width
        bold_width = d.textbbox((0, 0), output_string_bold, font=font_bold)[2]
    else:
        bold_width, _ = d.textsize(output_string_bold, font=font_bold)

    #writes the text, starting halfway down the image
    text_y = concat_image.height/2
    d.text((right_panel_x,text_y), output_string_bold,font=font_bold,fill='black')
    d.text((right_panel_x+bold_width + 5,text_y),output_string,font=font,fill='black')

    #saves the image to a PNG inside out_dir (plain string concatenation
    #dropped the path separator and wrote next to the directory instead)
    concat_image.save(os.path.join(out_dir,"%s_compiled.png" % (mol_name,)))

In [197]:
#Creates the rdkit list from an input mol2
molecule_list , molecule_names, molecule_descriptors = mol2_mol_supplier_loop(filename)

#removes hydrogens and computes 2d coordinates
#molecules that failed to parse (None) are left as-is and skipped below
for i, m in enumerate(molecule_list):
    if m is None:
        continue
    m = Chem.RemoveHs(m)
    Chem2.Compute2DCoords(m)
    molecule_list[i] = m

#runs through each molecule
#molecule_list and molecule_descriptors are built in lockstep by
#mol2_mol_supplier_loop, so they are paired by position instead of by a
#name search (which reused the previous molecule's descriptors on a miss)
for m, mol_name, pass_descriptors in zip(molecule_list, molecule_names, molecule_descriptors):
    if m is None:
        print("Skipping %s: MOL2 block could not be parsed" % (mol_name,))
        continue
    #generates a PIL image of the 2D molecule
    molecule_image = Chem.Draw.MolToImage(m, size = (600,600))
    #loads in the footprint image from the configured footprint directory
    ### TO-DO: MAKE THE FOOTPRINT IN THIS SCRIPT
    footprint_path = os.path.join(footprint_dir, "%s.png" % (mol_name,))
    #the with block closes the image file once the compiled image is saved
    with Image.open(footprint_path) as footprint_image:
        #calls function for making the image
        concatenate_image_h_text(footprint_image,molecule_image,mol_name,pass_descriptors)