# **prepare current working directory (cwd)**

In [None]:
%%bash
# Reset the working directory before the run.
# WARNING: destructive — every entry matched by the glob below is deleted recursively.
# remove everything except this notebook
# extglob turns on bash's extended patterns; !(pattern) expands to every name that does NOT match pattern
shopt -s extglob
rm -rf !("notebook.ipynb") 
# to test any error in the terminal, use:
# ipython3 -c "%run notebook.ipynb"

In [None]:
# import tools
import sys, os; sys.path.append(os.path.realpath(os.path.join(os.getcwd(), "../../")))
from tools import *
# while updating modules
# import importlib; importlib.reload(visualize_molecules)
# import importlib; importlib.reload(utils)

import ipywidgets
import glob
import ast
from tqdm import tqdm
import pandas as pd

import rdkit
from rdkit import Chem
import meeko
from meeko import MoleculePreparation
from meeko import PDBQTMolecule
from meeko import RDKitMolCreate
from pymol import cmd
from vina import Vina

import shutil
from pdbfixer import PDBFixer
from openmm.app import PDBFile

# Global variables
# Working directories of this notebook, keyed by role; every path is relative
# to the current working directory and ends with a trailing slash.
dirs_dict = dict(
    raw_ligands='./raw_ligands/',
    prepared_ligands='./prepared_ligands/',
    # entries below are left disabled, exactly as in the original configuration:
    # raw_receptors='./raw_receptors/',
    # prepared_receptors='./prepared_receptors/',
    # vina_scoring='./vina_scoring/',
    # vina_docking='./vina_docking/',
)
# NOTE(review): presumably the random seed handed to Vina; no visible cell reads it — confirm
vina_seed = 1

In [None]:
# create empty directories
# dirs_dict (role -> relative path) is passed as-is to the project helper.
# NOTE(review): directory_scraping is presumably provided by `from tools import *`;
# whether already-existing directories are emptied is not visible here — confirm in the helper.
directory_scraping.prepare_directory_from_dict(dirs_dict)

In [None]:
# list directory before execution
# (`!` hands the command to the system shell, so the `tree` utility must be installed)
!tree .

# **build and prepare ligands**

For comments and more information about this section, take a look at the notebook in the directory 'example_notebooks/example04 - prepare ligands'.

In [None]:
%%file {dirs_dict['raw_ligands']}list_ligands.txt
O=Cc1ccc(O)c(OC)c1
COc1cc(C=O)ccc1O
CC(=O)NCCC1=CNc2c1cc(OC)cc2
CC(=O)NCCc1c[nH]c2ccc(OC)cc12

In [None]:
# Prepare every ligand listed in list_ligands.txt:
#   SMILES -> RDKit molecule -> add hydrogens -> 3D embedding -> PDBQT file (via Meeko),
# then register the optimal docking box size and the ligand center of each ligand
# in a table that is saved next to the PDBQT files.

# AllChem is a submodule that `import rdkit` / `from rdkit import Chem` do not load by
# themselves; import it explicitly so EmbedMolecule does not depend on another package
# having imported it first.
from rdkit.Chem import AllChem

# column order of the table that registers the optimal box size for each ligand
ligand_table_columns = ['ligand_name', 'optimal_box_size', 'euclid_dist_origin', 'ligand_center']

with open(dirs_dict['raw_ligands']+'list_ligands.txt') as f:
    ligand_smis = f.readlines()

# clear output directory
directory_scraping.mkdir_or_clear(dirs_dict['prepared_ligands'])

# print number of smiles in file
print("len(ligand_smis)", len(ligand_smis))

ligand_records = []          # one dict per successfully prepared ligand
error_ligand_smis = []       # SMILES that could not be prepared (consumed by the next cell)
error_ligand_messages = {}   # ligand_name -> reason of the failure, kept for debugging
for idx, ligand_smi in enumerate(tqdm(ligand_smis), start=1):
    # idx follows the line number in the file, so names stay stable even if a line is skipped
    ligand_name = 'ligand_' + f'{idx:03}'
    ligand_smi = ligand_smi.strip()
    if not ligand_smi:
        # blank line in the input file: nothing to prepare and not an error
        continue
    try:
        # load our ligand from SMILES into RDKit (returns None when the SMILES cannot be parsed)
        lig = rdkit.Chem.MolFromSmiles(ligand_smi)
        if lig is None:
            raise ValueError('RDKit could not parse the SMILES string')
        # add hydrogens (without regard to pH)
        protonated_lig = rdkit.Chem.AddHs(lig)
        # generate 3D coordinates for the ligand (returns -1 when the embedding fails)
        if AllChem.EmbedMolecule(protonated_lig) < 0:
            raise ValueError('RDKit could not generate 3D coordinates')
        # convert to a PDBQT string using the MoleculePreparation class from Meeko.
        meeko_prep = meeko.MoleculePreparation()
        meeko_prep.prepare(protonated_lig)
        lig_pdbqt = meeko_prep.write_pdbqt_string()
        # At this point, pdbqt_string can be written to a file for docking with AutoDock-GPU or Vina,
        # or passed directly to Vina within Python using set_ligand_from_string(pdbqt_string)
        out_file_path = dirs_dict['prepared_ligands'] + ligand_name + '.pdbqt'
        with open(out_file_path, "w") as text_file:
            text_file.write(lig_pdbqt)
        # compute optimal box size using https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4468813/
        optimal_box_size = ligand_preparation.get_optimal_box_size_for_ligand(out_file_path)
        # compute ligand center to make sure it is close to the origin
        ligand_center, euclid_dist_origin = ligand_preparation.get_ligand_center(meeko_prep)
        # collect the row; the dataframe is built once after the loop because
        # DataFrame.append was removed in pandas 2.0 and copies the frame on every call
        ligand_records.append({'ligand_name':ligand_name, 'optimal_box_size':optimal_box_size, 'ligand_center':ligand_center, 'euclid_dist_origin':euclid_dist_origin})
    except Exception as e:
        # keep going with the remaining ligands, but remember what failed and why
        error_ligand_smis.append(ligand_smi)
        error_ligand_messages[ligand_name] = repr(e)

# build the dataframe in one step (an empty list still yields the expected columns)
ligands_opt_box_size = pd.DataFrame(ligand_records, columns=ligand_table_columns)

# save dataframe to csv file
ligands_opt_box_size.to_csv(dirs_dict['prepared_ligands']+'ligands_opt_box_size.csv', sep='\t', index=False)
# print dataframe
ligands_opt_box_size

In [None]:
# show every SMILES string that failed during preparation, one per line
# (nothing is printed when no ligand failed)
if error_ligand_smis:
    print("\n".join(error_ligand_smis))

# **export output to a safe directory**

In [None]:
# Copy the whole content of the prepared-ligands directory (the .pdbqt files and
# ligands_opt_box_size.csv) outside the working directory, because the last cell of this
# notebook deletes everything in cwd except the notebook itself.
# NOTE(review): assumes the destination directory already exists — confirm before running.
!cp -r {dirs_dict['prepared_ligands']}* ../../../data/prev_prepared_molecules/prev_prepared_ligands/

In [None]:
%%bash
# Final cleanup of the working directory.
# WARNING: destructive — every entry matched by the glob below is deleted recursively,
# including the ligand directories created by this notebook.
# remove everything except this notebook
# extglob turns on bash's extended patterns; !(pattern) expands to every name that does NOT match pattern
shopt -s extglob
rm -rf !("notebook.ipynb")

# clear all output and save this notebook