# **prepare current working directory (cwd)**

In [None]:
%%bash
# WARNING: destructive. Removes every entry of the current working directory that
# is matched by the glob below, i.e. everything not named notebook.ipynb, so that
# each run of the notebook starts from a clean directory.
# extglob has to be enabled for the !(pattern) negation glob to be available.
shopt -s extglob
rm -rf !("notebook.ipynb") 
# to test for errors from a terminal, run:
# ipython3 -c "%run notebook.ipynb"

In [None]:
# import tools
import sys, os; sys.path.append(os.path.realpath(os.path.join(os.getcwd(), "../../")))
from tools import *
# while updating modules
# import importlib; importlib.reload(visualize_molecules)
# import importlib; importlib.reload(utils)

import ipywidgets
import glob
import ast
from tqdm import tqdm
import pandas as pd

import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem
import meeko
from meeko import MoleculePreparation
from meeko import PDBQTMolecule
from meeko import RDKitMolCreate
from pymol import cmd
from vina import Vina

import shutil
from pdbfixer import PDBFixer
from openmm.app import PDBFile

# Global variables
# Working directories of the pipeline, one per stage (all relative to the cwd).
dirs_dict = {
    'raw_ligands': './raw_ligands/',
    'prepared_ligands': './prepared_ligands/',
    'raw_receptors': './raw_receptors/',
    'prepared_receptors': './prepared_receptors/',
    'vina_scoring': './vina_scoring/',
    'vina_docking': './vina_docking/',
}
# Seed handed to every Vina object created in this notebook.
vina_seed = 1

In [None]:
# create the (empty) working directories listed in dirs_dict
# NOTE(review): how already-existing directories are handled depends on the helper in
# tools.directory_scraping, which is not visible from this notebook -- confirm.
directory_scraping.prepare_directory_from_dict(dirs_dict)

In [None]:
# list directory before execution (relies on the `tree` command-line utility being installed)
!tree .

# **build and prepare ligands**

For comments and more information about this section, see the notebook in the directory 'example_notebooks/example04 - prepare ligands'.

In [None]:
%%file {dirs_dict['raw_ligands']}list_ligands.txt
O=Cc1ccc(O)c(OC)c1
COc1cc(C=O)ccc1O
CC(=O)NCCC1=CNc2c1cc(OC)cc2
CC(=O)NCCc1c[nH]c2ccc(OC)cc12

In [None]:
# Build a protonated 3D PDBQT file for every SMILES string in list_ligands.txt and
# register the optimal box size and the center of every prepared ligand.
# Columns of the summary table (same names and order as the csv written below).
ligand_table_columns = ['ligand_name', 'optimal_box_size', 'euclid_dist_origin', 'ligand_center']

with open(dirs_dict['raw_ligands']+'list_ligands.txt') as f:
    ligand_smis = f.readlines()

# clear output directory
directory_scraping.mkdir_or_clear(dirs_dict['prepared_ligands'])

# print number of smiles in file
print("len(ligand_smis)", len(ligand_smis))

# Rows of the summary table. They are collected in a list and turned into a dataframe
# once, because DataFrame.append was removed in pandas 2.0.
ligand_records = []
# SMILES that could not be prepared (printed by the next cell) ...
error_ligand_smis = []
# ... and, for each of them, (ligand_name, smiles, exception) so the reason is not lost
error_ligand_details = []
for idx, ligand_smi in enumerate(tqdm(ligand_smis), start=1):
    # the ligand number is the line number in the file, so names stay stable when a ligand fails
    ligand_name = 'ligand_' + f'{idx:03}'
    ligand_smi = ligand_smi.strip()
    try:
        # load our ligand from SMILES into RDKit (returns None for an invalid SMILES)
        lig = rdkit.Chem.MolFromSmiles(ligand_smi)
        if lig is None:
            raise ValueError("RDKit could not parse the SMILES string")
        # add hydrogens (without regard to pH)
        protonated_lig = rdkit.Chem.AddHs(lig)
        # generate 3D coordinates for the ligand (EmbedMolecule returns -1 on failure)
        if AllChem.EmbedMolecule(protonated_lig) < 0:
            raise ValueError("RDKit could not generate 3D coordinates")
        # convert to a PDBQT string using the MoleculePreparation class from Meeko.
        meeko_prep = meeko.MoleculePreparation()
        meeko_prep.prepare(protonated_lig)
        lig_pdbqt = meeko_prep.write_pdbqt_string()
        # At this point, the pdbqt string can be written to a file for docking with AutoDock-GPU or Vina,
        # or passed directly to Vina within Python using set_ligand_from_string(pdbqt_string)
        out_file_path = dirs_dict['prepared_ligands'] + ligand_name + '.pdbqt'
        with open(out_file_path, "w") as text_file:
            text_file.write(lig_pdbqt)
        # compute optimal box size using https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4468813/
        optimal_box_size = ligand_preparation.get_optimal_box_size_for_ligand(out_file_path)
        # compute ligand center to make sure it is close to the origin
        ligand_center, euclid_dist_origin = ligand_preparation.get_ligand_center(meeko_prep)
        # register the ligand in the summary table
        ligand_records.append({'ligand_name': ligand_name, 'optimal_box_size': optimal_box_size,
                               'ligand_center': ligand_center, 'euclid_dist_origin': euclid_dist_origin})
    except Exception as e:
        # best effort: keep going with the remaining ligands, but remember what failed and why
        error_ligand_smis.append(ligand_smi)
        error_ligand_details.append((ligand_name, ligand_smi, repr(e)))

# report the failures instead of dropping them silently
for failed_name, failed_smi, failure in error_ligand_details:
    print("could not prepare", failed_name, failed_smi, failure)

# build the summary table in one go; `columns` keeps the header even when no ligand succeeded
ligands_opt_box_size = pd.DataFrame(ligand_records, columns=ligand_table_columns)
# save dataframe to csv file
ligands_opt_box_size.to_csv(dirs_dict['prepared_ligands']+'ligands_opt_box_size.csv', sep='\t', index=False)
# print dataframe
ligands_opt_box_size

In [None]:
# show every SMILES string that could not be prepared, one per line
if error_ligand_smis:
    print("\n".join(error_ligand_smis))

# **download and prepare receptors with pockets**

## batch download receptors

For comments and more information about this section, see the notebook in the directory 'example02 - download raw molecules'.

In [None]:
%%file {dirs_dict['raw_receptors']}list_receptors.txt
3P0V, 3GKW, 5GTY

In [None]:
# batch download the receptors listed in list_receptors.txt; the helper receives the path of
# the list file and the raw receptors directory, and its return value is printed as a report
output = receptor_preparation.batch_download_receptors(dirs_dict['raw_receptors']+'list_receptors.txt', dirs_dict['raw_receptors'])
print(output)
# list the content of the raw receptors directory after the download
%ls {dirs_dict['raw_receptors']} -l

## clean receptors and convert them to pdbqt

For comments and more information about this section, see the notebook in the directory 'example03 - prepare receptors'.

In [None]:
pdg_gz_files,_= directory_scraping.list_files_in_dir(directory=dirs_dict['raw_receptors'], search_pattern='*.pdb.gz')
# extract/decompress gz files
for pdg_gz_file in tqdm(pdg_gz_files):
    # print(pdg_gz_file)
    !gzip -d $pdg_gz_file
%ls {dirs_dict['raw_receptors']} -l

In [None]:
receptor_paths, receptor_names = directory_scraping.list_files_in_dir(directory=dirs_dict['raw_receptors'], search_pattern='*.pdb*')

errors_and_warnings = []
for i,receptor_path in enumerate(tqdm(receptor_paths)):
    # print("\n", "-"*50, "\n\n", receptor_path, "\n")
    receptor_name = receptor_names[i];
    # check if output pdbqt file already exists
    if os.path.exists(dirs_dict['prepared_receptors'] + receptor_name + ".pdbqt"):
        continue
    # Summarizes the contents of a PDB file, like the wc command in UNIX.
    # !pdb_wc $receptor_path
    # -------------------------------------------------------------------------------
    # step01: remove extra chains if more than one
    fixer = PDBFixer(filename=receptor_path)
    numChains = len(list(fixer.topology.chains()))
    if numChains > 1:
        step01_out = dirs_dict['prepared_receptors'] + receptor_name + "_step01.pdb"
        # !pdb_selchain -A {receptor_path} > {step01_out}
        fixer.removeChains(range(1, numChains))
        PDBFile.writeFile(fixer.topology, fixer.positions, open(step01_out, 'w'))
        errors_and_warnings.append(("warning", "n_chains=" + str(numChains), receptor_name))
    else:
        step01_out = receptor_path
    # continue if last step didn't work
    if not os.path.exists(step01_out):
        errors_and_warnings.append(("error", "performing step_01", receptor_name))
        continue
    # -------------------------------------------------------------------------------
    # step02: remove water molecules
    # [-U]  cleanup type:
    #              'nphs': merge charges and remove non-polar hydrogens
    #              'lps': merge charges and remove lone pairs
    #              'waters': remove water residues
    #              'nonstdres': remove chains composed entirely of residues of
    #                       types other than the standard 20 amino acids
    #              'deleteAltB': remove XX@B atoms and rename XX@A atoms->XX
    #              (default is 'nphs_lps_waters_nonstdres')
    step02_out = dirs_dict['prepared_receptors'] + receptor_name + "_step02.pdbqt"
    !prepare_receptor -r {step01_out} -o {step02_out} -U 'waters'
    # continue if last step didn't work
    if not os.path.exists(step02_out):
        errors_and_warnings.append(("error", "performing step_02", receptor_name))
        continue
    # -------------------------------------------------------------------------------
    # [-e]  delete every nonstd residue from any chain
    #               'True': any residue whose name is not in this list:
    #                       ['CYS','ILE','SER','VAL','GLN','LYS','ASN', 
    #                       'PRO','THR','PHE','ALA','HIS','GLY','ASP', 
    #                       'LEU', 'ARG', 'TRP', 'GLU', 'TYR','MET', 
    #                       'HID', 'HSP', 'HIE', 'HIP', 'CYX', 'CSS']
    #               will be deleted from any chain. 
    #               NB: there are no  nucleic acid residue names at all 
    #               in the list and no metals. 
    #              (default is False which means not to do this)
    step03_out = dirs_dict['prepared_receptors'] + receptor_name + "_step03.pdbqt"
    !prepare_receptor -r {step02_out} -o {step03_out} -e
    # continue if last step didn't work
    if not os.path.exists(step03_out):
        errors_and_warnings.append(("error", "performing step_03", receptor_name))
        continue
    # -------------------------------------------------------------------------------
    # [-A]  type(s) of repairs to make: 
    #              'bonds_hydrogens': build bonds and add hydrogens 
    #              'bonds': build a single bond from each atom with no bonds to its closest neighbor
    #              'hydrogens': add hydrogens
    #              'checkhydrogens': add hydrogens only if there are none already
    #              'None': do not make any repairs 
    #              (default is 'None')
    step04_out = dirs_dict['prepared_receptors'] + receptor_name + "_step04.pdbqt"
    !prepare_receptor -r {step03_out} -o {step04_out} -A 'hydrogens'
    # continue if last step didn't work
    if not os.path.exists(step04_out):
        errors_and_warnings.append(("error", "performing step_04", receptor_name))
        continue
    # -------------------------------------------------------------------------------
    # change name to last step and remove all middle-stage files
    !mv {step04_out} {dirs_dict['prepared_receptors']}{receptor_name}".pdbqt"
    !rm {dirs_dict['prepared_receptors']}*_step*
    # -------------------------------------------------------------------------------
# print and count files in output dir
%ls {dirs_dict['prepared_receptors']} -l
!ls -1q {dirs_dict['prepared_receptors']} | wc -l

In [None]:
# show the (kind, description, receptor_name) tuples collected while preparing the receptors:
# a "warning" records that extra chains were removed, an "error" records the step that failed
errors_and_warnings
# print only different
# set([x[1] for x in errors_and_warnings])

## find pockets in receptors

For comments and more information about this section, see the notebook in the directory 'example07 - find pockets in prepared_receptors'.

In [None]:
# List prepared_receptors: every entry of the prepared receptors directory matching '*.pdb*'.
# The two returned sequences are used in parallel below (receptor_names[i] belongs to receptor_paths[i]).
receptor_paths, receptor_names = directory_scraping.list_files_in_dir(directory=dirs_dict['prepared_receptors'], search_pattern='*.pdb*')

In [None]:
# Execute fpocket for each receptor
for i,receptor_path in enumerate(tqdm(receptor_paths)):
    # print("\n", "-"*50, "\n\n", receptor_path, "\n")
    receptor_name = receptor_names[i]; #print(receptor_name)
    # -------------------------------------------------------------------------------
    # step01: compute pockets for each receptor
    !fpocket -f {receptor_path} -d > 'prepared_receptors/fp_'{receptor_name}'.csv'
    # print("fpocket finished running")
print("\n", "-"*50, "\n")
%ls {dirs_dict['prepared_receptors']} -l

In [None]:
# Summarize fpocket data into tables: one row per pocket, extended with the docking box
# (center, size, largest edge) computed from the pocket's pqr file.
list_pockets_data = []
for receptor_path, receptor_name in tqdm(list(zip(receptor_paths, receptor_names))):
    # NOTE(review): sorted() orders the paths lexicographically. If the pqr file names embed the
    # pocket number, 'pocket10' sorts before 'pocket2' and the enumerate()-based cav_id below would
    # not match the pocket -- confirm against the files fpocket writes into <receptor_name>_out/.
    pocket_paths = sorted(glob.glob(dirs_dict['prepared_receptors'] + receptor_name + '_out/*.pqr'))
    # descriptors written by the fpocket cell, indexed by the pocket (cavity) id
    pockets_data = pd.read_csv(dirs_dict['prepared_receptors']+'fp_' + receptor_name + '.csv',sep=' ',index_col=False).set_index('cav_id')
    for cav_id, pocket_path in enumerate(pocket_paths, start=1):
        # this lookup raises a KeyError when fpocket reported no cavity with this id
        drug_score = pockets_data["drug_score"].loc[cav_id]
        # compute the box center and size out from the pocket
        pocket_num = cav_id
        # Every receptor reuses the object names 1, 2, 3, ... and PyMOL appends a new state when a
        # file is loaded into an existing object. Drop any leftover object first, so the box is
        # computed from this pocket only and not from the pockets of previously processed receptors.
        cmd.delete(str(pocket_num))
        cmd.load(filename=pocket_path,format='pqr',object=pocket_num)
        center,size = utils.getbox(selection=pocket_num,extending=0,software='vina')
        center = [center['center_x'], center['center_y'], center['center_z']]
        size = [size['size_x'], size['size_y'], size['size_z']]
        size_max = max(size)
        pockets_data.loc[pocket_num,'receptor_name'] = receptor_name
        # center and size are stored as strings; later cells parse them back with ast.literal_eval
        pockets_data.loc[pocket_num,'center'] = str(center)
        pockets_data.loc[pocket_num,'size'] = str(size)
        pockets_data.loc[pocket_num,'size_max'] = size_max
    # update pockets data list index: prefix every pocket id with the receptor name
    pockets_data.index = pockets_data.index.map(str)
    pockets_data.index = receptor_name + '_' + pockets_data.index
    # append it to list of dataframes
    list_pockets_data.append(pockets_data)

# Merge pockets data and sort it by the drugability score
merged_pockets_data = pd.concat(list_pockets_data).sort_values(by=['drug_score'], ascending=False)
# Save merged data to csv file
merged_pockets_data.to_csv(dirs_dict['prepared_receptors']+'merged_pockets_data.csv', sep='\t')
# Re-read file to make sure everything is fine
# merged_pockets_data = pd.read_csv(dirs_dict['prepared_receptors']+'merged_pockets_data.csv',sep='\t')

In [None]:
# Inspect pockets_data to decide on which pockets will be used from here onwards
columns_to_keep = [
    'cav_id', 'drug_score', 'receptor_name', 'center', 'size', 'size_max',
    # 'volume', 'hydrophobicity_score', 'volume_score', 'charge_score','polarity_score',
]
# reload the merged table from disk and keep only the columns of interest
merged_pockets_data = pd.read_csv(dirs_dict['prepared_receptors']+'merged_pockets_data.csv',sep='\t').loc[:, columns_to_keep]
# lift the display limits so that the whole table is rendered
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(merged_pockets_data)

# **vina-score all ligands into filtered-pockets**

For comments and more information about this section, see the notebooks in the directories 'example08 - score all ligand per protein pockets' and 'example05 - locate ligand inside the box'.

In [None]:
# Reload the merged pockets table from disk, so that this section can be run on its own,
# and keep only the columns needed for scoring.
columns_to_keep = [
    'cav_id', 'drug_score', 'receptor_name', 'center', 'size', 'size_max',
    # 'volume', 'hydrophobicity_score', 'volume_score', 'charge_score','polarity_score',
]
merged_pockets_data = pd.read_csv(dirs_dict['prepared_receptors']+'merged_pockets_data.csv',sep='\t')[columns_to_keep]

In [None]:
# extra angstroms added to the edge length of the cubic vina box built for each pocket/ligand pair
box_extra_angstroms = 1

# List prepared molecules
receptor_paths, receptor_names = directory_scraping.list_files_in_dir(
    directory=dirs_dict['prepared_receptors'],
    search_pattern='*.pdb*',
)
ligand_paths, ligand_names = directory_scraping.list_files_in_dir(
    directory=dirs_dict['prepared_ligands'],
    search_pattern='*.pdb*',
)

# load ligands_opt_box_size from csv file
ligands_opt_box_size = pd.read_csv(dirs_dict['prepared_ligands']+'ligands_opt_box_size.csv',sep='\t')
ligands_opt_box_size

In [None]:
# IMPORTANT: CHANGE THIS CELL AS YOU PREFER
# filter the pockets according to your needs
drug_score_min = 0.5
max_num_pockets = 20

# keep the pockets that reach the minimum drug score, best first, capped at max_num_pockets
is_druggable = merged_pockets_data['drug_score'] >= drug_score_min
pockets_data = (
    merged_pockets_data.loc[is_druggable]
    .sort_values(by='drug_score', ascending=False)
    .iloc[0:max_num_pockets]
)
pockets_data

In [None]:
# %%time
# %%capture --no-stdout

# Create new dataframe to save vina scores
vina_scores = pd.DataFrame(columns=['receptor_name', 'pocket_id', 'ligand_name', 'pocket_box_center', 'vina_box_size', 'vina_score_before_min', 'vina_score_after_min', 'output_path'])

# Iterate over receptors
for receptor_idx,receptor_path in enumerate(tqdm(receptor_paths)):
    # print("\n\n"+"-"*50, "RECEPTOR {} out of {}".format(i+1, len(receptor_paths)), receptor_path, sep='\n')
    receptor_name = receptor_names[receptor_idx];
    # Iterate over the pockets available for the current receptor
    for pocket_idx, pocket_row in tqdm(pockets_data[pockets_data['receptor_name'] == receptor_name].sort_values(by='drug_score', ascending=False).iterrows()):
        pocket_cav_id = pocket_row['cav_id']
        # print('Pocket number: ', pocket_id)
        # print('drug_score: ', pocket_row['drug_score'])
        pocket_box_center = ast.literal_eval(pocket_row['center'])
        # print("pocket_box_center:", pocket_box_center)
        pocket_box_size = pocket_row['size_max']
        errors = []
        # Iterate over ligands
        for ligand_idx,ligand_path in enumerate(tqdm(ligand_paths)):
            try:
                # print("\n\t"+"-"*50, "\tLIGAND {} out of {}".format(j+1, len(ligand_paths)),"\t" + ligand_path + "\n", sep='\n')
                ligand_name = ligand_names[ligand_idx];
                # Choose a box size for vina taking into consideration the size of the pocket and the size of the ligand
                ligand_box_size = ligands_opt_box_size.loc[ligands_opt_box_size['ligand_name'] == ligand_name].iloc[0]['optimal_box_size']
                # print("pocket_box_size:", pocket_box_size, "ligand_box_size", ligand_box_size)
                # print("pocket_box_size:", type(pocket_box_size), "ligand_box_size", type(ligand_box_size))
                vina_box_size = [max(pocket_box_size, ligand_box_size) + box_extra_angstroms] * 3
                # print("vina_box_size", vina_box_size)
                # Load ligand and move it to the center of the box
                lig_pdbqt_string = ligand_preparation.translate_ligand_from_pdbqt(pdbqt_path=ligand_path, new_center=pocket_box_center, verbose=False)
                # Prepare vina object
                v = Vina(sf_name='vina', seed=vina_seed)  # default values: class vina.vina.Vina(sf_name='vina', cpu=0, seed=0, no_refine=False, verbosity=1)
                v.set_receptor(receptor_path)
                v.set_ligand_from_string(lig_pdbqt_string)
                v.compute_vina_maps(center=pocket_box_center, box_size=vina_box_size)
                # Score the current pose
                energy = v.score()
                vina_score_before_min = energy[0]
                # print('Score before minimization: %.3f (kcal/mol)' % vina_score_before_min)

                # Minimized locally the current pose
                energy_minimized = v.optimize()
                vina_score_after_min = energy_minimized[0]
                # print('Score after minimization : %.3f (kcal/mol)' % vina_score_after_min)
                output_path = dirs_dict['vina_scoring'] + pocket_cav_id + '_' + ligand_name + '_minimized.pdbqt'
                v.write_pose(output_path, overwrite=True)

                # Save vina score in dataframe
                vina_scores = vina_scores.append({'receptor_name': receptor_name, 'pocket_id': pocket_cav_id, 'ligand_name': ligand_name, 
                                                  'pocket_box_center': pocket_box_center, 'vina_box_size': vina_box_size, 
                                                  'vina_score_before_min': vina_score_before_min, 'vina_score_after_min': vina_score_after_min,
                                                 'output_path': output_path}, ignore_index=True)
            except Exception as e:
                # print("An exception occurred")
                # print("exception", e)
                errors.append((receptor_name, pocket_cav_id, ligand_name))
            # -----------------------------
# Save vina_scoring data to csv file
vina_scores = vina_scores.sort_values(by='vina_score_after_min', ascending=True)
vina_scores.to_csv(dirs_dict['vina_scoring']+'vina_scores.csv', sep='\t', index=False)
# print("\n", "-"*50, "\n")
%ls {dirs_dict['vina_scoring']} -l

In [None]:
# print info from all errors, one entry per line
if errors:
    print(*errors, sep="\n")

In [None]:
# load data from file
vina_scores_path = dirs_dict['vina_scoring'] + 'vina_scores.csv'
vina_scores = pd.read_csv(vina_scores_path, sep='\t')
# Inspect vina_scoring data to decide on which dockings will be used from here onwards
# (the display limits are lifted so that the whole table is rendered)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(vina_scores)

# **vina-dock filtered-ligands into filtered-pockets**

For comments and more information about this section, see the notebook in the directory 'example09 - multiple docking'.

In [None]:
# IMPORTANT: CHANGE THIS CELL AS YOU PREFER
# filter the vina_scores according to your needs
vina_score_max = -1
max_num_dockings = 20

vina_scores = pd.read_csv(dirs_dict['vina_scoring']+'vina_scores.csv',sep='\t')
# keep the combinations whose minimized score does not exceed the cutoff,
# lowest score first, capped at max_num_dockings
passes_score_cutoff = vina_scores['vina_score_after_min'] <= vina_score_max
vina_scores_filtered = (
    vina_scores.loc[passes_score_cutoff]
    .sort_values(by='vina_score_after_min', ascending=True)
    .iloc[0:max_num_dockings]
)
vina_scores_filtered

<span style='background:lightblue'> **Important** </span>

<span style='background:lightblue'>
from the official website: https://autodock-vina.readthedocs.io/en/latest/docking_python.html
</span>

```python
# Dock the ligand
v.dock(exhaustiveness=32, n_poses=20)
v.write_poses('1iep_ligand_vina_out.pdbqt', n_poses=5, overwrite=True)
```

<span style='background:lightblue'>
Finally, we run the molecular docking. Here we ask Vina to run 32 consecutive Monte-Carlo samplings using the `exhaustiveness` argument and to store 20 poses (`n_poses`) during the search. At the end, we write a PDBQT file called `1iep_ligand_vina_out.pdbqt` containing only the first 5 poses (`n_poses`), ranked by score. Of course, this can be changed to 20 to include all the poses that were saved during the calculations, on the condition that the energy difference between the best pose and the 20th pose is less than 3 kcal/mol. This behavior can be changed by setting the `energy_range` argument to a higher value.
</span>

In [None]:
# Docking parameters used by the docking cell below
vina_exhaustiveness = 32     # default:32, forwarded to v.dock(exhaustiveness=...)
vina_n_poses_to_dock = 20    # default:20, forwarded to v.dock(n_poses=...)
vina_n_poses_to_write = 5    # default:5, forwarded to v.write_poses(n_poses=...)

In [None]:
# Create new dataframe to save vina scores
vina_dockings = pd.DataFrame(columns=['receptor_name', 'pocket_id', 'ligand_name', 'vina_docking_best', 'vina_docking_values', 'pocket_box_center', 'vina_box_size', 'output_path'])

errors = []
for scoring_row_idx, scoring_row in tqdm(vina_scores_filtered.iterrows()):
# for scoring_row_idx, scoring_row in tqdm(vina_scores_filtered.iterrows()):
    print("-"*200)
    try:
        # print("scoring_row", scoring_row)
        receptor_name = scoring_row['receptor_name']
        receptor_path = dirs_dict['prepared_receptors'] + receptor_name + '.pdbqt'
        pocket_id = scoring_row['pocket_id']
        pocket_box_center = ast.literal_eval(scoring_row['pocket_box_center'])
        vina_box_size = ast.literal_eval(scoring_row['vina_box_size'])
        ligand_name = scoring_row['ligand_name']
        ligand_path = dirs_dict['prepared_ligands'] + ligand_name + '.pdbqt'
        lig_pdbqt_string = ligand_preparation.translate_ligand_from_pdbqt(pdbqt_path=ligand_path, new_center=pocket_box_center, verbose=False)
        # print("scoring_row_idx", scoring_row_idx, "pocket_id", pocket_id, "ligand_name", ligand_name)
        display(vina_scores_filtered.iloc[[scoring_row_idx]])

        # Prepare vina object
        v = Vina(sf_name='vina', seed=vina_seed)  # default values: class vina.vina.Vina(sf_name='vina', cpu=0, seed=0, no_refine=False, verbosity=1)
        v.set_receptor(receptor_path)
        v.set_ligand_from_string(lig_pdbqt_string)
        v.compute_vina_maps(center=pocket_box_center, box_size=vina_box_size)

        # Score the current pose
        energy = v.score()
        vina_score_before_min = energy[0]
        print('Score before minimization: %.3f (kcal/mol)' % vina_score_before_min)

        # Minimized locally the current pose
        energy_minimized = v.optimize()
        vina_score_after_min = energy_minimized[0]
        print('Score after minimization : %.3f (kcal/mol)' % vina_score_after_min)
        # v.write_pose(dirs_dict['vina_scoring'] + pocket_cav_id + '_' + ligand_name + '_minimized.pdbqt', overwrite=True)

        # Dock the ligand
        v.dock(exhaustiveness=vina_exhaustiveness, n_poses=vina_n_poses_to_dock)
        output_path = dirs_dict['vina_docking'] + pocket_id + '_' + ligand_name + '_vina_out.pdbqt'
        v.write_poses(output_path, n_poses=vina_n_poses_to_write, overwrite=True)
        print("docking finished")

        # Get docking values from output
        vina_docking_values = utils.get_scores_from_pdbqt(output_path)

        # Save vina score in dataframe
        vina_dockings = vina_dockings.append({'receptor_name': receptor_name, 'pocket_id': pocket_id, 'ligand_name': ligand_name, 
                                              'vina_docking_best': min(vina_docking_values), 'vina_docking_values': vina_docking_values,
                                              'pocket_box_center': pocket_box_center, 'vina_box_size': vina_box_size,
                                              'output_path': output_path}, ignore_index=True)
    except Exception as e:
        # print("An exception occurred")
        # print("exception", e)
        errors.append((receptor_name, pocket_cav_id, ligand_name))

# Save vina_scoring data to csv file
vina_dockings = vina_dockings.sort_values(by='vina_docking_best', ascending=True)
vina_dockings.to_csv(dirs_dict['vina_docking']+'vina_dockings.csv', sep='\t', index=False)
%ls {dirs_dict['vina_docking']} -l    

In [None]:
# print info from all errors, one entry per line
if errors:
    print(*errors, sep="\n")

In [None]:
# show the docking results table (sorted by 'vina_docking_best', ascending, in the docking cell)
vina_dockings

## list directory after execution

In [None]:
# list the directory tree after the pipeline ran (relies on the `tree` command-line utility)
!tree .