<a href="https://colab.research.google.com/github/Monocyte572/autodock-vina-Error-/blob/main/autodock.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SMILES to PDBQT

In [4]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import MolToPDBBlock
import os
import traceback

# Convert every SMILES string in the input CSV into a 3D structure file, log
# the rows that fail, and rewrite the CSV so it only keeps the rows that
# converted successfully.
csv_path = "/content/train.csv"
output_folder = "/content/output_folder"

df = pd.read_csv(csv_path)

os.makedirs(output_folder, exist_ok=True)

log_file = os.path.join(output_folder, "error_log.txt")

# Index labels of the rows that converted successfully. The rows are selected
# from `df` in one step after the loop; the previous per-row
# `DataFrame.append` no longer exists in pandas 2.x, so every row ended up in
# the error log and the CSV was overwritten with an empty table.
success_indices = []

with open(log_file, "w") as log:
    for index, row in df.iterrows():
        smiles = row["Canonical_Smiles"]
        compound_name = row["ID"]

        try:
            # Parse the SMILES once; MolFromSmiles returns None when the
            # string cannot be parsed.
            mol = None if pd.isna(smiles) else Chem.MolFromSmiles(smiles)
            if mol is None:
                # `index + 2` maps the 0-based row label to the spreadsheet
                # line number (1-based, plus the header line).
                raise ValueError(f"Invalid SMILES in row {index + 2}")

            mol = Chem.AddHs(mol)

            # EmbedMolecule reports failure with a negative conformer id
            # instead of raising, so the result has to be checked before the
            # structure is written out.
            if AllChem.EmbedMolecule(mol) < 0:
                raise ValueError(f"3D embedding failed in row {index + 2}")

            # NOTE(review): the file gets a .pdbqt extension but its content
            # is a plain PDB block (no charges / atom types) - confirm the
            # downstream tool accepts this.
            pdbqt_filename = os.path.join(output_folder, f"{compound_name}.pdbqt")
            with open(pdbqt_filename, "w") as pdbqt_file:
                pdbqt_file.write(MolToPDBBlock(mol))

            success_indices.append(index)

        except Exception as e:
            # Best-effort batch conversion: record the failure and continue
            # with the next row.
            log.write(f"Error processing row {index + 2}: {str(e)}\n")
            traceback.print_exc(file=log)


# Keep only the successfully converted rows, renumbered from 0.
success_df = df.loc[success_indices].reset_index(drop=True)

# NOTE(review): this overwrites the input CSV in place, dropping failed rows.
success_df.to_csv(csv_path, index=False)

print("Conversion complete. Check the error log for details.")


Conversion complete. Check the error log for details.


In [2]:
!pip install rdkit

Collecting rdkit
  Downloading rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Downloading rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl (34.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.9/34.9 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit
Successfully installed rdkit-2025.3.3


# AutoDock Vina

In [None]:
#!/usr/bin/env python
import os
from glob import glob
from tqdm import tqdm
from multiprocessing import Pool
from tempfile import TemporaryDirectory
import subprocess
import warnings
from pathlib import Path
import shutil
import pandas as pd
from functools import partial
from pprint import pprint
import sys
from scripts.prepare_receptor import ADFRSUITE
if ADFRSUITE not in sys.path:
    sys.path.append(ADFRSUITE)
from scripts import (
    run_docking,
    run_prepare_ligand,
    run_prepare_receptor,
    get_docking_details,
)
warnings.filterwarnings("ignore")

def to_sdf(ligand_path, out_path):
    """
    Produce a 3D SDF file from a ligand file by calling the ``obabel`` CLI.

    The command executed is ``obabel <ligand_path> -O <out_path> --gen3d``.
    Both stdout and stderr of the child process are discarded and its exit
    status is not inspected, so a failed conversion is silent.

    Args:
        ligand_path (str): Path to the input ligand file
        out_path (str): Path to the output SDF file (must end with .sdf)
    """
    command = ["obabel", ligand_path, "-O", out_path, "--gen3d"]
    discard = subprocess.DEVNULL
    subprocess.run(command, stdout=discard, stderr=discard)

def run_prepare_ligand_pipeline(ligand_path, output_dir):
    """
    Convert one ligand to a temporary 3D SDF file and prepare it for docking.

    The intermediate SDF lives in a temporary directory that is removed when
    the function returns; it is named after the stem of ``ligand_path``.

    Args:
        ligand_path (str): Path to the input ligand file
        output_dir (str): Directory handed to ``run_prepare_ligand`` as its
                          ``output_dir``
    """
    with TemporaryDirectory() as scratch_dir:
        ligand_stem = Path(ligand_path).stem
        sdf_path = f"{scratch_dir}/{ligand_stem}.sdf"
        to_sdf(ligand_path=ligand_path, out_path=sdf_path)
        run_prepare_ligand(ligand_path=sdf_path, output_dir=output_dir)

def docking_pfunc(input_vals):
    """
    Single-argument adapter around ``run_docking`` for use with pool mapping.

    Pool ``imap`` passes exactly one object per task, so the docking
    arguments are bundled into one sequence and unpacked here.

    Args:
        input_vals (sequence): Positional arguments for ``run_docking``; the
                               caller in this file supplies
                               [receptor path, ligand path,
                               docking results directory, score_only flag]

    Returns:
        Whatever ``run_docking`` returns; the caller in this file treats it
        as (receptor, ligand, affinity, docking_result_dir).
    """
    docking_result = run_docking(*input_vals)
    return docking_result

def _build_cli_parser():
    """Create the argument parser describing every command-line option."""
    from argparse import ArgumentParser

    cli = ArgumentParser(add_help=True)

    # (flags, add_argument keyword options), registered in this order.
    option_table = [
        (
            ("-l", "--ligand_dir"),
            dict(
                type=str, required=True,
                help="Path to the ligand directory; supports mol, mol2, sdf formats",
            ),
        ),
        (
            ("-r", "--receptor_dir"),
            dict(
                type=str, required=True,
                help="Path to the receptor directory; supports cif(mmcif), pdb, pdbqt formats",
            ),
        ),
        (
            ("-o", "--output_dir"),
            dict(type=str, required=True, help="Output directory path"),
        ),
        (
            ("-n", "--n_workers"),
            dict(type=int, default=os.cpu_count(), help="Number of worker processes"),
        ),
        (
            ("-e", "--multitasking_engine"),
            dict(
                type=str, default="process", choices=["process", "thread"],
                help="Parallelization engine",
            ),
        ),
        (
            ("-d", "--distance_threshold"),
            dict(
                type=float, default=4.0,
                help="Distance threshold (in angstroms) for identifying interacting residues",
            ),
        ),
        (
            ("-k", "--do_search_key_sites"),
            dict(action="store_true", help="Enable key site search"),
        ),
        (
            ("-b", "--buffer"),
            dict(
                type=int, default=5,
                help="Buffer (in residue IDs) around interacting residues to include in key site search",
            ),
        ),
        (
            ("-ko", "--search_key_sites_outdir"),
            dict(
                type=str, default=None,
                help="Output directory for key sites search results",
            ),
        ),
        (
            ("-so", "--score_only"),
            dict(
                action="store_true",
                help="Only compute docking scores without pose output",
            ),
        ),
    ]
    for flags, options in option_table:
        cli.add_argument(*flags, **options)
    return cli


def _map_in_pool(worker, items, n_workers, desc):
    """Run ``worker`` over ``items`` in a new process pool behind a progress bar.

    Results are returned as a list in the same order as ``items``.
    """
    with Pool(n_workers) as pool:
        progress = tqdm(pool.imap(worker, items), total=len(items), desc=desc)
        return list(progress)


def main():
    """
    Command-line entry point: prepare inputs, dock every pair, write reports.

    Steps, in order:
      1. Parse and echo the command-line arguments.
      2. Collect every entry of the ligand and receptor directories.
      3. Create ``ligands_pdbqt``, ``receptors_pdbqt`` and ``docking_results``
         under the output directory.
      4. Prepare ligands, then receptors, each in its own process pool.
      5. Dock every prepared ligand against every prepared receptor.
      6. Write ``docking_results.csv`` and request the detailed report
         ``docking_results_details.csv``.
    """
    args = _build_cli_parser().parse_args()
    pprint(vars(args))

    # NOTE(review): args.multitasking_engine is never read in this function -
    # every stage below uses a process Pool. The option is only forwarded to
    # get_docking_details through vars(args); confirm that is intended.

    ligand_files = glob(f"{args.ligand_dir}/*")
    receptor_files = glob(f"{args.receptor_dir}/*")

    prepared_ligand_dir = f"{args.output_dir}/ligands_pdbqt"
    prepared_receptor_dir = f"{args.output_dir}/receptors_pdbqt"
    docking_results_dir = f"{args.output_dir}/docking_results"

    for work_dir in (prepared_ligand_dir, prepared_receptor_dir, docking_results_dir):
        Path(work_dir).mkdir(parents=True, exist_ok=True)

    # Stage 1: ligand preparation.
    _map_in_pool(
        partial(run_prepare_ligand_pipeline, output_dir=prepared_ligand_dir),
        ligand_files,
        args.n_workers,
        "prepare ligands",
    )

    # Stage 2: receptor preparation.
    _map_in_pool(
        partial(run_prepare_receptor, output_dir=prepared_receptor_dir),
        receptor_files,
        args.n_workers,
        "prepare receptors",
    )

    # Stage 3: one docking job per (ligand, receptor) pair, ligand-major.
    docking_inputs = []
    for ligand in glob(f"{prepared_ligand_dir}/*"):
        for receptor in glob(f"{prepared_receptor_dir}/*"):
            docking_inputs.append(
                [receptor, ligand, docking_results_dir, args.score_only]
            )

    docking_results = _map_in_pool(
        docking_pfunc, docking_inputs, args.n_workers, "docking"
    )

    # Summary table, one row per docking job.
    summary_columns = ["receptor", "ligand", "affinity", "docking_result_dir"]
    result_df = pd.DataFrame(docking_results, columns=summary_columns)
    result_save_path = str(Path(args.output_dir, "docking_results.csv"))
    result_df.to_csv(result_save_path, index=False)

    # Detailed report: every parsed CLI option is forwarded as a keyword
    # argument, together with the summary path and the report destination.
    detail_kwargs = dict(vars(args))
    detail_kwargs.update(
        docking_results_path=result_save_path,
        out_filepath=str(Path(args.output_dir, "docking_results_details.csv")),
    )
    get_docking_details(**detail_kwargs)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
