In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from QligFEP.pdb_utils import (
    nest_pdb,
    unnest_pdb,
    read_pdb_to_dataframe,
    write_dataframe_to_pdb,
)
from QligFEP.CLI.pdb_to_amber import asp_search
import pandas as pd

# Define functions

In [45]:
# Open notes for the protein files:
#   - need to remove Hs from GLY
#   - need to cap the last residue

# Atom renames keyed by residue name: {residue name: {old atom name: new atom name}}.
# Passed to `fix_pdb`, which looks residues up *after* its own residue renaming
# (so NME residues are matched through the "NMA" entry).
rename_mapping = {
    "ACE": {
        "H3": "HH33",
        "H2": "HH32",
        "H1": "HH31",
    },
    "NMA": {
        "C": "CA",
        "H3": "HA3",
        "H2": "HA2",
        "H1": "HA1",
    },
}


def reindex_pdb_residues(pdb_path: Path, out_pdb_path: str):
    """Renumber the residues of a PDB file consecutively, starting at 1.

    A residue is identified by the combination of its sequence number, name,
    chain id and insertion code; residues are numbered in order of first
    appearance. Insertion codes are blanked in the output.

    Args:
        pdb_path: PDB file to read.
        out_pdb_path: destination handed to `write_dataframe_to_pdb`
            (the notebook passes a `Path` here).
    """
    residue_key_columns = [
        "residue_seq_number",
        "residue_name",
        "chain_id",
        "insertion_code",
    ]
    atoms = read_pdb_to_dataframe(pdb_path)
    # one key per atom row, in file order
    residue_keys = atoms.set_index(residue_key_columns).index
    distinct_keys = residue_keys.unique()
    new_number_by_key = dict(zip(distinct_keys, range(1, len(distinct_keys) + 1)))
    renumbered = atoms.assign(
        residue_seq_number=residue_keys.map(new_number_by_key),
        insertion_code="",
    )
    write_dataframe_to_pdb(renumbered, out_pdb_path)


def correct_amino_acid_atom_names(npdb_i, resname, rename_mapping):
    """Apply the atom renames registered for `resname` to one residue's lines.

    Args:
        npdb_i: list of PDB lines belonging to a single residue
        resname: residue name used as key into `rename_mapping`
        rename_mapping: {residue name: {old atom name: new atom name}}

    Returns:
        The residue lines with the renames applied, one rename pass per
        mapping entry in dictionary order. `npdb_i` is returned untouched
        when `resname` has no entry in `rename_mapping`.
    """
    if resname not in rename_mapping:
        return npdb_i

    residue_lines = npdb_i
    for current_name, replacement in rename_mapping[resname].items():
        residue_lines = [
            extract_and_replace(pdb_line, current_name, replacement)
            for pdb_line in residue_lines
        ]
    return residue_lines


def extract_and_replace(line, old_name, new_name):
    """Replace the atom name of a PDB line when it equals `old_name`.

    The atom name is read from the fixed-width field `line[12:16]`. Lines
    whose (stripped) atom name differs from `old_name` are returned unchanged.

    Args:
        line: a single PDB line
        old_name: atom name to look for (exact match after stripping)
        new_name: atom name to write instead

    Returns:
        The line with the 4-character atom-name field rewritten: a
        4-character name fills the field, shorter names are written with one
        leading space and left-aligned in the remaining three characters.

    Raises:
        ValueError: if `new_name` is longer than 4 characters. Writing it
            would widen the field and shift every following column.
    """
    if line[12:16].strip() != old_name:
        return line

    new_atom_name = new_name.strip()
    if len(new_atom_name) > 4:
        raise ValueError(
            f"Atom name {new_atom_name!r} does not fit the 4-character PDB atom name field"
        )
    if len(new_atom_name) == 4:
        name_field = new_atom_name
    else:
        # shorter names start one column in and are padded on the right
        name_field = f" {new_atom_name:<3}"
    return line[:12] + name_field + line[16:]


def fix_pdb(pdb_path: Path, rename_mapping):
    """Rename residues and atoms of a PDB file and write `<stem>_renamed.pdb`.

    The file is split into residues with `nest_pdb`, passed through
    `asp_search`, and then, per residue:

    * HIS is renamed to HIP and NME to NMA (names used in our FF library);
    * the atom renames registered in `rename_mapping` for the (possibly
      renamed) residue are applied.

    Args:
        pdb_path: PDB file to read; the result is written next to it as
            `<stem>_renamed.pdb`.
        rename_mapping: {residue name: {old atom name: new atom name}}

    Returns:
        The lines produced by `unnest_pdb`, i.e. what was written to disk.
    """
    # residue names as found in the input -> names used in our FF library
    residue_aliases = {"HIS": "HIP", "NME": "NMA"}

    renamed_pdb_path = pdb_path.with_name(pdb_path.stem + "_renamed.pdb")
    with open(pdb_path) as f:
        pdb_lines = f.readlines()

    npdb = nest_pdb(pdb_lines)
    npdb = asp_search(npdb)

    for i, res in enumerate(npdb):
        if not res:  # nothing to inspect or rename in an empty group
            continue
        resname = res[-1][17:20]
        new_resname = residue_aliases.get(resname)
        if new_resname is not None:
            # Only touch the residue-name columns; a whole-line str.replace
            # would also rewrite the same three letters anywhere else.
            res = [
                line[:17] + new_resname + line[20:] if line[17:20] == resname else line
                for line in res
            ]
            resname = new_resname
        npdb[i] = correct_amino_acid_atom_names(res, resname, rename_mapping)
    pdb_lines = unnest_pdb(npdb)

    with open(renamed_pdb_path, "w") as f:
        f.writelines(pdb_lines)
    return pdb_lines


def cap_and_reindex_pdb(inp_pdb: Path):
    """Trim terminal residues of a PDB file and renumber its atoms, in place.

    Steps:

    1. If the first residue is one of GLY, LEU, GLU, ASH, ILE or ASN, the
       additional N-terminal hydrogens not covered in our library files are
       handled: atoms H2 and H3 are removed and H1 is renamed to H.
    2. If the last residue is ILE or NME, that whole residue is removed.
    3. Atom serial numbers are rewritten as 1..N.

    The result overwrites `inp_pdb`.

    Args:
        inp_pdb: path for the pdb file
    """
    pdb_df = read_pdb_to_dataframe(inp_pdb)

    if pdb_df["residue_name"].values[0] in ["GLY", "LEU", "GLU", "ASH", "ILE", "ASN"]:
        first_residue = pdb_df["residue_seq_number"].values[0]
        # NOTE(review): the residue is selected by sequence number only, so a
        # residue with the same number in another chain would be included.
        in_first_residue = pdb_df["residue_seq_number"] == first_residue
        subset_first = pdb_df[
            in_first_residue & ~pdb_df["atom_name"].isin(["H2", "H3"])
        ].copy()
        # Exact-match rename: a substring replace would also rewrite names
        # that merely contain "H1" (e.g. "HH11").
        subset_first["atom_name"] = subset_first["atom_name"].replace({"H1": "H"})
        pdb_df = pd.concat(
            [subset_first, pdb_df[~in_first_residue]], ignore_index=True
        )

    # drop the last residue when it is ILE or NME
    last_residue = pdb_df["residue_name"].values[-1]
    last_residue_number = pdb_df["residue_seq_number"].values[-1]
    if last_residue in ["ILE", "NME"]:
        pdb_df = pdb_df[pdb_df["residue_seq_number"] != last_residue_number]

    pdb_df = pdb_df.assign(atom_serial_number=range(1, len(pdb_df) + 1))
    write_dataframe_to_pdb(pdb_df, inp_pdb)

# Rename the protein files

In [46]:
# One input per target: <target>/protein/protein.pdb, relative to the working directory.
pdb_paths = sorted(Path().glob("*/protein/protein.pdb"))

In [47]:
# Single-target run of the reindexing step (thrombin only); writes
# thrombin/protein/protein_reindexed.pdb.
pdb_path = Path("thrombin/protein/protein.pdb")

reindexed_path = pdb_path.with_stem(pdb_path.stem + "_reindexed")
reindex_pdb_residues(pdb_path, out_pdb_path=reindexed_path)

In [48]:
# For every target: renumber the residues into protein_reindexed.pdb, then fix
# residue/atom names. `fix_pdb` writes `<stem>_renamed.pdb` next to its input,
# i.e. protein_reindexed_renamed.pdb.
for pdb_path in pdb_paths:
    reindexed_path = pdb_path.with_stem(pdb_path.stem + "_reindexed")
    reindex_pdb_residues(pdb_path, out_pdb_path=reindexed_path)
    fix_pdb(reindexed_path, rename_mapping)
    # capping step currently disabled:
    # cap_and_reindex_pdb(pdb_path.with_stem(pdb_path.stem + "_renamed"))

# Rename the water & cofactor files

In [49]:
# Exact-match renames for the `atom_name` column (old name -> new name).
atom_renaming_dict = {
    # salts
    "MG": "MAG",
    "ZN": "ZIN",
}
# Exact-match renames for the `residue_name` column (old name -> new name);
# read by `rename_cofactor_atoms`.
residue_renaming_dict = {
    "MG": "MAG",
    "ZN": "ZIN",
}


def rename_cofactor_atoms(
    pdb_path: Path,
    atom_renaming_dict: dict,
    residue_renaming: "dict | None" = None,
):
    """Rename cofactor atoms and residues of a PDB file, overwriting it.

    Both renames are exact-match value replacements on the respective
    DataFrame column; residue names are stripped of spaces before matching.

    Args:
        pdb_path: PDB file to rewrite in place.
        atom_renaming_dict: {old atom name: new atom name}
        residue_renaming: {old residue name: new residue name}. When omitted,
            the notebook-level `residue_renaming_dict` is used, which is what
            this function always read before the parameter existed.
    """
    if residue_renaming is None:
        residue_renaming = residue_renaming_dict
    pdb_df = read_pdb_to_dataframe(pdb_path).assign(
        atom_name=lambda x: x["atom_name"].replace(atom_renaming_dict),
        residue_name=lambda x: x["residue_name"]
        .str.strip(" ")
        .replace(residue_renaming),
    )
    write_dataframe_to_pdb(pdb_df, pdb_path)

In [50]:
# From here on `pdb_paths` refers to the renamed protein files written by `fix_pdb`.
pdb_paths = sorted(Path().glob("*/protein/protein_reindexed_renamed.pdb"))

for pdb_path in pdb_paths:
    # skip empty (zero-byte) files
    if pdb_path.stat().st_size != 0:
        rename_cofactor_atoms(pdb_path, atom_renaming_dict)

# Merge renamed protein & cofactor files

In [51]:
# prot_root_paths = sorted(Path().glob("*/protein/"))

# for _path in prot_root_paths:
#     processed_pdbs = []
#     protfile = _path / "protein_renamed.pdb"
#     cofactor = _path / "cofactors_crystalwater_renamed.pdb"

#     prot_df = read_pdb_to_dataframe(protfile)

#     # reindex both atom_serial_number and residue_seq_number
#     prot_df["atom_serial_number"] = range(1, len(prot_df) + 1)
#     residue_seq_mapping = {
#         old: new
#         for old, new in zip(
#             prot_df["residue_seq_number"].unique(),
#             range(1, len(prot_df["residue_seq_number"].unique()) + 1),
#         )
#     }
#     prot_df["residue_seq_number"] = prot_df["residue_seq_number"].replace(
#         residue_seq_mapping
#     )
#     last_prot_res = prot_df["residue_seq_number"].max()
#     last_prot_atom = prot_df["atom_serial_number"].max()
#     processed_pdbs.append(prot_df)

#     if cofactor.exists():
#         print("Including cofactors for ", _path)
#         cof_df = read_pdb_to_dataframe(cofactor)
#         cof_df["atom_serial_number"] = range(
#             last_prot_res + 1, last_prot_res + len(cof_df) + 1
#         )
#         residue_seq_mapping = {
#             old: new
#             for old, new in zip(
#                 cof_df["residue_seq_number"].unique(),
#                 range(
#                     last_prot_res + 1,
#                     last_prot_res + len(cof_df["residue_seq_number"].unique() + 1),
#                 ),
#             )
#         }
#         cof_df["residue_seq_number"] = cof_df["residue_seq_number"].replace(
#             residue_seq_mapping
#         )
#         processed_pdbs.append(cof_df)
#     final_df = pd.concat(processed_pdbs, ignore_index=True)
#     write_dataframe_to_pdb(final_df, protfile.parent / "protfile_final.pdb")

# Preparing data

Running qprep through the notebook. First we get the Center of Geometry (COG) of all the ligands, and then use it to prepare the water spheres of the respective systems. The COG is the center of the water sphere.

In [52]:
import shutil
from QligFEP.CLI.qprep_cli import main, QprepError, QprepAtomLibMissingError
from QligFEP.CLI.cog_cli import MolecularCOG
import argparse
import os

# Absolute paths, because the loop below changes the working directory.
prot_root_paths = sorted(p.absolute() for p in Path().glob("*/protein/"))
cwd = Path.cwd()

try:
    for _path in prot_root_paths:
        print("Processing: ", _path)
        protfile = _path / "protein_reindexed_renamed.pdb"
        qprep_dir = _path / "qprep"
        qprep_dir.mkdir(exist_ok=True)
        shutil.copy(protfile, qprep_dir / "protein.pdb")

        # qprep is given the relative file name "protein.pdb", so it has to
        # run from inside the qprep directory
        os.chdir(qprep_dir)

        # the center of geometry of the ligands is the center of the water sphere
        ligpath = _path.parent / "ligands/ligands.sdf"
        cog = MolecularCOG(ligpath)
        coords_str = cog()
        # "[x y z]" -> ["x", "y", "z"]
        coordinates = coords_str.strip("[]").split()

        args = argparse.Namespace(
            log_level="info",
            input_pdb_file="protein.pdb",
            FF="AMBER14sb",
            cog=coordinates,
            sphereradius=25,
            cysbond="auto",
            solvent_pack=3.0,
        )

        try:
            main(args)
        except (QprepError, QprepAtomLibMissingError) as e:
            # report the failing target and carry on with the next one
            print(e)
finally:
    # Always return to the starting directory, even when a target raises an
    # unexpected error; otherwise later cells that use relative paths would
    # silently run from inside a qprep directory.
    os.chdir(cwd)



Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/cdk2/protein


STOP qprep ended normally
[32m2024-09-04 17:30:51[0m | [31m[1mERROR   [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mqprep_error_check[0m:[36m65[0m - [31m[1mErrors found in qprep output file /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/cdk2/protein/qprep/qprep.out. Please check if the amino acids in your pdb file match the residue & atom conventions on the forcefield .lib & .prm files:
/zfsdata/data/david/Q/src/QligFEP/FF/AMBER14sb.prm & /zfsdata/data/david/Q/src/QligFEP/FF/AMBER14sb.lib[0m
[32m2024-09-04 17:30:51[0m | [31m[1mERROR   [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mqprep_error_check[0m:[36m65[0m - [31m[1mErrors found in qprep output file /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/cdk2/protein/qprep/qprep.out. Please check if the amino acids in your pdb file match the residue & atom conventions on the forcefield .lib & .prm files:
/zfsdata/data/david/Q/src/QligFEP/FF/AMBER14sb.prm & /zfsdata/data/david/Q/src/QligFEP/FF/AMBER1

{'>>>>> ERROR: Residue number   162 is of unknown type TPO \n>>>>> ERROR: The check of the PDB file failed.'}
Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/cdk8/protein


STOP qprep ended normally
[32m2024-09-04 17:30:52[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:30:52[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:30:52[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/cmet/protein


STOP qprep ended normally
[32m2024-09-04 17:30:53[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:30:53[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:30:53[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/eg5/protein


STOP qprep ended normally
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/hif2a/protein


STOP qprep ended normally
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/mcl1/protein


STOP qprep ended normally
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:30:55[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/p38/protein


STOP qprep ended normally
[32m2024-09-04 17:30:57[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:30:57[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:30:57[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/pde2/protein


STOP qprep ended normally
[32m2024-09-04 17:30:59[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:30:59[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:30:59[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/pfkfb3/protein


STOP qprep ended normally
[32m2024-09-04 17:31:01[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:31:01[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:31:01[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/ptp1b/protein


STOP qprep ended normally
[32m2024-09-04 17:31:02[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:31:02[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:31:02[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/shp2/protein


STOP qprep ended normally
[32m2024-09-04 17:31:05[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:31:05[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:31:05[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/syk/protein


STOP qprep ended normally
[32m2024-09-04 17:31:06[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:31:06[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:31:06[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/thrombin/protein


STOP qprep ended normally
[32m2024-09-04 17:31:08[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:31:08[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:31:08[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/tnks2/protein


STOP qprep ended normally
[32m2024-09-04 17:31:09[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:31:09[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:31:09[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


Processing:  /zfsdata/data/david/qligfepv2-BenchmarkExperiments/startFiles/tyk2/protein


STOP qprep ended normally
[32m2024-09-04 17:31:10[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m237[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2024-09-04 17:31:10[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m260[0m - [1mwater.pdb file created.[0m
[32m2024-09-04 17:31:10[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m280[0m - [1mAll water molecules are inside the sphere radius.[0m


# TODO:
After running qprep, remove the water molecules from `protein.pdb`.

# Checking for qprep errors:

In [22]:
import subprocess

# Search every target's qprep.out for the whole word "error"
# (-w whole word, -i ignore case, -n line numbers, -r recursive).
# The glob pattern is expanded by the shell, hence shell=True.
outqprep_pattern = "*/protein/qprep/qprep.out"
grep_command = f"grep -winr error {outqprep_pattern}"
with subprocess.Popen(
    grep_command,
    shell=True,
    text=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
) as p:
    stdout, stderr = p.communicate()
stdout.split("\n")

['cdk2/protein/qprep/qprep.out:85:>>>>> ERROR: Residue number   162 is of unknown type TPO ',
 'cdk2/protein/qprep/qprep.out:88:>>>>> ERROR: The check of the PDB file failed.',
 '']

# Move perturbation files to directory

In [23]:
from pathlib import Path
from joblib import Parallel, delayed
import shutil


def copyfile(src: Path, dest: Path):
    """Copy `src` into the directory `dest`, keeping its file name.

    A `FileNotFoundError` raised by the copy is printed together with the
    source path instead of being propagated, so one missing file does not
    abort a batch of copies.
    """
    destination_file = dest.joinpath(src.name)
    try:
        shutil.copy(src, destination_file)
    except FileNotFoundError as err:
        print(f"error for src: {src}:\n{err}")

In [24]:
# every directory in the working directory is treated as one target
targets = sorted(p for p in Path().glob("*/") if p.is_dir())

destpath = Path.cwd().parent / "perturbations"
for target in targets:
    ligand_dir = target / "ligands"
    qprep_dir = target / "protein" / "qprep"

    # ligand inputs
    ligands_sdf = sorted((ligand_dir / "ligands").glob("*.sdf"))
    ligands_pdb = sorted(ligand_dir.glob("*.pdb"))
    ligands_lib = sorted(ligand_dir.glob("*.lib"))
    ligands_prm = sorted(ligand_dir.glob("*.prm"))
    lomap_file = ligand_dir / "ligands" / "lomap.json"

    # qprep outputs; the water-free protein is preferred when it exists
    protein_file = qprep_dir / "protein.pdb"
    noHOH_protfile = qprep_dir / "protein_noHOH.pdb"
    water_file = qprep_dir / "water.pdb"
    if noHOH_protfile.exists():
        protein_source = noHOH_protfile
    else:
        protein_source = protein_file

    perturbation_root = destpath / target.name
    if not perturbation_root.exists():
        perturbation_root.mkdir(parents=True, exist_ok=True)

    allfiles = [
        *ligands_sdf,
        *ligands_pdb,
        *ligands_lib,
        *ligands_prm,
        lomap_file,
        protein_source,
        water_file,
    ]
    Parallel(n_jobs=6, backend="threading")(
        delayed(copyfile)(src, perturbation_root) for src in allfiles
    )

    # a copied protein_noHOH.pdb is renamed to just protein.pdb
    copied_noHOH = perturbation_root / "protein_noHOH.pdb"
    if copied_noHOH.exists():
        shutil.move(copied_noHOH, perturbation_root / "protein.pdb")

error for src: cdk2/protein/qprep/water.pdb:
[Errno 2] No such file or directory: 'cdk2/protein/qprep/water.pdb'
