In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
from QligFEP.pdb_utils import (
    nest_pdb,
    unnest_pdb,
    read_pdb_to_dataframe,
    write_dataframe_to_pdb,
)
from QligFEP.CLI.pdb_to_amber import asp_search, histidine_search, nc_termini_search
import pandas as pd
import re

If you are only running this to see how to use `qprep_prot`, define the functions below and then jump straight to the section [Preparing data](#preparing-data).

# Define functions

In [3]:
# Need to remove Hs from GLY
# Need to cap the last residue

# Per-residue atom renaming table: `rename_mapping[residue_name][old_atom_name]`
# gives the atom name that should replace `old_atom_name` in that residue.
# It is consumed by `correct_amino_acid_atom_names`, which compares each key
# against the stripped atom-name field (columns 13-16) of a PDB line.
#
# NOTE: in `fix_pdb`, `correct_numbered_atom_names` runs first and already moves
# leading digits to the end of the name, so the digit-leading keys below only
# take effect when this table is applied to lines that skipped that step.
# NOTE: several keys intentionally collapse onto the same target (e.g. GLY
# "H1"/"H2" -> "H"); this can yield duplicate atom names that have to be fixed
# by hand on N-terminal residues.
rename_mapping = {
    "ACE": {
        "3H": "H3",
        "2H": "H2",
        "1H": "H1",
        "H2_3": "H3",
        "H2_2": "H2",
        "H2_1": "H1",
        "3HH3": "H3",
        "2HH3": "H2",
        "1HH3": "H1",
        "HH33": "H3",
        "HH32": "H2",
        "HH31": "H1",
        "C1": "C",
        "C2": "CH3",
        "O1": "O",
    },
    "NME": {
        "3HA": "H3",
        "2HA": "H2",
        "1HA": "H1",
        "HA3": "H3",
        "HA2": "H2",
        "HA1": "H1",
        "H1_3": "H3",
        "H1_2": "H2",
        "H1_1": "H1",
        "3HH3": "H3",
        "2HH3": "H2",
        "1HH3": "H1",
        "2HH2": "H2",
        "2HH1": "H1",
        "C1": "C",
        "CH3": "C",
        "CA": "C",
        "N1": "N",
        "HN1": "H",
    },
    "GLY": {
        "H1": "H",
        "H2": "H",
        "1HA": "HA3",
        "2HA": "HA2",
    },
    "ARG": {
        "H2": "H",
        "2HG": "HG3",
        "1HG": "HG2",
    },
    "GLU": {
        "H2": "H",
        "H1": "H",
        "2HB": "HB3",
        "1HB": "HB2",
        "2HG": "HG3",
        "1HG": "HG2",
    },
    "GLN": {"H2": "H", "2HG": "HG3", "1HG": "HG2"},
    "THR": {
        "H2": "H",
        "1HG2": "HG21",
        "2HG2": "HG22",
        "3HG2": "HG23",
    },
    "LEU": {"H1": "H"},
    "VAL": {
        "3HG1": "HG13",
        "2HG1": "HG12",
        "1HG1": "HG11",
        # The gamma-2 methyl hydrogens get their own HG2x names (same scheme as
        # THR above); mapping them onto HG1x would duplicate the gamma-1 names.
        "3HG2": "HG23",
        "2HG2": "HG22",
        "1HG2": "HG21",
    },
    "ASN": {
        "3H": "H3",
        "2H": "H2",
        "1H": "H1",
        "2HB": "HB3",
        "1HB": "HB2",
        "1HD2": "HD21",
        "2HD2": "HD22",
    },
    "PRO": {
        "2H": "H3",
        "1H": "H2",
        "2HG": "HG3",
        "1HG": "HG2",
    },
    "ALA": {
        "1HB": "HB1",
        "2HB": "HB2",
        "3HB": "HB3",
    },
    "PHE": {
        "2HB": "HB3",
        "1HB": "HB2",
    },
    "TYR": {
        "2HB": "HB3",
        "1HB": "HB2",
    },
    "SER": {
        "2HB": "HB3",
        "1HB": "HB2",
    },
    "LYS": {
        "2HG": "HG3",
        "1HG": "HG2",
    },
    "ILE": {
        "2HG": "HG3",
        "1HG": "HG2",
    },
}


def reindex_pdb_residues(pdb_path: Path, out_pdb_path: str):
    """Renumber the residues of a PDB file consecutively, starting at 1.

    A residue is identified by the combination of its sequence number, name,
    chain id and insertion code. Residues are numbered in order of first
    appearance, and every insertion code is blanked in the output.

    Args:
        pdb_path: PDB file to read.
        out_pdb_path: destination handed to `write_dataframe_to_pdb`.
    """
    frame = read_pdb_to_dataframe(pdb_path)

    # One composite key per atom row; rows of the same residue share a key.
    residue_keys = frame.set_index(
        ["residue_seq_number", "residue_name", "chain_id", "insertion_code"]
    ).index
    distinct_keys = residue_keys.unique()
    new_numbers = dict(zip(distinct_keys, range(1, len(distinct_keys) + 1)))

    frame["residue_seq_number"] = residue_keys.map(new_numbers)
    frame["insertion_code"] = ""
    write_dataframe_to_pdb(frame, out_pdb_path)


def correct_numbered_atom_names(npdb_i):
    """Move leading digits of atom names to the end of the name.

    The atom name is read from columns 13-16 of every line. Names that do not
    start with a digit leave the line untouched. For a fixed set of names the
    moved number is additionally incremented by one (e.g. "1HB" -> "HB2",
    "2HB" -> "HB3"); all other names keep their number (e.g. "1HG2" -> "HG21").

    Args:
        npdb_i: nested pdb data structure for a single residue (list of lines)

    Returns:
        A new list of lines with the corrected atom names; rewritten names are
        left-aligned and padded to four characters.
    """
    # these only exist in AMBER with 2 and 3, so their index is shifted by one
    shifted_names = {
        "2HG",
        "1HG",
        "2HB",
        "1HB",
        "1HG1",
        "2HG1",
        "1HA",
        "2HA",
        "1HD",
        "2HD",
        "1HE",
        "2HE",
    }
    leading_number = re.compile(r"^(\d+)([A-Z]+\d*)")

    corrected = []
    for record in npdb_i:
        name = record[12:16].strip()
        found = leading_number.match(name)
        if found is None:
            corrected.append(record)
            continue

        digits, stem = found.groups()
        suffix = str(int(digits) + 1) if name in shifted_names else digits
        # Left-align in the 4-column atom-name field; 4-char names fill it exactly.
        corrected.append(record[:12] + f"{stem + suffix:<4}" + record[16:])
    return corrected


def correct_amino_acid_atom_names(npdb_i, resname, rename_mapping):
    """Rename the atoms of one residue using the table entry for `resname`.

    Each (old name -> new name) pair of `rename_mapping[resname]` is applied in
    turn to every line via `extract_and_replace`.

    Args:
        npdb_i: nested pdb data structure for a single residue
        resname: the residue name
        rename_mapping: a dictionary mapping residue names to
            {old atom name: new atom name} dictionaries

    Returns:
        The lines with renamed atoms; `npdb_i` itself when `resname` has no
        entry in `rename_mapping`.
    """
    if resname not in rename_mapping:
        return npdb_i

    residue_lines = npdb_i
    for old_name, new_name in rename_mapping[resname].items():
        # One pass over the residue per rename rule, in table order.
        residue_lines = [
            extract_and_replace(pdb_line, old_name, new_name) for pdb_line in residue_lines
        ]
    return residue_lines


def extract_and_replace(line, old_name, new_name):
    """Replace the atom name of a PDB line when it equals `old_name`.

    The atom name is read from columns 13-16 (whitespace stripped). Lines with
    a different atom name are returned unchanged. A 4-character replacement
    fills the field; any other replacement is written as one leading space
    followed by the name left-aligned in three columns.
    """
    if line[12:16].strip() != old_name:
        return line

    replacement = new_name.strip()
    if len(replacement) == 4:
        name_field = replacement
    else:
        name_field = f" {replacement:<3}"
    return line[:12] + name_field + line[16:]


def fix_pdb(pdb_path: Path, rename_mapping):
    """Rename residues/atoms of a PDB file and write `<stem>_renamed.pdb`.

    Steps, in order: nest the lines per residue, run `asp_search` and
    `histidine_search`, rename "NMA" residues to "NME", fix digit-leading atom
    names, apply `rename_mapping`, run `nc_termini_search`, and flatten again.

    Args:
        pdb_path: PDB file to read; the result is written next to it.
        rename_mapping: per-residue {old atom name: new atom name} table.

    Returns:
        The list of output lines that was written to the renamed file.
    """
    with open(pdb_path) as f:
        raw_lines = f.readlines()

    npdb = histidine_search(asp_search(nest_pdb(raw_lines)))

    for idx, residue in enumerate(npdb):
        # The residue name is taken from the last line of the residue.
        resname = residue[-1][17:21].rstrip()
        if resname == "NMA":  # we use NME in our FF library
            residue = [pdb_line.replace("NMA", "NME") for pdb_line in residue]
            resname = "NME"
        residue = correct_numbered_atom_names(residue)
        npdb[idx] = correct_amino_acid_atom_names(residue, resname, rename_mapping)

    # after atom name correction, label N and C termini
    pdb_lines = unnest_pdb(nc_termini_search(npdb))

    renamed_pdb_path = pdb_path.with_name(pdb_path.stem + "_renamed.pdb")
    with open(renamed_pdb_path, "w") as f:
        f.writelines(pdb_lines)
    return pdb_lines

# Rename the protein files

In [4]:
# Collect the raw protein structure of every target (`<target>/protein/protein.pdb`,
# relative to the current working directory), sorted for a stable processing order.
pdb_paths = sorted(Path().glob("*/protein/protein.pdb"))

In [5]:
# For every raw protein file: write a `<stem>_reindexed.pdb` copy with residues
# renumbered consecutively, then fix its residue/atom names, which writes
# `<stem>_reindexed_renamed.pdb` next to it.
for pdb_path in pdb_paths:
    reindexed_path = pdb_path.with_stem(pdb_path.stem + "_reindexed")
    reindex_pdb_residues(pdb_path, out_pdb_path=reindexed_path)
    fix_pdb(reindexed_path, rename_mapping)

# Rename the water & cofactor files

In [6]:
# Old -> new atom names for the ions; passed to `rename_cofactor_atoms`.
atom_renaming_dict = {  # salts
    "MG": "MAG",
    "ZN": "ZIN",
    "NA": "SOD",
}
# Old -> new residue names for the same ions; read as a module-level global by
# `rename_cofactor_atoms` (it is not passed in as an argument).
residue_renaming_dict = {
    "MG": "MAG",
    "ZN": "ZIN",
    "NA": "SOD",
}


def rename_cofactor_atoms(pdb_path: Path, atom_renaming_dict: dict):
    """Rename ion atoms and residues of a PDB file, overwriting it in place.

    Atom names are replaced according to `atom_renaming_dict`. Residue names
    are stripped of surrounding spaces and then replaced according to the
    module-level `residue_renaming_dict`.

    Args:
        pdb_path: PDB file to read and to overwrite with the renamed content.
        atom_renaming_dict: {old atom name: new atom name} replacements.
    """
    frame = read_pdb_to_dataframe(pdb_path)
    new_atom_names = frame["atom_name"].replace(atom_renaming_dict)
    new_residue_names = frame["residue_name"].str.strip(" ").replace(residue_renaming_dict)
    renamed = frame.assign(atom_name=new_atom_names, residue_name=new_residue_names)
    write_dataframe_to_pdb(renamed, pdb_path)

In [7]:
# Apply the ion renaming to every reindexed + renamed protein file.
pdb_paths = sorted(Path().glob("*/protein/protein_reindexed_renamed.pdb"))

for pdb_path in pdb_paths:
    # Skip empty files; non-empty files are rewritten in place.
    if pdb_path.stat().st_size != 0:
        rename_cofactor_atoms(pdb_path, atom_renaming_dict)

# Preparing data

Running qprep through the notebook. First we get the Center of Geometry (COG) of all the ligands, and then use it to prepare the water spheres of the respective systems. The COG is the center of the water sphere.

In [12]:
import argparse
import os
import shutil

from QligFEP.CLI.cog_cli import MolecularCOG
from QligFEP.CLI.qprep_cli import QprepAtomLibMissingError, QprepError, main

# Run qprep once per target directory (`<target>/protein/`).
# For each target: copy the renamed protein into `<target>/protein/qprep/`,
# compute the ligands' center of geometry (COG), and call the qprep CLI `main`
# with that COG as the sphere center.
prot_root_paths = sorted([p.absolute() for p in Path().glob("*/protein/")])
cwd = Path.cwd()

# The loop changes the working directory for every target. The try/finally
# guarantees the notebook ends up back in `cwd` even if a target fails, so the
# relative globs used by the other cells keep working.
try:
    for _path in prot_root_paths:
        print("Processing: ", _path.parent.name)
        protfile = _path / "protein_reindexed_renamed.pdb"
        qprep_dir = _path / "qprep"
        qprep_dir.mkdir(exist_ok=True)
        shutil.copy(protfile, qprep_dir / "protein.pdb")

        # for pfkfb3, we need to create additional parameters for the ligands:
        if _path.parent.name == "pfkfb3":
            os.chdir(_path)
            # create the parameters for the cofactors
            exit_status = os.system(
                "micromamba run -n qligfep_new qparams -i cofactors.sdf -pcof -pff AMBER14sb -p 2"
            )
            if exit_status != 0:
                # The copies below will fail if qparams did not produce its outputs.
                print("qparams exited with non-zero status:", exit_status)
            shutil.copy(_path / "all_cofactors.pdb", qprep_dir / "all_cofactors.pdb")
            shutil.copy(
                _path / "AMBER14sb_plus_cofactor.lib",
                qprep_dir / "AMBER14sb_plus_cofactor.lib",
            )
            shutil.copy(
                _path / "AMBER14sb_plus_cofactor.prm",
                qprep_dir / "AMBER14sb_plus_cofactor.prm",
            )

        # change the working directory to the qprep directory
        os.chdir(qprep_dir)

        # calculate the center of geometry for the ligands
        ligpath = _path.parent / "ligands/ligands.sdf"
        cog = MolecularCOG(ligpath)
        # For ptp1b, we use the smallest ligand to calculate the COG & a larger sphere radius
        coords_str = cog() if _path.parent.name != "ptp1b" else "41.430 12.734 15.360"
        # Split "[x y z]" / "x y z" into a list of three coordinate strings.
        coordinates = coords_str.strip("[]").split()

        args = argparse.Namespace()
        args.log_level = "info"
        args.input_pdb_file = "protein.pdb"
        args.FF = "AMBER14sb"
        args.cog = coordinates
        args.sphereradius = 25 if _path.parent.name != "ptp1b" else 30
        args.cysbond = "auto"
        args.solvent_pack = 3.0
        args.skip_neutralization = False
        args.neutralize_boundary_offset = 3.0
        args.salt_bridge_cutoff = 4.0

        if _path.parent.name == "pfkfb3":
            args.FF = "AMBER14sb_plus_cofactor"
            args.cofactors = ["all_cofactors.pdb"]
        else:
            args.cofactors = []

        # A failing target is reported and the loop moves on to the next one.
        # NOTE(review): the more specific-looking exception is listed first so
        # it is not shadowed if it subclasses QprepError - confirm the hierarchy.
        try:
            main(args)
        except QprepAtomLibMissingError as e:
            print("Qprep atomlib missing:", e)
        except QprepError as e:
            print("Qprep Error: ", e)
finally:
    os.chdir(cwd)

[32m2025-08-23 12:23:11[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m515[0m - [1mRemoving 57 crystal water molecules[0m
[32m2025-08-23 12:23:11[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m529[0m - [1mNeutralizing charged residues outside spherical boundary[0m
[32m2025-08-23 12:23:11[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mneutralize_outside_residues_dataframe[0m:[36m89[0m - [1mNeutralizing charged residues outside 22.0Å boundary[0m
[32m2025-08-23 12:23:11[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mneutralize_outside_residues_dataframe[0m:[36m98[0m - [1mFound 76 charged residues[0m


Processing:  bace


[32m2025-08-23 12:23:11[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 76  Residues outside boundary (22.0Å): 46  Salt bridge pairs neutralized: 0  Total residues neutralized: 46  Original total charge: -10  Final total charge: -4[0m
[32m2025-08-23 12:23:11[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:12[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/bace/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:13[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32

Processing:  cdk2


[32m2025-08-23 12:23:13[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 124  Residues outside boundary (22.0Å): 93  Salt bridge pairs neutralized: 1  Total residues neutralized: 94  Original total charge: +0  Final total charge: +4[0m
[32m2025-08-23 12:23:13[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:14[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/cdk2/protein/qprep/protein_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:15[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-

Processing:  cdk8


[32m2025-08-23 12:23:15[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mneutralize_outside_residues_dataframe[0m:[36m98[0m - [1mFound 173 charged residues[0m
[32m2025-08-23 12:23:15[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_find_salt_bridges[0m:[36m202[0m - [1mSalt bridge detected: B:468 (GLU) <-> A:67 (ARG) at 3.65Å - neutralizing both[0m
[32m2025-08-23 12:23:16[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 173  Residues outside boundary (22.0Å): 124  Salt bridge pairs neutralized: 1  Total residues neutralized: 125  Original total charge: +9  Final total charge: -4[0m
[32m2025-08-23 12:23:16[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:16[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0

Processing:  cmet


[32m2025-08-23 12:23:18[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/cmet/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:19[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-08-23 12:23:19[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m582[0m - [1mNEUTRALIZATION SUMMARY
Total charged residues processed: 60
Residues outside boundary: 32
Salt bridge pairs neutralized: 0
Total residues neutralized: 32
Charge change: +6 -> +0
[0m
[32m2025-08-23 12:23:19[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m616[0m - [1mwater.pdb file created.[0m
[32m2025-08-23 12:23:19[0m | [1mINFO    [0m | [36mQligFEP.CLI.qpr

Processing:  eg5


[32m2025-08-23 12:23:19[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 92  Residues outside boundary (22.0Å): 62  Salt bridge pairs neutralized: 1  Total residues neutralized: 63  Original total charge: +0  Final total charge: -5[0m
[32m2025-08-23 12:23:19[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:20[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/eg5/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:20[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2

Processing:  hif2a


[32m2025-08-23 12:23:21[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/hif2a/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:21[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-08-23 12:23:21[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m582[0m - [1mNEUTRALIZATION SUMMARY
Total charged residues processed: 23
Residues outside boundary: 1
Salt bridge pairs neutralized: 0
Total residues neutralized: 1
Charge change: -5 -> -6
[0m
[32m2025-08-23 12:23:21[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m616[0m - [1mwater.pdb file created.[0m
[32m2025-08-23 12:23:21[0m | [1mINFO    [0m | [36mQligFEP.CLI.qpre

Processing:  jnk1


[32m2025-08-23 12:23:21[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 97  Residues outside boundary (22.0Å): 70  Salt bridge pairs neutralized: 1  Total residues neutralized: 71  Original total charge: +1  Final total charge: +2[0m
[32m2025-08-23 12:23:21[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:22[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/jnk1/protein/qprep/protein_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:22[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-0

Processing:  mcl1


[32m2025-08-23 12:23:23[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/mcl1/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:23[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-08-23 12:23:23[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m582[0m - [1mNEUTRALIZATION SUMMARY
Total charged residues processed: 44
Residues outside boundary: 18
Salt bridge pairs neutralized: 0
Total residues neutralized: 18
Charge change: +4 -> +2
[0m
[32m2025-08-23 12:23:23[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m616[0m - [1mwater.pdb file created.[0m
[32m2025-08-23 12:23:23[0m | [1mINFO    [0m | [36mQligFEP.CLI.qpr

Processing:  p38


[32m2025-08-23 12:23:24[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 84  Residues outside boundary (22.0Å): 53  Salt bridge pairs neutralized: 1  Total residues neutralized: 54  Original total charge: -8  Final total charge: +0[0m
[32m2025-08-23 12:23:24[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:24[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/p38/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:25[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2

Processing:  pfkfb3


[32m2025-08-23 12:23:29[0m | [1mINFO    [0m | [36mQligFEP.openff2Q[0m:[36mprocess_ligands[0m:[36m113[0m - [1mCalculating charges[0m
100%|██████████| 3/3 [00:00<00:00, 21.74it/s]
[32m2025-08-23 12:23:42[0m | [1mINFO    [0m | [36mQligFEP.openff2Q[0m:[36mprocess_ligands[0m:[36m118[0m - [1mDone! Writing .lib, .prm and .pdb files for each ligand[0m
[32m2025-08-23 12:23:42[0m | [1mINFO    [0m | [36mQligFEP.openff2Q[0m:[36mwrite_cofactor_plus_ff_files[0m:[36m515[0m - [1mNo "impropers" parameters found for cofactor pfkfb3_automap_cofactor1[0m
[32m2025-08-23 12:23:42[0m | [1mINFO    [0m | [36mQligFEP.openff2Q[0m:[36mwrite_cofactor_plus_ff_files[0m:[36m515[0m - [1mNo "impropers" parameters found for cofactor pfkfb3_automap_cofactor2[0m
[32m2025-08-23 12:23:43[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m515[0m - [1mRemoving 1101 crystal water molecules[0m
[32m2025-08-23 12:23:43[0m | [1mINFO    [0m | [36mQligF

Processing:  ptp1b


[32m2025-08-23 12:23:45[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 84  Residues outside boundary (27.0Å): 44  Salt bridge pairs neutralized: 0  Total residues neutralized: 44  Original total charge: -6  Final total charge: +6[0m
[32m2025-08-23 12:23:45[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:46[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/ptp1b/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:47[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32

Processing:  shp2


[32m2025-08-23 12:23:47[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mneutralize_outside_residues_dataframe[0m:[36m98[0m - [1mFound 147 charged residues[0m
[32m2025-08-23 12:23:47[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_find_salt_bridges[0m:[36m202[0m - [1mSalt bridge detected: A:157 (GLU) <-> A:150 (ARG) at 3.76Å - neutralizing both[0m
[32m2025-08-23 12:23:47[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_find_salt_bridges[0m:[36m202[0m - [1mSalt bridge detected: A:311 (GLU) <-> A:315 (LYS) at 3.82Å - neutralizing both[0m
[32m2025-08-23 12:23:47[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_find_salt_bridges[0m:[36m202[0m - [1mSalt bridge detected: A:263 (ARG) <-> A:74 (GLU) at 3.65Å - neutralizing both[0m
[32m2025-08-23 12:23:47[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues foun

Processing:  syk


[32m2025-08-23 12:23:49[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 70  Residues outside boundary (22.0Å): 40  Salt bridge pairs neutralized: 1  Total residues neutralized: 41  Original total charge: +2  Final total charge: +1[0m
[32m2025-08-23 12:23:49[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:50[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/syk/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:50[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2

Processing:  thrombin


[32m2025-08-23 12:23:51[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/thrombin/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:51[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-08-23 12:23:51[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m582[0m - [1mNEUTRALIZATION SUMMARY
Total charged residues processed: 32
Residues outside boundary: 0
Salt bridge pairs neutralized: 0
Total residues neutralized: 0
Charge change: +0 -> +0
[0m
[32m2025-08-23 12:23:51[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m616[0m - [1mwater.pdb file created.[0m
[32m2025-08-23 12:23:51[0m | [1mINFO    [0m | [36mQligFEP.CLI.q

Processing:  tnks2


[32m2025-08-23 12:23:52[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/tnks2/protein/qprep/protein_noHOH_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:52[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-08-23 12:23:52[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m582[0m - [1mNEUTRALIZATION SUMMARY
Total charged residues processed: 47
Residues outside boundary: 24
Salt bridge pairs neutralized: 0
Total residues neutralized: 24
Charge change: +3 -> +3
[0m
[32m2025-08-23 12:23:52[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m616[0m - [1mwater.pdb file created.[0m
[32m2025-08-23 12:23:53[0m | [1mINFO    [0m | [36mQligFEP.CLI.qp

Processing:  tyk2


[32m2025-08-23 12:23:53[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36m_log_neutralization_stats[0m:[36m279[0m - [1mNeutralization statistics:
  Total charged residues found: 75  Residues outside boundary (22.0Å): 38  Salt bridge pairs neutralized: 0  Total residues neutralized: 38  Original total charge: -3  Final total charge: -7[0m
[32m2025-08-23 12:23:53[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m537[0m - [1mCharged residues neutralized[0m
[32m2025-08-23 12:23:53[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m549[0m - [1mFinal processed protein saved as: /home/davidararipe/projects/rbfe/qligfepv2-BenchmarkExperiments/startFiles/tyk2/protein/qprep/protein_neutralized.pdb[0m
STOP qprep ended normally
[32m2025-08-23 12:23:54[0m | [1mINFO    [0m | [36mQligFEP.CLI.qprep_cli[0m:[36mmain[0m:[36m578[0m - [1mqprep run finished. Check the output `qprep.out` for more information.[0m
[32m2025-0

### Preparation notes:

Running the above cell straightaway won't fully prepare the data. Some manual corrections are needed for a few systems.

These manual corrections are applied to the `startFiles/<target_name>/protein/protein_reindexed_renamed.pdb` files, which are created by the cells above the [Preparing Data](#preparing-data) section. The changes are listed below:

- For `jnk1`, we manually change `PRO`'s hydrogens to:
```txt
ATOM   5825  H2  PRO F 554       2.528 -10.284  12.564  1.00 99.90           H  
ATOM   5826  H3  PRO F 554       2.525  -9.514  14.023  1.00 99.90           H  
```
Instead of H1, H2.

- For `mcl1` we manually correct the numbering for the H atoms of `NGLY` on the generated [protein_reindexed_renamed.pdb](mcl1/protein/protein_reindexed_renamed.pdb) to:
```txt
ATOM      7 H1   NGLYA   1       1.888 -24.407   6.214  1.00  0.00           H  
ATOM      8 H2   NGLYA   1       3.604 -24.642   6.460  0.00  0.00           H  
```

- For `cdk2`, remove all atoms within chain `B`.

- For `cdk8`, change:
```txt
ATOM   5996 N    ASH B 366       3.010  19.285 -39.389  1.00  0.00           N1+
ATOM   5997 CA   ASH B 366       1.882  18.314 -39.417  1.00  0.00           C  
ATOM   5998 C    ASH B 366       1.618  17.753 -38.024  1.00  0.00           C  
ATOM   5999 O    ASH B 366       2.533  17.269 -37.359  1.00  0.00           O  
ATOM   6000 CB   ASH B 366       2.182  17.178 -40.397  1.00  0.00           C  
ATOM   6001 CG   ASH B 366       2.549  17.685 -41.781  1.00  0.00           C  
ATOM   6002 OD1  ASH B 366       2.281  16.971 -42.770  1.00  0.00           O  
ATOM   6003 OD2  ASH B 366       3.106  18.800 -41.878  1.00  0.00           O1-
ATOM   6004 H1   ASH B 366       3.169  19.645 -40.319  1.00  0.00           H  
ATOM   6005 H2   ASH B 366       2.783  20.048 -38.768  1.00  0.00           H  
ATOM   6006 H3   ASH B 366       3.846  18.822 -39.060  1.00  0.00           H  
ATOM   6007 HA   ASH B 366       0.986  18.835 -39.756  1.00  0.00           H  
ATOM   6008 HB3  ASH B 366       1.314  16.523 -40.470  1.00  0.00           H  
ATOM   6009 HB2  ASH B 366       2.997  16.569 -40.007  1.00  0.00           H  
```
to
```txt
ATOM   5996 N    NASPB 366       3.010  19.285 -39.389  1.00  0.00           N1+
ATOM   5997 CA   NASPB 366       1.882  18.314 -39.417  1.00  0.00           C  
ATOM   5998 C    NASPB 366       1.618  17.753 -38.024  1.00  0.00           C  
ATOM   5999 O    NASPB 366       2.533  17.269 -37.359  1.00  0.00           O  
ATOM   6000 CB   NASPB 366       2.182  17.178 -40.397  1.00  0.00           C  
ATOM   6001 CG   NASPB 366       2.549  17.685 -41.781  1.00  0.00           C  
ATOM   6002 OD1  NASPB 366       2.281  16.971 -42.770  1.00  0.00           O  
ATOM   6003 OD2  NASPB 366       3.106  18.800 -41.878  1.00  0.00           O1-
ATOM   6004 H1   NASPB 366       3.169  19.645 -40.319  1.00  0.00           H  
ATOM   6005 H2   NASPB 366       2.783  20.048 -38.768  1.00  0.00           H  
ATOM   6006 H3   NASPB 366       3.846  18.822 -39.060  1.00  0.00           H  
ATOM   6007 HA   NASPB 366       0.986  18.835 -39.756  1.00  0.00           H  
ATOM   6008 HB3  NASPB 366       1.314  16.523 -40.470  1.00  0.00           H  
ATOM   6009 HB2  NASPB 366       2.997  16.569 -40.007  1.00  0.00           H  
```

Manually change: `16 H1   LYS A` to `16 H    LYS A`

Remove all atoms within chain `B` of `cdk8`, starting from atom number 5995.

- For `syk`, change the C-terminal `VAL` to `CVAL` on the generated [protein_reindexed_renamed.pdb](syk/protein/protein_reindexed_renamed.pdb), as in:
```txt
ATOM   4384 N    CVALA 272      -3.521   8.703  43.058  1.00  0.00           N  
ATOM   4385 CA   CVALA 272      -4.703   8.052  42.505  1.00  0.00           C  
ATOM   4386 C    CVALA 272      -4.416   6.535  42.527  1.00  0.00           C  
ATOM   4387 O    CVALA 272      -4.304   5.854  41.506  1.00  0.00           O  
ATOM   4388 CB   CVALA 272      -5.083   8.659  41.118  1.00  0.00           C  
ATOM   4389 CG1  CVALA 272      -6.387   8.070  40.546  1.00  0.00           C  
ATOM   4390 CG2  CVALA 272      -5.210  10.195  41.142  1.00  0.00           C  
ATOM   4391 OXT  CVALA 272      -4.232   5.951  43.738  1.00  0.00           O  
ATOM   4392 H    CVALA 272      -2.640   8.512  42.575  1.00  0.00           H  
ATOM   4393 HA   CVALA 272      -5.544   8.210  43.175  1.00  0.00           H  
ATOM   4394 HB   CVALA 272      -4.279   8.417  40.420  1.00  0.00           H  
ATOM   4395 HG11 CVALA 272      -7.232   8.277  41.204  1.00  0.00           H  
ATOM   4396 HG12 CVALA 272      -6.614   8.497  39.570  1.00  0.00           H  
ATOM   4397 HG13 CVALA 272      -6.324   6.991  40.418  1.00  0.00           H  
ATOM   4398 HG21 CVALA 272      -4.266  10.673  41.391  1.00  0.00           H  
ATOM   4399 HG22 CVALA 272      -5.510  10.588  40.171  1.00  0.00           H  
ATOM   4400 HG23 CVALA 272      -5.941  10.526  41.880  1.00  0.00           H  
```

- For `tnks2`, change the following atoms on the generated [protein_reindexed_renamed.pdb](tnks2/protein/protein_reindexed_renamed.pdb):
```txt
ATOM      5 H    NGLYA   1     -15.995 -29.766  -3.844  1.00  0.00           H  
ATOM      6 H    NGLYA   1     -17.407 -29.279  -4.543  1.00  0.00           H  
ATOM      7 H3   NGLYA   1     -15.991 -28.530  -4.935  1.00  0.00           H  
```
to:
```txt
ATOM      5 H1   NGLYA   1     -15.995 -29.766  -3.844  1.00  0.00           H  
ATOM      6 H2   NGLYA   1     -17.407 -29.279  -4.543  1.00  0.00           H  
ATOM      7 H3   NGLYA   1     -15.991 -28.530  -4.935  1.00  0.00           H  
```

- For `pfkfb3`, change the following atom:
```txt
ATOM     15 H11  ASN A   2      94.530  79.928 260.581  1.00 61.80           H  
```
to `H`, instead of `H11`.


After applying these changes, run the Preparing Data cell again to successfully create the water spheres. Two key files are generated:
- `protein_noHOH.pdb`: the protein file with co-factors and without crystal water molecules, transferred to `perturbations/target/protein.pdb`.
- `water.pdb`: the watersphere created through `qprep` containing the crystal waters.

# Checking for qprep errors:

In [9]:
import glob
import subprocess

# Search every target's qprep log for the whole word "error" (case-insensitive).
outqprep_pattern = "*/protein/qprep/qprep.out"

# Expand the glob in Python instead of relying on a shell: this avoids
# `shell=True` and lets us tell "no log files at all" apart from
# "log files exist but contain no errors".
qprep_logs = sorted(glob.glob(outqprep_pattern))

if qprep_logs:
    p = subprocess.run(
        ["grep", "-winr", "error", "--", *qprep_logs],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        check=False,  # grep exits with 1 when nothing matches; that is not a failure here
    )
    stdout, stderr = p.stdout, p.stderr
    # grep uses exit code 0 for "matches found" and 1 for "no matches";
    # anything above that means grep itself failed and stdout cannot be trusted.
    if p.returncode > 1:
        print(f"grep failed (exit code {p.returncode}):\n{stderr}")
else:
    stdout, stderr = "", ""
    print(f"No qprep output found for pattern {outqprep_pattern}; nothing was checked.")

# Displayed as the cell result: one entry per matching line ([''] when there are none).
stdout.split("\n")

['']

# Move perturbation files to directory

In [13]:
from pathlib import Path
from joblib import Parallel, delayed
import shutil


def copyfile(src: Path, dest: Path):
    """Copy the file `src` into the directory `dest`, keeping its file name.

    This is a best-effort copy: a `FileNotFoundError` raised by
    `shutil.copy` is printed together with the source path instead of
    being propagated, so one missing file does not stop the caller.
    """
    destination_file = dest / src.name
    try:
        shutil.copy(src, destination_file)
    except FileNotFoundError as e:
        print(f"error for src: {src}:\n{e}")

In [15]:
from pathlib import Path
import shutil
from QligFEP.chemIO import MoleculeIO


def get_final_protein_file(qprep_dir):
    """
    Return the most processed protein PDB file found directly in `qprep_dir`.

    File name patterns are tried in this order (most processed first) and
    the first pattern that matches anything decides the result:
    1. *_noHOH_cofactors_neutralized.pdb
    2. *_noHOH_neutralized.pdb
    3. *_cofactors_neutralized.pdb
    4. *_noHOH_cofactors.pdb
    5. *_noHOH.pdb
    6. *_cofactors.pdb
    7. *_neutralized.pdb
    8. protein.pdb (fallback)

    Raises:
        ValueError: the first matching pattern matches more than one file.
        FileNotFoundError: none of the patterns matches any file.
    """
    search_root = Path(qprep_dir)

    # Ordered from most to least processed; the order is the priority.
    candidate_globs = (
        "*_noHOH_cofactors_neutralized.pdb",  # waters removed + cofactors + neutralized
        "*_noHOH_neutralized.pdb",  # waters removed + neutralized
        "*_cofactors_neutralized.pdb",  # cofactors + neutralized
        "*_noHOH_cofactors.pdb",  # waters removed + cofactors
        "*_noHOH.pdb",  # waters removed only
        "*_cofactors.pdb",  # cofactors only
        "*_neutralized.pdb",  # neutralized only
        "protein.pdb",  # unprocessed fallback
    )

    for glob_expr in candidate_globs:
        found = [*search_root.glob(glob_expr)]
        if not found:
            continue
        # An ambiguous match is an error rather than an arbitrary pick.
        if len(found) != 1:
            raise ValueError(f"Multiple files found for pattern {glob_expr}: {found}")
        return found[0]

    raise FileNotFoundError(f"No protein file found in {qprep_dir}")


# Every sub-directory of the working directory is treated as a candidate target.
# Directories without a usable qprep protein file are reported and skipped below.
targets = [p for p in sorted(Path().glob("*/")) if p.is_dir()]

# Files are gathered into <parent of cwd>/perturbations/<target name>/
destpath = Path.cwd().parent / "perturbations"
for target in targets:
    ligands_sdf = sorted((target / "ligands/").glob("*.sdf"))
    ligands_pdb = sorted((target / "ligands/").glob("*.pdb"))
    ligands_lib = sorted((target / "ligands/").glob("*.lib"))
    ligands_prm = sorted((target / "ligands/").glob("*.prm"))
    lomap_file = target / "ligands/mapping.json"
    water_file = target / "protein/qprep/water.pdb"

    # Get the final processed protein file
    try:
        final_protein_file = get_final_protein_file(target / "protein/qprep/")
        print(f"Target {target.name}: Using {final_protein_file.name}")
    except (FileNotFoundError, ValueError) as e:
        print(f"Error for target {target.name}: {e}")
        continue

    perturbation_root = destpath / target.name
    # exist_ok=True already covers the "directory is already there" case
    perturbation_root.mkdir(parents=True, exist_ok=True)

    allfiles = (
        ligands_pdb
        + ligands_lib
        + ligands_prm
        + [
            lomap_file,
            final_protein_file,  # Use the final processed protein
            water_file,
        ]
    )

    # Only load the first ligand SDF when one exists: indexing an empty list
    # would raise IndexError and abort the loop for all remaining targets.
    if ligands_sdf:
        molio = MoleculeIO(str(ligands_sdf[0]))
        # molio.write_to_single_sdf(str(perturbation_root / "ligands.sdf"))  # reindex hydrogens
    else:
        print(f"Warning for target {target.name}: no ligand .sdf file found in {target / 'ligands'}")

    # copyfile reports missing sources itself, so one absent file does not stop the batch
    Parallel(n_jobs=6, backend="threading")(delayed(copyfile)(src, perturbation_root) for src in allfiles)

    # Always rename the final protein file to protein.pdb in destination
    final_protein_dest = perturbation_root / final_protein_file.name
    if final_protein_dest.exists() and final_protein_dest.name != "protein.pdb":
        shutil.move(final_protein_dest, perturbation_root / "protein.pdb")
        print(f"Renamed {final_protein_file.name} to protein.pdb")

Target bace: Using protein_noHOH_neutralized.pdb
error for src: bace/ligands/mapping.json:
[Errno 2] No such file or directory: 'bace/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target cdk2: Using protein_neutralized.pdb




error for src: cdk2/ligands/mapping.json:
[Errno 2] No such file or directory: 'cdk2/ligands/mapping.json'
Renamed protein_neutralized.pdb to protein.pdb
Target cdk8: Using protein_noHOH_neutralized.pdb




error for src: cdk8/ligands/mapping.json:
[Errno 2] No such file or directory: 'cdk8/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target cmet: Using protein_noHOH_neutralized.pdb
error for src: cmet/ligands/mapping.json:
[Errno 2] No such file or directory: 'cmet/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target eg5: Using protein_noHOH_neutralized.pdb
error for src: eg5/ligands/mapping.json:
[Errno 2] No such file or directory: 'eg5/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target hif2a: Using protein_noHOH_neutralized.pdb
error for src: hif2a/ligands/mapping.json:
[Errno 2] No such file or directory: 'hif2a/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target jnk1: Using protein_neutralized.pdb




error for src: jnk1/ligands/mapping.json:
[Errno 2] No such file or directory: 'jnk1/ligands/mapping.json'
Renamed protein_neutralized.pdb to protein.pdb
Target mcl1: Using protein_noHOH_neutralized.pdb




error for src: mcl1/ligands/mapping.json:
[Errno 2] No such file or directory: 'mcl1/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target p38: Using protein_noHOH_neutralized.pdb




error for src: p38/ligands/mapping.json:
[Errno 2] No such file or directory: 'p38/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target pfkfb3: Using protein_noHOH_cofactors_neutralized.pdb
error for src: pfkfb3/ligands/mapping.json:
[Errno 2] No such file or directory: 'pfkfb3/ligands/mapping.json'
Renamed protein_noHOH_cofactors_neutralized.pdb to protein.pdb
Target ptp1b: Using protein_noHOH_neutralized.pdb




error for src: ptp1b/ligands/mapping.json:
[Errno 2] No such file or directory: 'ptp1b/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Error for target pymol-sessions: No protein file found in pymol-sessions/protein/qprep
Target shp2: Using protein_noHOH_neutralized.pdb




error for src: shp2/ligands/mapping.json:
[Errno 2] No such file or directory: 'shp2/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target syk: Using protein_noHOH_neutralized.pdb
error for src: syk/ligands/mapping.json:
[Errno 2] No such file or directory: 'syk/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target thrombin: Using protein_noHOH_neutralized.pdb




error for src: thrombin/ligands/mapping.json:
[Errno 2] No such file or directory: 'thrombin/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target tnks2: Using protein_noHOH_neutralized.pdb




error for src: tnks2/ligands/mapping.json:
[Errno 2] No such file or directory: 'tnks2/ligands/mapping.json'
Renamed protein_noHOH_neutralized.pdb to protein.pdb
Target tyk2: Using protein_neutralized.pdb




error for src: tyk2/ligands/mapping.json:
[Errno 2] No such file or directory: 'tyk2/ligands/mapping.json'
Renamed protein_neutralized.pdb to protein.pdb
