In [23]:
import MDAnalysis as mda
from pathlib import Path

In [24]:
from glob import glob

In [25]:
pdbs = glob("./input_data//*.pdb")

In [26]:
common_co_crystals = ["GOL", "IMD", "SO4", "EDO", "PO4", "DMS", "CIT"]

exclude_co_crystals = " ".join([f"and not resname {c}" for c in common_co_crystals ])

In [27]:
for pdb in pdbs:
    u = mda.Universe(pdb)
    print(f"pdb: {pdb}")
    # print(u.atoms.segments)
    others = u.select_atoms(f"chainid A and not protein and not water {exclude_co_crystals}")
    protein_A = u.select_atoms("protein and chainid A")
    print(set(others.resnames))
    combined = protein_A + others
    stem = Path(pdb).stem
    combined.write(f"./processed/{stem}_processed.pdb")

pdb: ./input_data/8ewr.pdb
{'WZN', 'HEM'}
pdb: ./input_data/7ufd.pdb
{'HEM', 'T0K'}
pdb: ./input_data/7kvp.pdb
{'HEM', 'X6J'}
pdb: ./input_data/6da8.pdb
{'G0J', 'HEM'}
pdb: ./input_data/3tjs.pdb
{'HEM', 'D0R'}
pdb: ./input_data/6bd5.pdb
{'D7M', 'HEM'}
pdb: ./input_data/7kvh.pdb
{'X5Y', 'HEM'}
pdb: ./input_data/6oob.pdb
{'MWS', 'HEM'}
pdb: ./input_data/6da5.pdb
{'HEM', 'G1J'}
pdb: ./input_data/7kvq.pdb
{'HEM', 'X7D'}
pdb: ./input_data/5vcc.pdb
{'HEM'}
pdb: ./input_data/6dal.pdb
{'G1D', 'HEM'}
pdb: ./input_data/3nxu.pdb
{'RIT', 'HEM'}
pdb: ./input_data/7ufa.pdb
{'NI6', 'HEM'}
pdb: ./input_data/5vcg.pdb
{'08Y', 'HEM'}
pdb: ./input_data/6unj.pdb
{'QDY', 'HEM'}
pdb: ./input_data/5vc0.pdb
{'RIT', 'HEM'}
pdb: ./input_data/8ewp.pdb
{'X2Q', 'HEM'}
pdb: ./input_data/7ufe.pdb
{'NXR', 'HEM'}
pdb: ./input_data/6unh.pdb
{'QEA', 'HEM'}
pdb: ./input_data/7kvo.pdb
{'X6S', 'HEM'}
pdb: ./input_data/6unl.pdb
{'QDJ', 'HEM'}
pdb: ./input_data/4k9u.pdb
{'HEM', '5AW'}
pdb: ./input_data/6bd8.pdb
{'D81', 'HEM'}

In [7]:
# ALIGN with pymol and write each to seperate file

In [28]:
proc = glob("./processed//*aligned*.pdb")

In [30]:
for pdb in proc:
    u = mda.Universe(pdb)
    print(f"pdb: {pdb}")
    # print(u.atoms.segments)
    protein_A = u.select_atoms("protein and chainid A or resname HEM")
    lig = u.select_atoms(f"chainid A and not protein and not resname HEM")
    if len(lig) == 0:
        continue
    lig.residues.resnames = ["LIG"]*len(lig.residues)
    combined = protein_A + lig
    stem = Path(pdb).stem
    combined.write(f"./processed/{stem}_final_renamed.pdb")

pdb: ./processed/7kvh_processed_aligned.pdb
pdb: ./processed/7kvi_processed_aligned.pdb
pdb: ./processed/7ksa_processed_aligned.pdb
pdb: ./processed/7ufb_processed_aligned.pdb
pdb: ./processed/6ung_processed_aligned.pdb
pdb: ./processed/8ewd_processed_aligned.pdb
pdb: ./processed/5vcg_processed_aligned.pdb
pdb: ./processed/7ufa_processed_aligned.pdb
pdb: ./processed/6bd8_processed_aligned.pdb
pdb: ./processed/7kvn_processed_aligned.pdb
pdb: ./processed/6dag_processed_aligned.pdb
pdb: ./processed/6dab_processed_aligned.pdb
pdb: ./processed/7kvq_processed_aligned.pdb
pdb: ./processed/6unl_processed_aligned.pdb
pdb: ./processed/4k9w_processed_aligned.pdb
pdb: ./processed/7kvm_processed_aligned.pdb
pdb: ./processed/7uf9_processed_aligned.pdb
pdb: ./processed/6daa_processed_aligned.pdb
pdb: ./processed/6bdm_processed_aligned.pdb
pdb: ./processed/8ewl_processed_aligned.pdb
pdb: ./processed/7kvk_processed_aligned.pdb
pdb: ./processed/8dyc_processed_aligned.pdb
pdb: ./processed/6unk_processed_