In [6]:
import numpy as np
import re

In [88]:
def write_pdb(atom_nbs, atom_types, res_types, res_nbs, chain_ids, X, Y, Z, pops=None, bfactors=None, chain_ids2=None, atom_labels=None, output="output.pdb"):
    """Write parallel per-atom lists to ``output`` as fixed-width ATOM records.

    Entry ``i`` of every list describes atom ``i``. A ``TER`` line is written
    whenever the chain identifier differs from the previous atom's, and the
    file is closed by an ``END`` line (written without a trailing newline).

    Parameters
    ----------
    atom_nbs : sequence
        Atom serial numbers (right-aligned in 7 characters after ``ATOM``).
    atom_types : sequence of str
        Atom names (centred in 5 characters).
    res_types : sequence of str
        Residue names (centred in 3 characters).
    res_nbs : sequence
        Residue numbers (centred in 8 characters).
    chain_ids : sequence of str
        Chain identifiers, written as-is.
    X, Y, Z : sequence of float
        Coordinates, each written as ``8.3f``.
    pops : sequence of float, optional
        Occupancies, written as ``6.2f``. Defaults to 1.0 for every atom.
    bfactors : sequence of float, optional
        B-factors, written as ``6.2f``. When ``None`` the b-factor column is
        left blank.
    chain_ids2 : optional
        Accepted for interface compatibility; not used by this function.
    atom_labels : sequence of str, optional
        Label written after the b-factor column. Defaults to the first
        alphabetic character of each atom name (a heuristic: it cannot tell
        two-letter elements from one-letter ones).
    output : str
        Path of the file to create or overwrite.

    Notes
    -----
    NOTE(review): the residue number is centred in an 8-character field,
    whereas the wwPDB format right-justifies it in columns 23-26. ``read_pdb``
    in this file parses the same 8-column window, so the layout is kept
    unchanged here -- confirm that downstream tools accept it.
    """
    n_atoms = len(atom_nbs)
    if pops is None:
        pops = [1.0] * n_atoms
    if atom_labels is None:
        # Use the first letter of the name so that both padded (" CA ") and
        # stripped ("CA", "N") atom names work; indexing position 1 failed on
        # one-character names.
        atom_labels = [next((ch for ch in name if ch.isalpha()), " ") for name in atom_types]

    #                  ids  type rtyp chain rnb  X        Y       Z      pop   bfact          atomlabels
    line_with_b = "ATOM{:>7} {:^5}{:^3} {}{:^8}{:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}{:>6.2f}           {}  \n"
    # Same layout with the six b-factor columns replaced by blanks.
    line_without_b = "ATOM{:>7} {:^5}{:^3} {}{:^8}{:>8.3f}{:>8.3f}{:>8.3f}{:>6.2f}                 {}  \n"
    ter_line = "TER                                                                             \n"
    end_line = "END                                                                             "

    with open(output, "w") as f:
        for i in range(n_atoms):
            # A change of chain identifier marks the end of the previous chain.
            if i > 0 and chain_ids[i - 1] != chain_ids[i]:
                f.write(ter_line)
            if bfactors is not None:
                f.write(line_with_b.format(atom_nbs[i], atom_types[i], res_types[i], chain_ids[i], res_nbs[i], X[i], Y[i], Z[i], pops[i], bfactors[i], atom_labels[i]))
            else:
                # bfactors must not be indexed here: it is None in this branch.
                f.write(line_without_b.format(atom_nbs[i], atom_types[i], res_types[i], chain_ids[i], res_nbs[i], X[i], Y[i], Z[i], pops[i], atom_labels[i]))
        # No TER record is written after the final chain.
        f.write(end_line)


def read_pdb(pdbfile):
    """Parse the ATOM records of a PDB-style file into parallel per-atom lists.

    Only lines that start with ``ATOM`` are read; every other line (including
    blank lines and ``HETATM`` records) is ignored. Fields are taken from
    fixed character windows of each line.

    Parameters
    ----------
    pdbfile : str
        Path of the file to read.

    Returns
    -------
    tuple
        ``(atom_nbs, atom_types, res_types, res_nbs, chain_ids, X, Y, Z,
        pops, bfactors, None, atom_labels)``. The ``None`` fills the
        ``chain_ids2`` slot of ``write_pdb`` in this file.

    Raises
    ------
    ValueError
        If a numeric window cannot be converted, e.g. a blank occupancy or
        b-factor, or a residue number followed by an insertion code, because
        the whole 8-character window ``[22:30]`` is passed to ``int``.
    """
    with open(pdbfile, "r") as f:
        # Pad every record to 80 columns so that fixed-position reads (the
        # label at index 77 in particular) do not fail on truncated lines;
        # a missing label is then read as a blank.
        data = [line.rstrip("\n").ljust(80) for line in f if line.startswith("ATOM")]
    atom_nbs = [int(el[4:11]) for el in data]
    atom_types = [el[12:17].strip(" ") for el in data]
    res_types = [el[17:20].strip(" ") for el in data]
    chain_ids = [el[21] for el in data]
    res_nbs = [int(el[22:30]) for el in data]
    X = [float(el[30:38]) for el in data]
    Y = [float(el[38:46]) for el in data]
    Z = [float(el[46:54]) for el in data]
    pops = [float(el[54:60]) for el in data]
    bfactors = [float(el[60:66]) for el in data]
    atom_labels = [el[77] for el in data]
    return atom_nbs, atom_types, res_types, res_nbs, chain_ids, X, Y, Z, pops, bfactors, None, atom_labels

### Read the input PDB file and extract the protein information

In [89]:
# Load every ATOM record of the input structure into parallel per-atom lists.
# read_pdb returns None in the chain_ids2 position.
(
    atom_nbs, atom_types, res_types, res_nbs, chain_ids,
    X, Y, Z,
    pops, bfactors, chain_ids2, atom_labels,
) = read_pdb("4g6k.pdb")

### Shift the residue numbers of chain L by 500 to avoid residue-number overlaps in HADDOCK
##### Adapt the chain ID and the offset to your needs

In [90]:
# Add 500 to the residue number of every atom that belongs to chain "L".
# The list is modified in place, so running this cell again shifts the
# numbers by a further 500.
for i, el in enumerate(chain_ids):
    if el != "L":
        continue
    res_nbs[i] = res_nbs[i] + 500

### Save a clean PDB

In [91]:
# Write the (renumbered) atoms to a new file in the working directory.
output = "test.pdb"
write_pdb(
    atom_nbs, atom_types, res_types, res_nbs, chain_ids,
    X, Y, Z,
    pops=pops,
    bfactors=bfactors,
    chain_ids2=chain_ids2,
    atom_labels=atom_labels,
    output=output,
)