# MDAnalysis of the AF3 model of the Dimer capsid bound to the 8PMX Fab domain

In [1]:
import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis import distances
from Bio import PDB
from Bio.PDB.PDBIO import PDBIO
from Bio.PDB.StructureBuilder import StructureBuilder

### Convert the cif file into pdb

In [2]:
# Read the AF3 mmCIF model with Biopython and write it back out in PDB
# format, which is the file loaded by MDAnalysis afterwards.
parser = PDB.MMCIFParser()
structure = parser.get_structure("test", "AF_model.cif")

io = PDBIO()
io.set_structure(structure)
io.save("AF3_model1.pdb")

### Read the pdb in MDAnalysis

In [3]:
# A PDB file provides both the topology and the coordinates, so a single
# argument is enough; passing the same file a second time as the
# "trajectory" only makes MDAnalysis read it again.
u = mda.Universe("AF3_model1.pdb")

### Select the atoms for distance calculation
##### We will select the alpha carbons (CA atoms) of the capsid and of the antibody

In [10]:
# Chain selections for the two partners. Edit the chain IDs here if the
# model uses a different chain layout.
capsid_selection = 'chainID A B'
antibody_selection = 'chainID C D'

# Every atom of the model (capsid AND antibody, despite the name). It is
# narrowed down to the capsid residues when capsid_atoms_dict is built below.
capsid_atoms = u.select_atoms('all')

# CA atoms of the capsid and of the antibody Fab domain.
capsid_CA = u.select_atoms('name CA and ' + capsid_selection)
antibody_CA = u.select_atoms('name CA and ' + antibody_selection)

print("capsid number of residues: {}".format(len(capsid_CA)))
print("antibody number of residues: {}".format(len(antibody_CA)))

# Per-CA bookkeeping, all in the same order as capsid_CA (useful later).
capsid_chainIDs = [el.chainID for el in capsid_CA]
capsid_res_nbs = [el.resid for el in capsid_CA]
# NOTE: keyed by residue number only, so when two chains share a residue
# number the entry of the chain seen last wins. Harmless when the chains have
# the same sequence and numbering; otherwise read resname from the atoms.
capsid_resnames_dict = {el.resid: el.resname for el in capsid_CA}

# Capsid chains in order of first appearance. dict.fromkeys() removes the
# duplicates while keeping a reproducible order (a set of strings may come
# back in a different order on every run).
capsid_chains = list(dict.fromkeys(capsid_chainIDs))

# Dictionary used to build the PDB file with the contacts in the b-factor
# column: chain -> residue number -> list of atoms of that residue.
# The keys are created from the CA list first, so only residues that have a
# CA in the given chain are present, in capsid_CA order.
capsid_atoms_dict = {chain: {} for chain in capsid_chains}
for chain, resnb in zip(capsid_chainIDs, capsid_res_nbs):
    capsid_atoms_dict[chain].setdefault(resnb, [])

# Single pass over the atoms instead of rescanning all of them for every
# (chain, residue) pair.
for el in capsid_atoms:
    chain_residues = capsid_atoms_dict.get(el.chainID)
    if chain_residues is not None and el.resid in chain_residues:
        chain_residues[el.resid].append(el)

### Calculate the distance matrix between these two selections

In [6]:
# Pairwise CA-CA distances: one row per capsid CA, one column per antibody CA.
# The coordinate arrays are passed explicitly (rather than the AtomGroups) so
# the call also works with MDAnalysis releases whose distance_array only
# accepts coordinate arrays.
distance_matrix = distances.distance_array(capsid_CA.positions, antibody_CA.positions)
print(distance_matrix.shape)

### set a reasonable distance cutoff for contact:

In [7]:
# A capsid CA and an antibody CA closer than this value count as a contact
# (compared against the entries of distance_matrix).
# NOTE(review): MDAnalysis reports distances in Angstrom by default, so this
# is presumably 8 Angstrom - confirm.
distance_cutoff = 8.0

### Identify the contacts

In [8]:
# A capsid residue is flagged (1.0) when at least one antibody CA lies closer
# than distance_cutoff, otherwise it stays 0.0. Row i of distance_matrix
# corresponds to capsid_res_nbs[i], so contacts is in the same order as the
# other per-residue lists.
contacts = (distance_matrix < distance_cutoff).any(axis=1).astype(float)

# Dictionary associating each residue with the presence of a contact or not:
# (resnb, chain) -> contact
contact_dict = dict(zip(zip(capsid_res_nbs, capsid_chainIDs), contacts))


# The flags are floats (they end up in the b-factor column); cast the total
# to int so the count is not printed as e.g. "12.0".
print("{} residues were found at the interaction surface with the antibody".format(int(contacts.sum())))

### save the contact list in the bfactor column of a pdb file for visualisation:

In [9]:
output_name = "AF_model1_withcontacts.pdb"

# Rebuild a capsid-only structure with Biopython in which the b-factor of
# every atom holds the contact flag of its residue (1.0 = in contact with the
# antibody, 0.0 = not), then write it as PDB for visualisation.
out_structure_builder = StructureBuilder()
out_structure_builder.init_structure(structure_id="out")
# Biopython model ids are integers; a single model is written either way.
out_structure_builder.init_model(model_id=0)
for chain, chain_residues in capsid_atoms_dict.items():
    out_structure_builder.init_chain(chain_id=chain)
    out_structure_builder.init_seg(segid=chain)
    for resnb, residue_atoms in chain_residues.items():
        # Skip residue numbers that have no atom in this chain, so no empty
        # residue is added to the output structure.
        if not residue_atoms:
            continue
        # The flag is the same for all atoms of the residue: look it up once.
        b_factor = contact_dict[(resnb, chain)]
        # Take the residue name from the residue's own atoms; a lookup by
        # residue number alone could return the name of another chain's
        # residue when the chains share residue numbers.
        out_structure_builder.init_residue(resname=residue_atoms[0].resname, field=" ", resseq=int(resnb), icode=' ')
        for el in residue_atoms:
            # The element is taken as the first letter of the atom name, which
            # is adequate for C/N/O/S/H protein atoms but would be wrong for
            # two-letter elements.
            out_structure_builder.init_atom(name=el.name, coord=el.position, b_factor=b_factor, occupancy=1.0, altloc=' ', fullname=el.name, element=el.name[0])

out_structure = out_structure_builder.get_structure()

io = PDBIO()
io.set_structure(out_structure)
io.save(output_name)