In [1]:
from crimm.Adaptors.PropKaAdaptors import PropKaProtonator
from crimm.Modeller import TopologyLoader, ParameterLoader
from crimm.Modeller.TopoFixer import fix_chain
from crimm import fetch_alphafold, fetch_rcsb



## Patch residues in chains based on propKa predicted protonation states
In this example, we are going to predict protonation states on titratable residues from an AlphaFold structure

In [2]:
# load the CHARMM parameter set and residue topology definitions for proteins;
# both loader objects are reused below (topology generation, protonation, pyCHARMM export)
param = ParameterLoader('protein')
topo = TopologyLoader('protein')

In [3]:
# fetch the AlphaFold structure for entry P00489
# (per the original note, this corresponds to PDB 1A8I chain A)
structure = fetch_alphafold('P00489')
# work on the first chain of the first model
chain = structure.models[0].chains[0]
# display the chain (structure viewer widget plus a text summary)
chain

NGLWidget()

<Polypeptide(L) id=A Residues=843>
  Description: Glycogen phosphorylase, muscle form


In [4]:
# Build the topology for the chain; the N-terminus gets the 'ACE' patch and
# the C-terminus gets the 'CT3' patch.
topo.generate_chain_topology(
    chain,
    first_patch='ACE',
    last_patch='CT3',
    coerce=True,
)
# Complete the internal-coordinate entries from the parameter set and assign
# parameters to the chain's topology elements
# (NOTE(review): inferred from the method names — confirm against the crimm docs).
param.fill_ic(topo)
param.apply(chain.topo_elements)
# Build the atoms required by the terminal patches; the return value is kept
# so the newly built atoms can be inspected.
built_atoms = fix_chain(chain)



## PropKa Protonation State Prediction
The `PropKaProtonator` uses propKa as a backend for prediction but is intended to be used as a module for scripting.

The protonator requires 3 arguments to instantiate: topology, parameter, and the pH value for residue protonation predictions.

In [5]:
# create the protonator from the topology and parameter loaders; predictions are made for pH 7.4
protonator = PropKaProtonator(topo, param, pH=7.4)

Use `load_model()` method to load a model object, and the protonation state prediction on each protein chain in the model will be done automatically. 

The `apply_patches()` method is designed for use with CHARMM/pyCHARMM. The CHARMM residue topology and parameter files define only a subset of the titratable residues, namely:

**Protonations**
1. ASP -> ASPP
2. GLU -> GLUP

**Deprotonations**
1. LYS -> LSN (neutral lysine)
2. CYS -> CYSD
3. SER -> SERD

**Special Cases**
1. HIS -> HSP: protonated histidine (HSP) is a complete residue definition in CHARMM rather than a patch.

The `apply_patches()` method will only modify or replace the residue topology definitions listed above.

In [6]:
# loading the model runs the protonation-state prediction for each protein chain in it
protonator.load_model(structure.models[0])
# modify/replace the residue topology definitions according to the predictions
protonator.apply_patches()
# Currently, to reflect the changed residue definitions on the structure,
# fix_chain() has to be called explicitly
# (this rebinds built_atoms from the earlier fix_chain() call)
built_atoms = fix_chain(chain)

Unexpected number (11) of atoms in residue MET   1 A   in conformation 1A
Unexpected number (9) of atoms in residue PRO 843 A   in conformation 1A


The patched/replaced residue definitions can be accessed via `protonator.patches`.

In [7]:
# nested dict: chain id -> {residue number: name of the patch / replacement residue}
protonator.patches

{'A': {124: 'GLUP', 297: 'GLUP', 342: 'HSP', 665: 'GLUP', 681: 'LSN'}}

Alternatively, the `TopologyLoader` object has the attribute `patched_defs` for all the patched residues including terminal patches

In [8]:
# patched residue definitions keyed as '<residue>_<patch>'; terminal patches (ACE, CT3) are included
topo.patched_defs

{'MET_ACE': <Residue Definition name=MET code=M atoms=23 Patched with ACE>,
 'PRO_CT3': <Residue Definition name=PRO code=P atoms=20 Patched with CT3>,
 'GLU_GLUP': <Residue Definition name=GLU code=E atoms=16 Patched with GLUP>,
 'LYS_LSN': <Residue Definition name=LYS code=K atoms=21 Patched with LSN>}

## Report from propKa
All other predictions made by propKa can be shown with `protonator.report()`, retrieved as a dictionary with `protonator.to_dict()`, or, if you have pandas installed, as a DataFrame with `protonator.to_dataframe()`.

In [9]:
# collect the propKa results into a pandas DataFrame (requires pandas to be installed)
df = protonator.to_dataframe()

In [10]:
# preview the first lysine rows of the report (predicted pKa vs. model pKa)
df[df['resname'] == 'LYS'].head()

Unnamed: 0,chain_id,resseq,resname,pka,model_pka,buriedness
4,A,10,LYS,11.040684,10.5,0.0
6,A,12,LYS,10.353526,10.5,0.0
10,A,29,LYS,10.122493,10.5,0.0
11,A,30,LYS,11.319016,10.5,0.0
15,A,42,LYS,10.810584,10.5,0.0


Finally, to apply these patches in pyCHARMM, the `patch` function from `pycharmm.generate` should be used 

In [11]:
from crimm.Adaptors.pyCHARMMAdaptors import load_chain, load_topology, load_parameters
from pycharmm.generate import patch

In [12]:
# Hand the crimm objects over to pyCHARMM: residue topology first, then the
# parameters, and finally the chain itself.
# NOTE(review): append=False presumably replaces anything already loaded
# rather than adding to it — confirm against the adaptor documentation.
load_topology(topo, append=False)
load_parameters(param, append=False)
load_chain(chain)

  
 CHARMM>     read rtf card -
 CHARMM>     name /tmp/tmpyif5u1ry
 VOPEN> Attempting to open::/tmp/tmpyif5u1ry::
 MAINIO> Residue topology file being read from unit  91.
 TITLE> *RTF LOADED FROM CRIMM
 TITLE> 36  2
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM>     read param card -
 CHARMM>     name /tmp/tmpyj_nr_ip -
 CHARMM>     flex
 VOPEN> Attempting to open::/tmp/tmpyj_nr_ip::

          PARAMETER FILE BEING READ FROM UNIT 91
 TITLE> *PRM LOADED FROM CRIMM
 TITLE> *>>>> CHARMM36 ALL-HYDROGEN PARAMETER FILE FOR PROTEINS <<<<<<<<<<
 TITLE> *>>>>> INCLUDES PHI, PSI CROSS TERM MAP (CMAP) CORRECTION <<<<<<<<
 TITLE> *>>>>>>>>>>>>>>>>>>>>>>>>>> JAN. 2016 <<<<<<<<<<<<<<<<<<<<<<<<<<<<
 TITLE> * ALL COMMENTS TO THE CHARMM WEB SITE: WWW.CHARMM.ORG
 TITLE> *             PARAMETER SET DISCUSSION FORUM
 TITLE> *
 PARMIO> NONBOND, HBOND lists and IMAGE atoms cleared.
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM>     read sequence 

In [13]:
# Replay every crimm patch inside pyCHARMM, chain by chain.
for chain_id, patch_dict in protonator.patches.items():
    # the segment id is 'PRO' followed by the upper-cased chain id
    segid = 'PRO' + chain_id.upper()
    for resid, patch_name in patch_dict.items():
        # 'HSP' is skipped: it is not a patch but a complete residue definition.
        # protonator.apply_patches() has already redefined the residue, so
        # load_chain() hands pyCHARMM a residue it recognizes as-is.
        if patch_name != 'HSP':
            # the patch target is given as "<segid> <resid>"
            patch(patch_name, f'{segid} {resid!s}')

  
 CHARMM>     patch GLUP PROA 124
 ATOM  PROA GLU  124  HE2  ADDED.

 Message from MAPIC: Atom numbers are changed.
 AUTGEN: Autogenerating specified angles and dihedrals.
 AUTOGEN: 24990 angles are removed before regeneration for selected atoms.
 AUTOGEN: 36625 dihedrals are removed before regeneration for selected atoms.
 PATCH: Check angles and dihedrals autogenerated.
 PSFSUM> PSF modified: NONBOND lists and IMAGE atoms cleared.
 PSFSUM> Summary of the structure file counters :
         Number of segments      =        1   Number of residues   =      843
         Number of atoms         =    13686   Number of groups     =     4084
         Number of bonds         =    13841   Number of angles     =    24991
         Number of dihedrals     =    36627   Number of impropers  =     2425
         Number of cross-terms   =      843   Number of autogens   =        0
         Number of HB acceptors  =     1270   Number of HB donors  =     1565
         Number of NB exclusions =        0

After applying the patches, the number of atoms and topology elements should match in pyCHARMM and crimm.

In [14]:
# Count the atoms on the crimm side for comparison with the pyCHARMM PSF summary.
atom_count = sum(1 for _ in chain.get_atoms())
print("atoms=" + str(atom_count))
# The container repr lists the bond/angle/dihedral/improper/cmap counts.
print(chain.topo_elements)

atoms=13687
<TopologyElementContainer for <Polypeptide(L) id=A Residues=843> with bonds=13842, angles=24990, dihedrals=36628, impropers=2425, cmap=0>
