In [1]:
#Refine FFTDock poses via minimization in explicit protein
#Inputs:
    #cofactor/pdb_with_fad/tropb.pdb
    #dock/ligands/2.pdb
    #dock/ligands/2.str #CGenFF stream file
    #dock/poses/tropb_2_fftdock
    #toppar
#Outputs:
    #dock/poses/tropb_2_prot
    #dock/scores/tropb_2_prot.csv

In [2]:
import os 
import re
import pandas as pd
import numpy as np
# Must be set before `import pycharmm` below.
# NOTE(review): presumably pycharmm uses CHARMM_LIB_DIR to locate the CHARMM shared library — confirm.
# NOTE(review): absolute, user-specific path; adjust when running on another machine.
os.environ['CHARMM_LIB_DIR'] = "/home/azamh/charmm-dev/lib"
# Every relative path used later (toppar, ligands, poses, scores) is resolved from this directory.
os.chdir('/home/azamh/demo/seq_struct_func/dock')

# These are a subset of the pycharmm modules that were installed when
# pycharmm was installed in your python environment
import pycharmm
import pycharmm.generate as gen
import pycharmm.ic as ic
import pycharmm.coor as coor
import pycharmm.energy as energy
import pycharmm.dynamics as dyn
import pycharmm.nbonds as nbonds
import pycharmm.minimize as minimize
import pycharmm.crystal as crystal
import pycharmm.image as image
import pycharmm.psf as psf
import pycharmm.read as read
import pycharmm.write as write
import pycharmm.settings as settings
import pycharmm.cons_harm as cons_harm
import pycharmm.cons_fix as cons_fix
import pycharmm.select as select
import pycharmm.shake as shake
import pycharmm.settings as settings
import pycharmm.grid as grid
import pycharmm.charmm_file as charmm_file
from pycharmm.select_atoms import SelectAtoms
from pycharmm.lingo import charmm_script

from pycharmm.lib import charmm as libcharmm

[gollum153][[12515,1],0][../../../../../openmpi-3.1.2/opal/mca/btl/openib/btl_openib_component.c:1671:init_one_device] error obtaining device attributes for mlx5_0 errno says Protocol not supported
--------------------------------------------------------------------------

  Local host:   gollum153
  Local device: mlx5_0
--------------------------------------------------------------------------


In [3]:
#Arguments
#System identifiers; every pose and score path below is derived from these two
protein = 'tropb'
ligand = '2'

#Input locations (relative paths)
toppardir = '../toppar'
liganddir = './ligands'
proteindir = '../cofactor/pdb_with_fad'

#Pose directories: FFTDock poses are read from fftdockdir, refined poses go to dockdir
pose_prefix = f'poses/{protein}_{ligand}'
fftdockdir = pose_prefix + '_fftdock'
dockdir = pose_prefix + '_prot'

#Make sure the output directory exists before any pose is written
os.makedirs(dockdir, exist_ok=True)

In [4]:
## Read in the topology and parameter files
#Bomb level is lowered to -1 only while the force-field files are read
#NOTE(review): presumably so non-fatal read warnings do not abort the run — confirm
settings.set_bomb_level(-1)

#Topologies: the first file is read fresh, the following ones are appended
rtf_reads = (
    ('top_all36_prot.rtf', {}),
    ('top_all36_cgenff.rtf', {'append': True}),
    ('probes.rtf', {'append': True}),
)
for rtf_name, rtf_opts in rtf_reads:
    read.rtf(os.path.join(toppardir, rtf_name), **rtf_opts)

#Parameters: same order as the topologies, all read with flex = True
prm_reads = (
    ('par_all36m_prot.prm', {'flex': True}),
    ('par_all36_cgenff.prm', {'append': True, 'flex': True}),
    ('probes.prm', {'append': True, 'flex': True}),
)
for prm_name, prm_opts in prm_reads:
    read.prm(os.path.join(toppardir, prm_name), **prm_opts)

settings.set_bomb_level(0)

#Stream files: the ligand stream from liganddir, then st2_fadh.str from toppardir
ligand_stream = os.path.join(liganddir, ligand) + '.str'
charmm_script(f'stream {ligand_stream}')
fadh_stream = os.path.join(toppardir, "st2_fadh.str")
charmm_script(f'stream {fadh_stream}')

  
 CHARMM>     read rtf card -
 CHARMM>     name ../toppar/top_all36_prot.rtf
 VOPEN> Attempting to open::../toppar/top_all36_prot.rtf::
 MAINIO> Residue topology file being read from unit  91.
 TITLE> *>>>>>>>>CHARMM36 ALL-HYDROGEN TOPOLOGY FILE FOR PROTEINS <<<<<<
 TITLE> *>>>>> INCLUDES PHI, PSI CROSS TERM MAP (CMAP) CORRECTION <<<<<<<
 TITLE> *>>>>>>>>>>>>>>>>>>>>>>>>>> MAY 2011 <<<<<<<<<<<<<<<<<<<<<<<<<<<<
 TITLE> * ALL COMMENTS TO THE CHARMM WEB SITE: WWW.CHARMM.ORG
 TITLE> *             PARAMETER SET DISCUSSION FORUM
 TITLE> *
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
  
 CHARMM>     read rtf card -
 CHARMM>     name ../toppar/top_all36_cgenff.rtf -
 CHARMM>     append
 VOPEN> Attempting to open::../toppar/top_all36_cgenff.rtf::
 MAINIO> Residue topology file being read from unit  91.
 TITLE> *  --------------------------------------------------------------------------  *
 TITLE> *          CGENFF: TOPOLOGY FOR THE CHARMM GENERAL FORCE FIELD V. 4.6      

1

In [5]:
#Build ligand
#The same PDB supplies the residue sequence and, once segment LIGA exists, the coordinates
ligand_pdb = os.path.join(liganddir, ligand + '.pdb')
read.sequence_pdb(ligand_pdb)
gen.new_segment(seg_name='LIGA')
read.pdb(ligand_pdb, resid=True)

  
 CHARMM>     read sequence pdb -
 CHARMM>     name ./ligands/2.pdb
 VOPEN> Attempting to open::./ligands/2.pdb::
 MAINIO> Sequence information being read from unit  91.
 TITLE>  *

          RESIDUE SEQUENCE --     1 RESIDUES
          LIG 
 VCLOSE: Closing unit   91 with status "KEEP"
  
 CHARMM>     
  
 NO PATCHING WILL BE DONE ON THE FIRST RESIDUE
 NO PATCHING WILL BE DONE ON THE LAST  RESIDUE
 AUTGEN: Autogenerating specified angles and dihedrals.
 GENPSF> Segment   1 has been generated. Its identifier is LIGA.
 PSFSUM> PSF modified: NONBOND lists and IMAGE atoms cleared.
 PSFSUM> Summary of the structure file counters :
         Number of segments      =        1   Number of residues   =        1
         Number of atoms         =       32   Number of groups     =        1
         Number of bonds         =       32   Number of angles     =       55
         Number of dihedrals     =       72   Number of impropers  =        2
         Number of cross-terms   =        0   Numbe

In [6]:
#Setup nonbonds
#NOTE(review): ctonnb equals ctofnb; the CHARMM log reports
#"CUTNB,CTOFNB,CTONNB are not in correct order" for these values — confirm this is intended.
nbond_options = dict(
    cutnb=12.0,
    ctonnb=10.0,
    ctofnb=10.0,
    eps=0.75,
    cdie=False,
    rdie=True,
    switch=True,
    vswitch=True,
)
my_nbonds = pycharmm.NonBondedScript(**nbond_options)
# The options only take effect once the script object is run.
my_nbonds.run()

  
 CHARMM>     nbonds cutnb 12.0 -
 CHARMM>     ctonnb 10.0 -
 CHARMM>     ctofnb 10.0 -
 CHARMM>     eps 0.75 -
 CHARMM>     rdie -
 CHARMM>     switch -
 CHARMM>     vswitch

 NONBOND OPTION FLAGS: 
     ELEC     VDW      ATOMs    RDIElec  SWITch   VATOm    VSWItch 
     BYGRoup  NOEXtnd  NOEWald 
 CUTNB  = 12.000 CTEXNB =999.000 CTONNB = 10.000 CTOFNB = 10.000
 CGONNB =  0.000 CGOFNB = 10.000
 WMIN   =  1.500 WRNMXD =  0.500 E14FAC =  1.000 EPS    =  0.750
 NBXMOD =      5
 There are        0 atom  pairs and        0 atom  exclusions.
 There are        0 group pairs and        0 group exclusions.
 GTNBCT> CUTNB,CTOFNB,CTONNB=      12.0      10.0      10.0

      ***** CUTNB,CTOFNB,CTONNB are not in correct order.
      ******************************************
      BOMLEV (  0) IS NOT REACHED. WRNLEV IS  5

 <MAKINB> with mode   5 found     87 exclusions and     69 interactions(1-4)
 <MAKGRP> found      0 group exclusions.
 Generating nonbond list with Exclusion mode = 5
 == PRIM

<pycharmm.script.NonBondedScript at 0x2b9b690f9d20>

In [7]:
#Minimize ligand in vacuum for initial energy
#Steepest descent; at this point only the ligand segment has been built
sd_settings = {'nstep': 1000, 'tolenr': 1e-3, 'tolgrd': 1e-4}
minimize.run_sd(**sd_settings)


 NONBOND OPTION FLAGS: 
     ELEC     VDW      ATOMs    RDIElec  SWITch   VATOm    VSWItch 
     BYGRoup  NOEXtnd  NOEWald 
 CUTNB  = 12.000 CTEXNB =999.000 CTONNB = 10.000 CTOFNB = 10.000
 CGONNB =  0.000 CGOFNB = 10.000
 WMIN   =  1.500 WRNMXD =  0.500 E14FAC =  1.000 EPS    =  0.750
 NBXMOD =      5
 There are      409 atom  pairs and      156 atom  exclusions.
 There are        0 group pairs and        0 group exclusions.
 GTNBCT> CUTNB,CTOFNB,CTONNB=      12.0      10.0      10.0

      ***** CUTNB,CTOFNB,CTONNB are not in correct order.
      ******************************************
      BOMLEV (  0) IS NOT REACHED. WRNLEV IS  5

 Generating nonbond list with Exclusion mode = 5
 == PRIMARY == SPACE FOR      481 ATOM PAIRS AND        0 GROUP PAIRS

 General atom nonbond list generation found:
      409 ATOM PAIRS WERE FOUND FOR ATOM LIST
        1 GROUP PAIRS REQUIRED ATOM SEARCHES

 PRNHBD: CUToff Hydrogen Bond  distance =    0.5000   Angle =   90.0000
         CuT switching 

True

In [8]:
## Build protein
#PSF and PDB share the "<protein>_fad" file stem inside proteindir
protein_stem = os.path.join(proteindir, protein + '_fad')
protein_psf = protein_stem + '.psf'
protein_pdb = protein_stem + '.pdb'

#Append to the PSF that already holds the ligand segment, then load the coordinates
read.psf_card(protein_psf, append=True)
read.pdb(protein_pdb, resid=True)

  
 CHARMM>     read psf card -
 CHARMM>     name ../cofactor/pdb_with_fad/tropb_fad.psf -
 CHARMM>     append
 VOPEN> Attempting to open::../cofactor/pdb_with_fad/tropb_fad.psf::
 MAINIO> Protein structure file being appended from unit  91.
 psf_read_formatted: Reading PSF in the expanded format.
 TITLE>  * EXECUTING CHARMM SCRIPT FROM PYTHON
 TITLE>  *  DATE:    12/10/22     14:18:36      CREATED BY USER: azamh
 TITLE>  *
 PSFSUM> PSF modified: NONBOND lists and IMAGE atoms cleared.
 PSFSUM> Summary of the structure file counters :
         Number of segments      =        3   Number of residues   =      449
         Number of atoms         =     7072   Number of groups     =     2043
         Number of bonds         =     7162   Number of angles     =    12887
         Number of dihedrals     =    18839   Number of impropers  =     1277
         Number of cross-terms   =      447   Number of autogens   =        0
         Number of HB acceptors  =      652   Number of HB donors  =  

In [9]:
#Fix protein and cofactor atoms
#Everything outside segment LIGA is selected for fixing, leaving only the ligand free
ligand_selection = SelectAtoms(seg_id='LIGA')
cons_fix.setup(selection=~ligand_selection)

True

In [10]:
#Get initial energy of system by translating ligand away from protein
#Segment LIGA is shifted by 400 along x, y and z, far beyond the 12.0 nonbonded cutoff,
#so the reference energy is evaluated without protein-ligand contact.
#The sub-command is spelled out in full ("translate").
charmm_script('coor translate xdir 400 ydir 400 zdir 400 select segid LIGA end')

1

  
 CHARMM>     coor tranlate xdir 400 ydir 400 zdir 400 select segid LIGA end
 SELRPN>     32 atoms have been selected out of   7072
 TRANSLATION VECTOR   400.000000  400.000000  400.000000
 SELECTED COORDINATES TRANSLATED IN THE MAIN SET.

  


In [11]:
#Get initial energy
def get_energy_df(pose_name):
    """Return the current CHARMM energy terms as a one-row DataFrame.

    The table from ``energy.get_energy()`` is indexed by its 'name' column,
    its 'feature' column is dropped, and the result is transposed so that
    each energy term becomes a column.

    Parameters
    ----------
    pose_name
        Label used as the single index entry of the returned frame.

    Returns
    -------
    pandas.DataFrame
        Index named 'pose' holding ``pose_name``; columns named 'term'.
        Assumes exactly one value column remains after dropping 'feature',
        so the transposed frame has a single row — TODO confirm.
    """
    term_table = energy.get_energy()
    by_term = term_table.set_index('name').drop(columns='feature')
    pose_row = by_term.T
    pose_row.index = [pose_name]
    pose_row = pose_row.rename_axis(index='pose', columns='term')
    return pose_row
# Reference row labelled 'initial': energy terms for the coordinates currently loaded in CHARMM.
initial_energy_df = get_energy_df('initial')
print(initial_energy_df)


 NONBOND OPTION FLAGS: 
     ELEC     VDW      ATOMs    RDIElec  SWITch   VATOm    VSWItch 
     BYGRoup  NOEXtnd  NOEWald 
 CUTNB  = 12.000 CTEXNB =999.000 CTONNB = 10.000 CTOFNB = 10.000
 CGONNB =  0.000 CGOFNB = 10.000
 WMIN   =  1.500 WRNMXD =  0.500 E14FAC =  1.000 EPS    =  0.750
 NBXMOD =      5
 There are        0 atom  pairs and        0 atom  exclusions.
 There are        0 group pairs and        0 group exclusions.
 GTNBCT> CUTNB,CTOFNB,CTONNB=      12.0      10.0      10.0

      ***** CUTNB,CTOFNB,CTONNB are not in correct order.
      ******************************************
      BOMLEV (  0) IS NOT REACHED. WRNLEV IS  5

 <MAKINB> with mode   5 found  20049 exclusions and  18468 interactions(1-4)
 <MAKGRP> found   5960 group exclusions.
 Generating nonbond list with Exclusion mode = 5
 == PRIMARY == SPACE FOR  1279721 ATOM PAIRS AND        0 GROUP PAIRS
 NBONDA>>  Maximum group spatial extent (12A) exceeded.
   Size is       20.12 Angstroms and starts with atom:    6

In [12]:
#Minimize all fftdock poses
nsave = 500  #number of FFTDock poses to refine; pose files are numbered 1..nsave
pose_energy_dfs = [initial_energy_df]

#Hide large output. Both setters return the level that was active before the call;
#those values are kept so the finally-block can put them back even if a pose fails.
prev_verbosity = settings.set_verbosity(0)
prev_warn_level = settings.set_warn_level(-2)
try:
    for i in range(1, nsave + 1):
        print('minimizing fftdock pose', i)

        #Read FFTDock pose
        #NOTE(review): a .crd file is read with read.pdb — confirm the pose files are PDB-formatted
        fftdock_pose = os.path.join(fftdockdir, f'{protein}_{ligand}_{i}.crd')
        if not os.path.isfile(fftdock_pose):
            #CHARMM output is silenced here, so fail loudly instead of scoring stale coordinates
            raise FileNotFoundError(f'FFTDock pose not found: {fftdock_pose}')
        read.pdb(fftdock_pose, resid=True)
        energy.show()

        #Perform minimization in explicit protein: short SD, then ABNR
        minimize.run_sd(nstep=50)
        minimize.run_abnr(nstep=1000, tolenr = 1e-3)

        #Get refined energy
        pose_energy_df = get_energy_df(i)
        pose_energy_dfs.append(pose_energy_df)

        #write pdb (ligand segment only)
        pose_pdb = os.path.join(dockdir, f'{protein}_{ligand}_{i}.pdb')
        write.coor_pdb(pose_pdb, sele = 'segid LIGA end')
finally:
    #Restore the output levels that were in effect before the loop
    settings.set_verbosity(prev_verbosity)
    settings.set_warn_level(prev_warn_level)

minimizing fftdock pose 1
minimizing fftdock pose 2
minimizing fftdock pose 3
minimizing fftdock pose 4
minimizing fftdock pose 5
minimizing fftdock pose 6
minimizing fftdock pose 7
minimizing fftdock pose 8
minimizing fftdock pose 9
minimizing fftdock pose 10
minimizing fftdock pose 11
minimizing fftdock pose 12
minimizing fftdock pose 13
minimizing fftdock pose 14
minimizing fftdock pose 15
minimizing fftdock pose 16
minimizing fftdock pose 17
minimizing fftdock pose 18
minimizing fftdock pose 19
minimizing fftdock pose 20
minimizing fftdock pose 21
minimizing fftdock pose 22
minimizing fftdock pose 23
minimizing fftdock pose 24
minimizing fftdock pose 25
minimizing fftdock pose 26
minimizing fftdock pose 27
minimizing fftdock pose 28
minimizing fftdock pose 29
minimizing fftdock pose 30
minimizing fftdock pose 31
minimizing fftdock pose 32
minimizing fftdock pose 33
minimizing fftdock pose 34
minimizing fftdock pose 35
minimizing fftdock pose 36
minimizing fftdock pose 37
minimizing

-2

In [13]:
#Concat energy dataframes
#Stack the per-pose rows into one table; any term missing from a row becomes 0
stacked_energy_df = pd.concat(pose_energy_dfs)
energy_df = stacked_energy_df.fillna(0)
energy_df

term,energy,deltae,grms,bonds,angles,ureyb,dihedrals,impropers,vdwaals,elec,hbonds,asp,user
pose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
initial,-14.885048,-1.278977e-13,1.834568,2.273115,3.203081,0.216589,6.462700,0.009944,14.654264,-41.704742,0.0,0.0,0.0
1,-1.886479,-1.299857e+01,1.211485,2.349655,16.934136,0.549208,43.922863,0.015866,26.465952,-92.124158,0.0,0.0,0.0
2,65.925263,-6.781174e+01,3.086646,4.612438,24.877168,1.239871,47.814934,3.716743,73.742378,-90.078269,0.0,0.0,0.0
3,40.865165,2.506010e+01,1.855198,3.131496,23.512255,0.631715,60.018098,0.553488,29.889567,-76.871455,0.0,0.0,0.0
4,10.497213,3.036795e+01,1.847578,2.131081,9.631265,0.529235,33.382124,0.041067,33.523521,-68.741080,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,-6.061015,4.395703e+01,10.694298,6.316135,16.101484,0.844611,20.909595,0.099382,35.514951,-85.847173,0.0,0.0,0.0
497,22.620147,-2.868116e+01,1.461466,3.536595,10.058342,0.741245,38.886896,0.074635,36.511980,-67.189546,0.0,0.0,0.0
498,35.615896,-1.299575e+01,3.558728,3.710906,23.663197,1.236659,43.775409,0.065266,52.182042,-89.017583,0.0,0.0,0.0
499,3.979576,3.163632e+01,2.225404,2.267818,16.538047,0.619316,49.229269,0.031322,16.565605,-81.271801,0.0,0.0,0.0


In [14]:
#Get final-initial energy
#Subtract the 'initial' row from every row, column by column
#NOTE(review): the deltae and grms columns are shifted the same way as the energy terms — confirm that is wanted
initial_terms = energy_df.loc['initial'].to_numpy()
delta_energy_df = energy_df.sub(initial_terms, axis='columns')
delta_energy_df

term,energy,deltae,grms,bonds,angles,ureyb,dihedrals,impropers,vdwaals,elec,hbonds,asp,user
pose,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
initial,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0
1,12.998569,-12.998569,-0.623083,0.076540,13.731054,0.332619,37.460163,0.005922,11.811688,-50.419416,0.0,0.0,0.0
2,80.810311,-67.811742,1.252078,2.339323,21.674087,1.023282,41.352234,3.706799,59.088113,-48.373527,0.0,0.0,0.0
3,55.750213,25.060098,0.020630,0.858381,20.309174,0.415126,53.555397,0.543544,15.235303,-35.166713,0.0,0.0,0.0
4,25.382261,30.367952,0.013010,-0.142033,6.428183,0.312646,26.919424,0.031123,18.869256,-27.036338,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,8.824033,43.957035,8.859730,4.043020,12.898402,0.628022,14.446895,0.089438,20.860686,-44.142431,0.0,0.0,0.0
497,37.505195,-28.681161,-0.373102,1.263480,6.855261,0.524656,32.424195,0.064691,21.857716,-25.484804,0.0,0.0,0.0
498,50.500944,-12.995749,1.724160,1.437791,20.460116,1.020070,37.312709,0.055322,37.527778,-47.312841,0.0,0.0,0.0
499,18.864624,31.636321,0.390836,-0.005297,13.334965,0.402727,42.766569,0.021378,1.911341,-39.567059,0.0,0.0,0.0


In [15]:
#Save energies
scorefile = f'scores/{protein}_{ligand}_prot.csv'
#to_csv does not create missing parent directories, so make sure scores/ exists first
os.makedirs(os.path.dirname(scorefile), exist_ok=True)
delta_energy_df.to_csv(scorefile)