# Alanine dipeptide pyCHARMM example


## Build an alanine dipeptide molecule, minimize it, and visualize it
## Solvate in a box of water using MMTSB Toolset

### Note that the environment variable CHARMM_LIB_DIR must be defined. (Note: CHARMM_LIB_DIR should point to `<charmm_install_path>/lib`)

# pyCHARMM header files plus some of the necessary functionality

In [None]:
# This script provides a simple example of building an
# alanine dipeptide and minimizing the structure and then
# calculating the energy to illustrate functionality to be
# exposed in pyCHARMM.
#  copyright C.L. Brooks III, June 1, 2022

# These are general python modules needed for this  tutorial
import os
import sys

# This should specify the path to the installed pyCHARMM library.
# It is only a fallback: it is used when CHARMM_LIB_DIR is not already
# defined in the environment, and the value chosen is echoed so the user
# can see which library location is being used.
pyCHARMM_LIB = '/Users/brookscl/charmm/c47-dev-release/install-pycharmm-nompi'
if os.getenv('CHARMM_LIB_DIR') is None:
    os.environ['CHARMM_LIB_DIR'] = pyCHARMM_LIB
    print(os.getenv('CHARMM_LIB_DIR'))

# These are a subset of the pycharmm modules that were installed when
# pycharmm was installed in your python environment
import pycharmm
import pycharmm.generate as gen
import pycharmm.ic as ic
import pycharmm.coor as coor
import pycharmm.energy as energy
import pycharmm.dynamics as dyn
import pycharmm.nbonds as nbonds
import pycharmm.minimize as minimize
import pycharmm.crystal as crystal
import pycharmm.image as image
import pycharmm.psf as psf
import pycharmm.read as read
import pycharmm.write as write
import pycharmm.settings as settings
import pycharmm.cons_harm as cons_harm
import pycharmm.cons_fix as cons_fix
import pycharmm.select as select
import pycharmm.shake as shake

from pycharmm.lib import charmm as libcharmm


##  First read in the topology and parameter files
### The topology file contains the information pertinent to building molecular systems, either as independent molecules or as "residues" linked together to form more complex structures, i.e., proteins and nucleic acids. The parameter file contains the parameters that provide the information for the force field based calculations. CHARMM has topology and parameter files that are non-overlapping representations of different "regions" of chemical space, e.g., proteins ("_prot"), nucleic acids ("_na"), ethers, lipids, small drug-like molecules ("_cgenff"), etc. Here we will utilize the protein, water and ion topology and parameter files. These files are required to "generate" (see below) a psf, which is necessary before any molecular mechanics calculation.

In [None]:
# Locations of the protein topology (rtf) and parameter (prm) files
protein_topology = '../toppar/top_all36_prot.rtf'
protein_parameters = '../toppar/par_all36m_prot.prm'

# CHARMM scripting equivalent: read rtf card name toppar/top_all36_prot.rtf
read.rtf(protein_topology)
# CHARMM scripting equivalent: read param card flexible name toppar/par_all36m_prot.prm
read.prm(protein_parameters, flex=True)

# The water/ion parameters live in a stream file, so they are passed to
# CHARMM as a scripting command through the pycharmm.lingo module.
# CHARMM scripting equivalent: stream toppar/toppar_water_ions.str
water_ions_stream = 'stream ../toppar/toppar_water_ions.str'
pycharmm.lingo.charmm_script(water_ions_stream)
# end toppar/toppar_water_ions.str


## Specify the sequence of residues, generate them and build them.
### To do this we need to input the sequence of "residues" we wish to do calculations on and generate that particular sequence with any blocking groups needed. Here we will terminate the alanine peptide with an acetyl N-terminal "patch" and an N-methyl amide C-terminal "patch". Note we will call this "segment" of structure ADP, and during the generation of the psf data structures in CHARMM we will set up the ic (internal coordinate) tables that will allow us to build the structure from the known bond distances, angles and dihedrals after seeding the building by specifying two atoms along a line and the third to define the plane of the molecule.

In [None]:
# read in the sequence of the protein to be generated
# only useful for the same residue
# equivalent to the CHARMM scripting command:
# read sequence 1
# ala
read.sequence_string('ALA')

# Generate segment ADP with the ACE (N-terminal) and CT3 (C-terminal)
# patches and set up the internal coordinate (ic) tables.
# equivalent to the CHARMM scripting command: generate ADP first ACE last CT3 setup
gen.new_segment(seg_name='ADP', first_patch='ACE', last_patch='CT3', setup_ic=True)

# equivalent to the CHARMM scripting command: ic param
ic.prm_fill(replace_all=False)
# equivalent to the CHARMM scripting command: ic seed 1 CAY 1 CY 1 N
ic.seed(res1=1, atom1='CAY', res2=1, atom2='CY', res3=1, atom3='N')
# equivalent to the CHARMM scripting command: ic build
ic.build()

# The coor orie command is useful to expose since it allows one to
# orient the system in preparation for other calculations
# equivalent to the CHARMM scripting command: coor orient
coor.orient(by_rms=False, by_mass=False, by_noro=False)
# equivalent to the CHARMM scripting command: print coor
coor.show()

# Make sure the pdb output directory exists. os.makedirs with
# exist_ok=True is a no-op when the directory is already there and does
# not depend on a shell 'mkdir' being available.
os.makedirs('pdb', exist_ok=True)
# equivalent to the CHARMM scripting command: write coor pdb name pdb/initial.pdb
write.coor_pdb('pdb/initial.pdb')

## We will use nglview graphics to show the structures. This function remaps the default element colors so that we can use different coloring schemes for the elements in different images.

In [None]:
def shift_colors():
    '''Register a random per-element color scheme with nglview.

    Each element symbol listed below is paired with a randomly drawn
    24-bit RGB color, and the resulting list is registered with nglview's
    ColormakerRegistry under the scheme name 'element-shifted'.

    Returns None; the only effect is the registration of the scheme.
    '''
    import random
    from nglview.color import ColormakerRegistry
    # Element symbols to recolor (the symbols of nglview's default element
    # color table). Only the symbols are needed because every color is
    # replaced by a random one.
    element_symbols = (
        'H', 'HE', 'LI', 'BE', 'B', 'C', 'N', 'O', 'F', 'NE',
        'NA', 'MG', 'AL', 'SI', 'P', 'S', 'CL', 'AR', 'K', 'CA',
        'SC', 'TI', 'V', 'CR', 'MN', 'FE', 'CO', 'NI', 'CU', 'ZN',
        'GA', 'GE', 'AS', 'SE', 'BR', 'KR', 'RB', 'SR', 'Y', 'ZR',
        'NB', 'MO', 'TC', 'RU', 'RH', 'PD', 'AG', 'CD', 'IN', 'SN',
        'SB', 'TE', 'I', 'XE', 'CS', 'BA', 'LA', 'CE', 'PR', 'ND',
        'PM', 'SM', 'EU', 'GD', 'TB', 'DY', 'HO', 'ER', 'TM', 'YB',
        'LU', 'HF', 'TA', 'W', 'RE', 'OS', 'IR', 'PT', 'AU', 'HG',
        'TL', 'PB', 'BI', 'PO', 'AT', 'RN', 'FR', 'RA', 'AC', 'TH',
        'PA', 'U', 'NP', 'PU', 'AM', 'CM', 'BK', 'CF', 'ES', 'FM',
        'MD', 'NO', 'LR', 'RF', 'DB', 'SG', 'BH', 'HS', 'MT', 'DS',
        'RG', 'CN', 'UUT', 'FL', 'UUP', 'LV', 'UUH', 'D', 'T',
    )
    # Each entry is [color, selection]; the selection string is '_' followed
    # by the element symbol. The color is formatted with {:06x} so it always
    # has six hex digits: hex() drops leading zeros, which would produce a
    # malformed color for any random value below 0x100000.
    element_shifted = [
        ['#{:06x}'.format(random.randrange(0, 2**24)), '_' + symbol]
        for symbol in element_symbols
    ]
    ColormakerRegistry.add_selection_scheme('element-shifted', element_shifted)

In [None]:
# Display the freshly built structure with nglview
import nglview as nv

initial_pdb = 'pdb/initial.pdb'
# Draw everything that is not solvent as sticks, colored by element
licorice_style = dict(selection='not (water or ion)', color_scheme='element')

view = nv.NGLWidget()
view.add_component(initial_pdb)
# Drop the default representation before adding our own
view.clear_representations()
view.add_representation('licorice', **licorice_style)
view.center()
# Leaving the widget as the last expression makes the notebook render it
view

## Now let's minimize the structure and view the minimized structure with nglview
### To do this we will first specify the non-bonded parameters we want to use for these molecular mechanics calculations. Note we are specifying that we will use the fswitch and vfswitch force-switching functions to truncate the electrostatic and vdW interactions. The switching will occur between ctonnb and ctofnb. We will build a non-bonded list to process pairs of interacting atoms using a cutoff radius of cutnb. We will use atom-based electrostatic (atom) and vdW (vatom) non-bonded list generation schemes and will represent the Coulomb electrostatic interactions with a constant dielectric constant (versus an r-dependent dielectric constant) of 1 (cdie=1.0). There are multiple ways of inputting the nonbonded parameters; here we create a python object my_nbonds which has a .run() method.
### Next we will use CHARMM's abnr minimizer to minimize the structure we just built using the non-bonded methods we just specified. The minimizer is set up to run 1000 steps of minimization unless the energy change falls below 0.001 or the gradient of the energy falls below 0.001, in which case minimization will stop. Finally we print out the final energy.

In [None]:
# Collect the non-bonded options in one place; nothing is sent to CHARMM yet.
# CHARMM scripting equivalent:
#   nbonds cutnb 18 ctonnb 13 ctofnb 17 cdie eps 1 atom vatom fswitch vfswitch
nonbonded_options = dict(
    cutnb=18.0, ctonnb=13.0, ctofnb=17.0,
    eps=1.0,
    cdie=True,
    atom=True, vatom=True,
    fswitch=True, vfswitch=True,
)
my_nbonds = pycharmm.NonBondedScript(**nonbonded_options)

# "Running" the object is what actually applies the options.
my_nbonds.run()

# CHARMM scripting equivalent: minimize abnr nstep 1000 tole 1e-3 tolgr 1e-3
minimize.run_abnr(nstep=1000, tolenr=1e-3, tolgrd=1e-3)
# CHARMM scripting equivalent: energy
energy.show()

## Sideline: We can grab the current coordinates from CHARMM, manipulate them and put them back into CHARMM if we wish. pyCHARMM provides the coor.get_positions() and coor.set_positions() commands to enable this. See the manipulations below (which add 1 to every coordinate and then subtract it again, for a net change of 0), each of which hands the coordinates back to CHARMM.

In [None]:
# equivalent CHARMM scripting command: coor show
coor.show()

# Report line for the average x/y/z values found in a coor.stat() result
average_report = '==================> xave {:.2f} yave {:.2f} zave {:.2f}'

# equivalent CHARMM scripting command: coor stat
# current is a python dictionary containing the output from coor stat
current = coor.stat()
print(average_report.format(current['xave'], current['yave'], current['zave']))
print(current)

# xyz will be a pandas dataframe
xyz = coor.get_positions()
# DataFrame.info is a method that prints its own summary; it has to be
# called (printing the attribute only shows the bound-method repr).
xyz.info()

# Shift every coordinate by +1, hand the positions back to CHARMM and
# confirm through coor stat that the averages moved.
xyz += 1
coor.set_positions(xyz)
displaced = coor.stat()
print(average_report.format(displaced['xave'], displaced['yave'], displaced['zave']))
print(displaced)

# Fetch the positions again and undo the shift so the minimized
# structure is restored before it is saved.
xyz = coor.get_positions()
xyz += -1
coor.set_positions(xyz)
returned = coor.stat()
print(average_report.format(returned['xave'], returned['yave'], returned['zave']))

# Save the minimized coordinates
write.coor_pdb('pdb/minimized.pdb')

In [None]:
# Overlay the initial (component 0) and minimized (component 1) structures
peptide_only = 'not (water or ion)'

view = nv.NGLWidget()

# Component 0: initial structure in the default element colors
view.add_component('pdb/initial.pdb')
view.clear_representations()
view.add_representation('licorice', selection=peptide_only, color_scheme='element')

# Register the 'element-shifted' scheme so the second structure can be
# told apart from the first
shift_colors()

# Component 1: minimized structure in the shifted colors
minimized_index = 1
view.add_component('pdb/minimized.pdb')
view.clear_representations(component=minimized_index)
view.add_representation('licorice', selection=peptide_only,
                        color='element-shifted', component=minimized_index)

view.center()
view

## The code below just illustrates some of the variables one can access from psf and parameters as well as some of the selection syntax you can use.

In [None]:
# Explore some of the pyCHARMM selection syntax
my_atoms = pycharmm.SelectAtoms(seg_id='ADP')
# my_atoms is a pyCHARMM object, but can be converted to a bool list with the list
# command as shown here.
flags = list(my_atoms)
# The selection above is made by segment id, so the label says seg_id
print('DEBUG: SelectAtoms seg_id test')
print(flags)

# Select by chemical (parameter) type
my_atoms = pycharmm.SelectAtoms(chem_type='HA3')
flags = list(my_atoms)
print('DEBUG: SelectAtoms chem_type test')
print(flags)

# Examine access to some of the CHARMM parameter and psf data

# number of atom type codes in topology/parameter
natc = pycharmm.param.get_natc()
# list of atom type codes from parameter/topology files
atc = pycharmm.param.get_atc()
print('DEBUG: list of atom type codes from top/par files')
for i in range(natc):
    print('ATC {} = {}'.format(i, atc[i]))
print('DEBUG: end atom type codes')

# Get the atom type codes for atoms in the psf
n_atoms = psf.get_natom()
# iac[i] is used below as an index into atc for atom i
iac = psf.get_iac()
print('DEBUG: begin chem types ' + str(natc))
for i in range(n_atoms):
    print('    ', i, ' : ', atc[iac[i]])
print('DEBUG: end chem types')

# Get the atom IUPAC names from PSF
print('DEBUG: begin atom types')
atypes = psf.get_atype()
for i, atom_type in enumerate(atypes):
    print('    ', i, ' : ', atom_type)
print('DEBUG: end atom types')

# Select only atoms with specific IUPAC
my_atoms = pycharmm.SelectAtoms(atom_type='HA')
flags = list(my_atoms)
print('DEBUG: SelectAtoms atom_type test')
print(flags)

# Same thing but only for residue 1
my_atoms = pycharmm.SelectAtoms().by_res_and_type('ADP', '1', 'HA')
flags = list(my_atoms)
# Report only the indices of the atoms that are part of the selection
for i, flag in enumerate(flags):
    if flag:
        print('    atom i ', i, ' : ', flag)


## Now we will save the coordinates and the CHARMM psf file (a file that contains the key information about the connectivity and related properties for the molecule we just built), then delete all of the atoms in our system and start again by reading in the psf and coordinates. This is an alternative if you already have generated a psf and saved coordinates elsewhere.

In [None]:
# Files used to round-trip the peptide through disk
adp_pdb_file = 'pdb/adp.pdb'
adp_psf_file = 'pdb/adp.psf'

# CHARMM scripting: write coor pdb name pdb/adp.pdb
write.coor_pdb(adp_pdb_file)
# CHARMM scripting: write psf card name pdb/adp.psf
write.psf_card(adp_psf_file)

# CHARMM scripting: delete atom select all end
everything = pycharmm.SelectAtoms().all_atoms()
psf.delete_atoms(everything)

# Start again from the saved files.
# CHARMM scripting: read psf card name pdb/adp.psf
read.psf_card(adp_psf_file)
# CHARMM scripting: read coor pdb name pdb/adp.pdb resid
read.pdb(adp_pdb_file, resid=True)

# Solvate the peptide in TIP3P water
## In the following we will use the MMTSB toolset to solvate the blocked alanine residue in a cubic box of TIP3P water using the convpdb.pl commands noted below.

In [None]:
# CHARMM scripting command: system "convpdb.pl -solvate -cutoff 10 -cubic -out charmm22 pdb/adp.pdb
# | convpdb.pl -segnames adp ala -nsel TIP3 > pdb/wt00.pdb"
# NOTE(review): the CHARMM-script form above passes 'adp ala' after
# -segnames while the command built below does not - confirm which is intended.
solvate_command = ('convpdb.pl -solvate -cutoff 10 -cubic -out charmm22 pdb/adp.pdb | '
                   'convpdb.pl -segnames -nsel TIP3 > pdb/wt00.pdb')
# Run the pipeline through the shell and keep its exit status.
solvate_status = os.system(solvate_command)
# A shell pipeline only reports the status of its last command, so an
# empty output file is also treated as a failure. Stopping here gives a
# clear message instead of a confusing error when pdb/wt00.pdb is read later.
if solvate_status != 0 or os.path.getsize('pdb/wt00.pdb') == 0:
    raise RuntimeError(
        'Solvation with convpdb.pl failed (status {}); check that the MMTSB '
        'Toolset is installed and convpdb.pl is on your PATH.'.format(solvate_status))

## Look at the solvated system, does it look ok? Do you see any non-bonded contacts?

In [None]:
# Show the freshly solvated system. pdb/adp+wat_min.pdb is only written
# after the solvent minimization further down, so at this point the
# peptide (pdb/adp.pdb) and the water overlay produced by convpdb.pl
# (pdb/wt00.pdb) are loaded as two separate components.
view = nv.NGLWidget()

# Component 0: the peptide as space-filling spheres
view.add_component('pdb/adp.pdb')
view.clear_representations()
view.add_representation('spacefill', selection='not (water or ion)', color_scheme='element')

# Component 1: the water as sticks
view.add_component('pdb/wt00.pdb')
view.clear_representations(component=1)
view.add_representation('licorice', selection='water', color_scheme='element', component=1)

view.center()
view

## Let's minimize the structures and see what that looks like.
## Generate water segment and minimize the system, peptide + solvent, finally save the psf and coordinates
## Note that in "conditioning" the system I first fix the peptide atoms and then minimize the water. I am using the steepest descents algorithm because it works best for large systems and/or when you may have bad contacts.

In [None]:
# An alternative way of defining a sequence: take it from a pdb file
# CHARMM scripting: read sequ pdb name pdb/wt00.pdb
read.sequence_pdb('pdb/wt00.pdb')

# Generate the water segment without angle or dihedral terms
# CHARMM scripting: generate wt00 noangle nodihedral
gen.new_segment('WT00', angle=False, dihedral=False)

# Read the water coordinates from the same file
# CHARMM scripting: read coor pdb name pdb/wt00.pdb resid
read.pdb('pdb/wt00.pdb', resid=True)

# Use the coordinate statistics to derive the box length
# CHARMM scripting: coor stat
stats = coor.stat()

# Extent of the system along each axis; the cubic box uses the largest
xsize, ysize, zsize = (
    stats['xmax'] - stats['xmin'],
    stats['ymax'] - stats['ymin'],
    stats['zmax'] - stats['zmin'],
)
boxsize = max((xsize, ysize, zsize))

# Half the box length, used for the crystal build and the cutoffs below
boxhalf = 0.5 * boxsize

# CHARMM scripting: crystal define cubic @boxsize @boxsize @boxsize 90 90 90
crystal.define_cubic(boxsize)
# CHARMM scripting: crystal build cutoff @boxhalf noper 0
crystal.build(boxhalf)

# Image centering: by segment for the peptide, by residue for the solvent
# CHARMM scripting: image byseg xcen 0 ycen 0 zcen 0 select segid adp end
image.setup_segment(0.0, 0.0, 0.0, 'ADP')
# CHARMM scripting: image byres xcen 0 ycen 0 zcen 0 select resname tip3 end
image.setup_residue(0.0, 0.0, 0.0, 'TIP3')

# Non-bonded cutoffs for the solvated box, capped at 12 and never larger
# than half the box
cutnb = min(boxhalf, 12)
cutim = cutnb
ctofnb = cutnb - 1.0
ctonnb = cutnb - 3.0

# Second nbonds example: build the script object, then run it
# CHARMM scripting: nbonds cutnb @cutnb cutim @cutim ctofnb @ctofnb ctonnb @ctonnb -
#        inbfrq -1 imgfrq -1
solvated_nbonds = pycharmm.NonBondedScript(
    cutnb=cutnb, cutim=cutim, ctonnb=ctonnb, ctofnb=ctofnb,
    eps=1.0,
    cdie=True,
    atom=True, vatom=True,
    fswitch=True, vfswitch=True,
    inbfrq=-1, imgfrq=-1)
solvated_nbonds.run()

# Hold the peptide fixed so that only the solvent relaxes around it
# CHARMM scripting: cons fix select segid adp end
peptide_atoms = pycharmm.SelectAtoms(seg_id='ADP')
cons_fix.setup(peptide_atoms)

# Steepest-descent minimization of the solvent under periodic boundaries
# CHARMM scripting: mini sd nstep 200 tole 1e-3 tolgrd 1e-3
minimize.run_sd(nstep=200, tolenr=1e-3, tolgrd=1e-3)

# Release the fixed atoms again
# CHARMM scripting: cons fix select none end
cons_fix.turn_off()

# Save the psf and coordinates of the solvated peptide
# CHARMM scripting: write psf card name pdb/adp+wat.psf
write.psf_card('pdb/adp+wat.psf')
# CHARMM scripting: write coor pdb name pdb/adp+wat_min.pdb
write.coor_pdb('pdb/adp+wat_min.pdb')

## Finally, let's visualize the system and see what things look like!

In [None]:
# Show the solvated system after the solvent minimization
solvated_pdb = 'pdb/adp+wat_min.pdb'

view = nv.NGLWidget()
view.add_component(solvated_pdb)
view.clear_representations()

# Peptide as space-filling spheres, water as sticks, both colored by element
for style, selection in (('spacefill', 'not (water or ion)'),
                         ('licorice', 'water')):
    view.add_representation(style, selection=selection, color_scheme='element')

view.center()
view

# This is the end of this tutorial example. 
## You should have learned how to 1) build a peptide using the ic (internal coordinate) commands in CHARMM to construct initial coordinates for the system; 2) to minimize the system and examine the results of the minimization; 3) to use the MMTSB Toolset to solvate the system (with convpdb.pl); 4) to prepare the solvated system for further calculations by "conditioning" the solvent after the solvent overlay.
## As an exercise, try modifying this tutorial to build the tri-peptide of sequence A-D-A. Use the same blocking groups, call the sequence TRIP (for tri-peptide).