# Automated setup of mixtures

We've been working on streamlining the setup of simulations of arbitrary mixtures in AMBER, GROMACS, OpenMM, and other packages for some of our own research. This is a quick demo so you can get a feel for it and see whether you're interested in contributing.

*Before running the below*, you will need to conda install several packages:
* `conda install -c omnia openmoltools`
* `conda install -c bioconda nglview` 
* `conda install -c omnia parmed`

You also need to visit https://github.com/mobleylab/solvationtoolkit, click the "clone or download" button at the right, and download the zip file. Extract it, then install it from the command line by running `python setup.py install` in the extracted solvationtoolkit directory.

It is possible you will also need to issue the command `jupyter-nbextension enable nglview --py --sys-prefix`

In [1]:
from solvationtoolkit.solvated_mixtures import *

#In this particular instance I'll just look at six solutes/solvent mixtures (not an all-by-all combination) which are pre-specified
#solute names
solutes = ['phenol', 'toluene', 'benzene', 'methane', 'ethanol', 'naphthalene']
#Solvent names
solvents = ['cyclohexane', 'cyclohexane', 'cyclohexane', 'octanol', 'octanol', 'octanol']

#Number of solute/solvent molecules
Nsolu = 3
Nsolv = 100

#Construct systems
for idx in range( len( solutes) ):
    # Define new mixture
    mixture = MixtureSystem()
    # Add solute and solvent
    mixture.addComponent(name=solutes[idx], number=Nsolu)
    mixture.addComponent(name=solvents[idx], number=Nsolv)
    # Note you can optionally specify mole fraction instead, or a mix of numbers/mole fractions, etc.
    
    # Build system, including AMBER input files (but not GROMACS)
    mixture.build(amber=True, gromacs=True)

  yield pat.split(line.strip())
  yield pat.split(line.strip())



# Mixture 

tolerance 2.000000
filetype pdb
output /var/folders/k0/v23_69_j415062nr1f1y8sr00000gn/T/tmphnwaac3n.pdb
add_amber_ter


structure /var/folders/k0/v23_69_j415062nr1f1y8sr00000gn/T/tmpc5fe2siz.pdb
  number 3 
  inside box 0. 0. 0. 26.788816 26.788816 26.788816
end structure

structure /var/folders/k0/v23_69_j415062nr1f1y8sr00000gn/T/tmp4fpggss3.pdb
  number 100 
  inside box 0. 0. 0. 26.788816 26.788816 26.788816
end structure


# Mixture 

tolerance 2.000000
filetype pdb
output /var/folders/k0/v23_69_j415062nr1f1y8sr00000gn/T/tmphnwaac3n.pdb
add_amber_ter


structure /var/folders/k0/v23_69_j415062nr1f1y8sr00000gn/T/tmpc5fe2siz.pdb
  number 3 
  inside box 0. 0. 0. 26.788816 26.788816 26.788816
end structure

structure /var/folders/k0/v23_69_j415062nr1f1y8sr00000gn/T/tmp4fpggss3.pdb
  number 100 
  inside box 0. 0. 0. 26.788816 26.788816 26.788816
end structure


source leaprc.gaff
source oldff/leaprc.ff99SB
ZTX = loadmol2 in0.mol2
ZCF = loadmol2 in1.mol2
box = loadPdb tbox.

OSError: data/gromacs/phenol_cyclohexane_3_100.top exists; not overwriting

## Let's try and see if we can do a quick visualization of one of the systems via mdtraj just to make sure it looks right

In [2]:
# MDTraj reads the structure; nglview renders it inside the notebook
import mdtraj as md
import nglview

# Load the "trajectory" (here just a single structure).
# SolvationToolkit generates both AMBER and GROMACS files, so either format works.
# AMBER alternative:
#traj = md.load('data/amber/phenol_cyclohexane_3_100.inpcrd', top='data/amber/phenol_cyclohexane_3_100.prmtop')
traj = md.load('data/gromacs/phenol_cyclohexane_3_100.gro')

# Create the viewer widget for the loaded structure
view = nglview.show_mdtraj(traj)

# Try some of the following to modify representations.
# For selection info, see http://arose.github.io/ngl/doc/#User_manual/Usage/Selection_language
view.clear_representations()
view.add_licorice('all')
view.add_licorice('1-3', color='blue')
view.add_surface('1', opacity=0.3)
view.add_surface('2, 3', color='red', opacity=0.3)

# Show the view. This must be the last command that touches the view: if you
# modify the representation after this, your view will be empty.
view

# VIEWER USAGE:
# - Use your typical zoom command/gesture (i.e. pinch) to zoom in and out
# - Click and drag to reorient
# - Click on specific atoms/residues to find out details of what they are (and how they could be selected)

## Other possibly interesting things to try:
* Find the average distance from phenol to phenol
* Calculate the density or volume of the system
* etc.

(Drawing on MDTraj - see docs online)

In [None]:
# Use this box to try additional things

# Let's use a SMIRNOFF forcefield to parameterize the system, minimize, and run dynamics

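For this you'll have to `conda install -c omnia openforcefield`. The cells below also import `oechem` from the OpenEye toolkits, so those need to be installed as well.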

First we handle imports

In [3]:
# Import the SMIRNOFF forcefield engine and some useful tools
from openforcefield.typing.engines import smirnoff
from openforcefield.typing.engines.smirnoff import ForceField
from openforcefield.utils import get_data_filename, extractPositionsFromOEMol, generateTopologyFromOEMol

# At this point SMIRNOFF requires oechem, though an RDKit version is in the works
from openeye import oechem

# We use PDBFile to get OpenMM topologies from PDB files
from simtk.openmm.app import PDBFile

# We'll use OpenMM for simulations/minimization
from simtk import openmm, unit
from simtk.openmm import app
# MDTraj for working with trajectories; time for timing
import time
import mdtraj

## Now we handle assignment of force field parameters and generation of an OpenMM System

In [4]:
# Specify names of molecules that are components of the system
mol_filenames = ['phenol', 'cyclohexane']

# Input flavor to use for reading mol2 files (so that it can understand GAFF atom names)
flavor = (oechem.OEIFlavor_Generic_Default
          | oechem.OEIFlavor_MOL2_Default
          | oechem.OEIFlavor_MOL2_Forcefield)

# Load OEMols of components of system - SMIRNOFF requires OEMols of the components
# and an OpenMM topology as input
oemols = []
for name in mol_filenames:
    filename = 'data/monomers/' + name + '.mol2'
    ifs = oechem.oemolistream(filename)
    try:
        ifs.SetFlavor(oechem.OEFormat_MOL2, flavor)
        mol = oechem.OEGraphMol()
        # OEReadMolecule returns False when nothing could be read (e.g. missing
        # file); fail here rather than handing an empty molecule to createSystem.
        if not oechem.OEReadMolecule(ifs, mol):
            raise IOError("Unable to read a molecule from %s" % filename)
    finally:
        # Always release the stream, even if reading failed
        ifs.close()
    # Right now we have GAFF atom names, which OE doesn't like; reassign
    oechem.OETriposAtomNames(mol)
    oemols.append(mol)

# Load SMIRNOFF99Frosst force field (AMBER-family force field created by Christopher Bayly)
forcefield = ForceField(get_data_filename('forcefield/smirnoff99Frosst.ffxml'))

# Get OpenMM topology for mixture of phenol and cyclohexane from where SolvationToolkit created
# it on disk
pdbfile = PDBFile('data/packmol_boxes/phenol_cyclohexane_3_100.pdb')

# Assign SMIRNOFF parameters and create system; here we'll use PME with a 1.1 nm LJ cutoff.
system = forcefield.createSystem(
    pdbfile.topology,
    oemols,
    nonbondedMethod=smirnoff.PME,
    nonbondedCutoff=1.1*unit.nanometer,
)


## Finally we energy minimize and run dynamics

In [5]:
# Set how many steps we'll run and other run parameters
num_steps = 10000
trj_freq = 100  # Trajectory output frequency
data_freq = 100  # Energy/data output frequency
temperature = 300*unit.kelvin  # Temperature
time_step = 2.*unit.femtoseconds
friction = 1./unit.picosecond  # Langevin friction constant

# Bookkeeping -- if this cell is run more than once (perhaps after an exception),
# make sure the previous reporter is closed before a new one is opened.
# This is deliberately best-effort: on the first run `reporter` does not exist yet.
try:
    reporter.close()
except Exception:
    pass

# Set up integrator, platform for running simulation
integrator = openmm.LangevinIntegrator(temperature, friction, time_step)
# NOTE(review): `platform` is not passed to app.Simulation below, so OpenMM
# selects its own default platform. Pass it as the fourth argument if you
# really want to force the (slow) Reference platform.
platform = openmm.Platform.getPlatformByName('Reference')
simulation = app.Simulation(pdbfile.topology, system, integrator)
# Set positions, velocities
simulation.context.setPositions(pdbfile.positions)
simulation.context.setVelocitiesToTemperature(temperature)

# Before doing dynamics, energy minimize (initial geometry will be strained)
simulation.minimizeEnergy()

# Set up reporters for output: trajectory to HDF5, energies/state data to CSV
reporter = mdtraj.reporters.HDF5Reporter('mixture.h5', trj_freq)
simulation.reporters = [
    reporter,
    app.StateDataReporter('data.csv', data_freq, step=True, potentialEnergy=True,
                          temperature=True, density=True),
]

# Run the dynamics
print("Starting simulation")
# time.clock() was removed in Python 3.8; perf_counter() measures wall-clock time
start = time.perf_counter()
try:
    simulation.step(num_steps)
    end = time.perf_counter()
finally:
    # Close the trajectory file even if the run is interrupted or fails
    reporter.close()
print("Elapsed time %.2f seconds" % (end-start))
print("Done!")


Starting simulation
Elapsed time 15.69 seconds
Done!


## Let's make a movie of our simulation

In [6]:
import nglview

# Read back the trajectory written during the simulation and hand it to the viewer
traj = mdtraj.load('mixture.h5')
view = nglview.show_mdtraj(traj)

# Try some of the following to modify representations.
# For selection info, see http://arose.github.io/ngl/doc/#User_manual/Usage/Selection_language
view.clear_representations()
view.add_licorice('all')
view.add_licorice('1-3', color='blue')
view.add_surface('1', opacity=0.3)
view.add_surface('2, 3', color='red', opacity=0.3)

# Note that if you view a movie and keep it playing, your notebook will run a hair slow...
view