# Rigorous Thermodynamic Decomposition of Salt Effects on the Polymerization of Polyethylene Glycol
Stefan Hervø-Hansen<sup>a,*</sup>, Jan Heyda<sup>b,*</sup>, and Nobuyuki Matubayasi<sup>a,*</sup>.<br><br>
<sup>a</sup> Division of Chemical Engineering, Graduate School of Engineering Science, Osaka University, Toyonaka, Osaka 560-8531, Japan.<br>
<sup>b</sup> Department of Physical Chemistry, University of Chemistry and Technology, Prague CZ-16628, Czech Republic.<br>
<sup>*</sup> To whom correspondence may be addressed stefan@cheng.es.osaka-u.ac.jp, heydaj@vscht.cz, and nobuyuki@cheng.es.osaka-u.ac.jp.

## Part 1: Simulations


### Introduction


### Methods & Materials


### References



## Import of Python Modules

In [None]:
# Notebook dependent libs
import parmed as pmd
import numpy as np
import matplotlib.pyplot as plt
import mdtraj as md
import os, re, time
from shutil import copyfile
import pandas as pd
from scipy import stats
from distutils.spawn import find_executable

# Simulation specific libs
import sys
from simtk.openmm import app
import simtk.openmm as mm
import openmmtools as mmtools
import parmed as pmd

# Check for external programs
if None in [find_executable('packmol'), find_executable('perl')]:
    print('WARNING: External program missing!')
    
homedir = !pwd
homedir = homedir[0]
print(homedir)

## Molecular Dynamics Simulations

### Simulation settings
For the calculation of solvation free energies, we need to simulate the solvated state ($\lambda=1$) and the reference state ($\lambda=0$). The duration of each simulation, together with the output frequency for saving configurations and other thermodynamic properties for statistical evaluation, is determined by the `states` variable. The molecular dynamics simulations have been set up such that one can run `Nruns` independent molecular dynamics simulations. The PEG chain lengths are set by the variable `nmers`, and the system composition of salts and their concentrations are controlled by the variables `salts` and `concentrations` respectively.

In [None]:
# State of simulations, (outFreq is steps per frame)
# 'sol' is the solvated state and 'ref' the reference state. `Nsteps` is the
# number of production steps and `OutFreq` the number of steps between saved
# frames. The durations quoted below correspond to the 2 fs timestep set in the
# generated OpenMM script; frames = Nsteps / OutFreq.
states = {
          'sol': {'Nsteps': 50000000,  'OutFreq': 5000}, # 100 nanoseconds, 10000 frames
          'ref': {'Nsteps': 500000,  'OutFreq': 1000}    #   1 nanosecond,    500 frames
         }

# PEG chain lengths to simulate; used in directory names (PEG<n>mer) and in the
# structure/force-field file names.
nmers = [2, 4, 6, 8, 15, 36]


# Salts to simulate: directory/file label -> ion names. The ion names select
# PDB_files/<ion>.pdb, and Cation+Anion must reproduce the label because the
# packmol output file is named from the two ion names while it is read back
# using the label (hence 'No_' + 'salt' for the salt-free entry).
salts = {'No_salt': {'Cation': 'No_', 'Anion': 'salt' },
#         'NaCl'   : {'Cation': 'Na' , 'Anion': 'Cl'   },
#         'CsCl'   : {'Cation': 'Cs' , 'Anion': 'Cl'   },
#         'NaSCN'  : {'Cation': 'Na' , 'Anion': 'SCN'  },
#         'CsSCN'  : {'Cation': 'Cs' , 'Anion': 'SCN'  },
        }

# Approximate concentrations of salts with the corresponding number of
# molecules of each species. The key is the nominal concentration (written as
# "M" in the topology title and, formatted to two decimals, used as the
# directory name); the value holds the particle counts placed in the box.
concentrations = {0.00: {'PEG': 1, 'Water': 3000, 'Cation': 0,   'Anion': 0},
#                  1.00: {'PEG': 1, 'Water': 3000, 'Cation': 54,  'Anion': 54},
#                  2.00: {'PEG': 1, 'Water': 3000, 'Cation': 109, 'Anion': 109},
                 }

#* Calculated by hand *#
# Cube length ≈ 45 Ångstroms. Round up to 50 and then adjust to correct volume with barostat.

### Construction of topology (.top) and structure (.pdb) files
Fully automated construction of topology files in GROMACS format and of initial configurations using Packmol. There are no important parameters to edit in the following code.

In [None]:
# Packmol input template, filled in with str.format by the construction cell.
#   {nmer}, {cation}, {anion}  -> select the PDB files and name the output file
#   {N_PEG}, {N_anion}, {N_cation}, {N_wat} -> number of molecules per species
#   {homedir}                  -> root directory holding PDB_files/
#   {salt}                     -> line prefix: '#' comments out the two ion
#                                 sections for salt-free systems, '' keeps them
# The PEG structure is placed with `fixed ... centerofmass` at (25, 25, 25),
# i.e. the middle of the 50-unit cube that all other species are packed into.
packmol_script="""
tolerance 2.0
filetype pdb
output PEG_{nmer}_{cation}{anion}_sol.pdb

structure {homedir}/PDB_files/PEO-{nmer}-mer.pdb
        number {N_PEG}
        fixed 25. 25. 25. 0. 0. 0.
        centerofmass
end structure

{salt}structure {homedir}/PDB_files/{anion}.pdb
{salt}        number {N_anion}
{salt}        inside cube 0. 0. 0. 50.
{salt}end structure

{salt}structure {homedir}/PDB_files/{cation}.pdb
{salt}        number {N_cation}
{salt}        inside cube 0. 0. 0. 50.
{salt}end structure

structure {homedir}/PDB_files/water.pdb
        number {N_wat}
        inside cube 0. 0. 0. 50.
end structure
"""

# GROMACS-format topology template, filled in with str.format by the
# construction cell. It takes the same placeholders as the packmol template
# plus {conc} (concentration label used in the system title). Here {salt} is
# ';' for salt-free systems, which comments out the ion entries under
# [ molecules ], and '' otherwise.
# NOTE(review): the monovalent.itp include is commented out in the template;
# presumably it has to be enabled before systems containing ions are built --
# confirm.
topology="""
[ system ]
; Name
Peg in {cation}{anion} {conc} M aqueous solution.

; Include main forcefield parameters
#include "{homedir}/Force_fields/forcefield.itp"

;include custom forcefield parameters
#include "{homedir}/Force_fields/peg-forcefield.itp"
#include "{homedir}/Force_fields/peg{nmer}mer.itp"
#include "{homedir}/Force_fields/spce.itp"
;#include "{homedir}/Force_fields/monovalent.itp"

[ molecules ]
; Compound         #mols
PEG{nmer}               {N_PEG}
{salt}{anion}      {N_anion}
{salt}{cation}     {N_cation}
SOL                {N_wat}
"""

In [None]:
%cd -q $homedir
for nmer in nmers:
    nmerdir = 'PEG{}mer'.format(nmer)
    for saltdir, salt in salts.items():
        for conc, Nparticles in concentrations.items():
            concdir = '{0:.2f}'.format(conc)
            if conc == 0 and saltdir == 'No_salt':
                %cd -q $homedir/Simulations/$nmerdir/$saltdir
            elif conc != 0 and saltdir == 'No_salt':
                continue
            else:
                %cd -q $homedir/Simulations/$nmerdir/$saltdir/$concdir
                
            # Packmol Input
            with open('packmol.in', 'w') as text_file:
                # Fix for no salt
                if conc == 0:
                    saltFix='#'
                else:
                    saltFix=''
                text_file.write(packmol_script.format(N_PEG=Nparticles['PEG'], nmer=nmer, salt=saltFix,
                                                      N_cation=Nparticles['Cation'], N_anion=Nparticles['Anion'],
                                                      cation=salt['Cation'], anion=salt['Anion'],
                                                      N_wat=Nparticles['Water'], homedir=homedir))
            !packmol < packmol.in
            
            # Topology input
            with open('PEG_{nmer}_{salt}_sol.top'.format(nmer=nmer, salt=saltdir), 'w') as text_file:
                # Fix for no salt
                if conc == 0:
                    saltFix=';'
                else:
                    saltFix=''
                text_file.write(topology.format(N_PEG=Nparticles['PEG'], nmer=nmer, salt=saltFix,
                                                N_cation=Nparticles['Cation'], N_anion=Nparticles['Anion'],
                                                cation=salt['Cation'], anion=salt['Anion'], conc=concdir,
                                                N_wat=Nparticles['Water'], homedir=homedir))
            
            # Collect it all for λ=1:
            mol = pmd.load_file('PEG_{nmer}_{salt}_sol.top'.format(nmer=nmer, salt=saltdir),
                                xyz='PEG_{nmer}_{salt}_sol.pdb'.format(nmer=nmer, salt=saltdir))
            mol.save('PEG_{nmer}_{salt}_sol.top'.format(nmer=nmer, salt=saltdir), overwrite=True)
            
            # Generate files for λ=0:
            mol.strip(':PEG')
            mol.save('PEG_{nmer}_{salt}_ref.top'.format(nmer=nmer, salt=saltdir), overwrite=True)
            mol.save('PEG_{nmer}_{salt}_ref.pdb'.format(nmer=nmer, salt=saltdir), overwrite=True)
            print('Wrote initial configurations and topology files to'+os.getcwd())

### Simulation setup using OpenMM
Fully automated construction of simulation input files using python API for OpenMM. In the following one can edit the integration scheme and its parameters set in the variable `integrator` as well as editing the barostat as currently determined from `mm.MonteCarloBarostat`. Additionally one may change the non-bonded methods and their cutoffs in the `system` variable with the option of adding Lennard-Jones switching functions via the `forces` variable. Finally one may edit the number of minimization (`sim.minimizeEnergy`) and equilibration steps conducted as well as choosing whether the simulation should be conducted on GPUs or CPUs via the `platform` and `properties` variables.

In [None]:
%cd -q $homedir
N_simulations = 0
for nmer in nmers:
    nmerdir = 'PEG{}mer'.format(nmer)
    for saltdir, salt in salts.items():
        for conc in concentrations:
            concdir = '{0:.2f}'.format(conc)
            if conc == 0:
                %cd -q $homedir/Simulations/$nmerdir/$saltdir
            else:
                %cd -q $homedir/Simulations/$nmerdir/$saltdir/$concdir
            for state, settings in states.items():
                openmm_script="""

# Imports
import sys
import os
from simtk.openmm import app
import simtk.openmm as mm
import openmmtools as mmtools
from parmed import load_file, unit as u
from mdtraj.reporters import XTCReporter

print('Loading initial configuration and toplogy')
init_conf = load_file('PEG_{nmer}_{salt}_{state}.top', xyz='PEG_{nmer}_{salt}_{state}.pdb')

# Creating system
print('Creating OpenMM System')
system = init_conf.createSystem(nonbondedMethod=app.PME, ewaldErrorTolerance=0.00001,
                                nonbondedCutoff=1.2*u.nanometers, constraints=app.HBonds)

# Calculating total mass of system
total_mass = 0
for i in range(system.getNumParticles()):
    total_mass += system.getParticleMass(i).value_in_unit(u.dalton)
total_mass *= u.dalton

# Temperature-coupling by geodesic Langevin integrator (NVT)
integrator = mmtools.integrators.GeodesicBAOABIntegrator(K_r = 3,
                                                         temperature = 298.15*u.kelvin,
                                                         collision_rate = 1.0/u.picoseconds,
                                                         timestep = 2.0*u.femtoseconds)

# Pressure-coupling by a Monte Carlo Barostat (NPT)
system.addForce(mm.MonteCarloBarostat(1*u.bar, 298.15*u.kelvin, 25))

# Add LJ switching functions
forces = {{system.getForce(index).__class__.__name__: 
          system.getForce(index) for index in range(system.getNumForces())}}
forces['CustomNonbondedForce'].setUseSwitchingFunction(True)
forces['CustomNonbondedForce'].setSwitchingDistance(1*u.nanometer)

platform = mm.Platform.getPlatformByName('CUDA')
properties = {{'CudaPrecision': 'mixed', 'CudaDeviceIndex': '0'}}

# Create the Simulation object
sim = app.Simulation(init_conf.topology, system, integrator, platform, properties)

# Set the particle positions
sim.context.setPositions(init_conf.positions)

# Minimize the energy
print('Minimizing energy')
sim.minimizeEnergy(tolerance=1*u.kilojoule/u.mole, maxIterations=50000)
    
# Draw initial MB velocities
sim.context.setVelocitiesToTemperature(298.15*u.kelvin)

# Equlibrate simulation
print('Equilibrating...')
sim.step(50000)  # 50000*2 fs = 0.1 ns

# Set up the reporters
sim.reporters.append(app.StateDataReporter('output_{state}.dat', {outFreq}, totalSteps={Nsteps}+50000,
    time=True, potentialEnergy=True, kineticEnergy=True, temperature=True, volume=True, density=True,
    systemMass=total_mass, remainingTime=True, speed=True, separator='\t'))

# Set up trajectory reporter
sim.reporters.append(XTCReporter('trajectory_{state}.xtc', reportInterval={outFreq}, append=False))

# Run dynamics
print('Running dynamics! (NPT)')
sim.step({Nsteps})

# Print PME information
print('''
PARTICLE MESH EWALD PARAMETERS (Production run)
Separation parameter: {{}}
Number of grid points along the X axis: {{}}
Number of grid points along the Y axis: {{}}
Number of grid points along the Z axis: {{}}
'''.format(*forces['NonbondedForce'].getPMEParametersInContext(sim.context)))
"""
                with open('openMM_{state}.py'.format(state=state), 'w') as text_file:
                    text_file.write(openmm_script.format(state=state, Nsteps=settings['Nsteps'], nmer=nmer,
                                                         outFreq=settings['OutFreq'], salt=saltdir))
                N_simulations+=1
                print('Wrote run_openMM.py files to '+os.getcwd())


print('Simulations about to be submitted: {}'.format(N_simulations))

### Submit script
Submit script for servers employing job scheduling. The below example is utilizing PBS (for a quick guide see [here](https://latisresearch.umn.edu/creating-a-PBS-script)). However the code below may be edited to utilize Slurm instead (documentation [here](https://slurm.schedmd.com)) by changing the variable `submit_script` and by executing the commands `!sbatch submit_sol.pbs` and `!sbatch submit_ref.pbs` instead of `qsub`.

In [None]:
for nmer in nmers:
    nmerdir = 'PEG{}mer'.format(nmer)
    for saltdir in salts:
        for conc in concentrations:
            concdir = '{0:.2f}'.format(conc)
            if conc == 0:
                %cd -q $homedir/Simulations/$nmerdir/$saltdir
            else:
                %cd -q $homedir/Simulations/$nmerdir/$saltdir/$concdir
            for state in states:
                submit_script="""#!/bin/bash
#PBS -l nodes=1:ppn=18:gpu:gpus=1     # 1 node, 18 cores, GPU node, 1 gpu.
#PBS -N PEG-{nmer}_{conc}_M_{salt}_{state}    # Name of job
#PBS -e run_{state}.err               # error output
#PBS -o run_{state}.out               # output file name

source ~/.bashrc
source ~/.bash_profile
cd {path}

python openMM_{state}.py"""

                with open('submit_{state}.pbs'.format(state=state), 'w') as text_file:
                    text_file.write(submit_script.format(conc=conc, state=state, nmer=nmer,
                                                     path=os.getcwd(), salt=saltdir))
            !qsub submit_sol.pbs
            time.sleep(1) # Safety in submission of jobs: can cause problems if too fast
            !qsub submit_ref.pbs
            time.sleep(1) # Safety in submission of jobs: can cause problems if too fast