# OpenForceField for DBU (1,8-Diazabicyclo[5.4.0]undec-7-ene) in DMF (N,N-dimethylformamide)

## Parameterizing DBU with the openFF forcefield

### Collect sdf file from pubchem

In [17]:
#Create force field parameter files for small molecule
import requests
from openforcefield.utils import *
from openforcefield.topology import *
from openforcefield.utils.toolkits import OpenEyeToolkitWrapper
from openforcefield.typing.engines.smirnoff import ForceField


#Pubchem id for DBU is 81184
cid = '81184'
url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/{cid}/record/SDF/?record_type=3d&response_type=save"

# Download the 3D record. Fail loudly on an HTTP error (or a hung connection)
# instead of silently writing an error page into the .sdf file.
response = requests.get(url, timeout=60)
response.raise_for_status()
with open(f"CID_{cid}.sdf", "w") as f:
    f.write(response.text)

#Create the molecule object using the sdf_filepath
molecule = Molecule(f"CID_{cid}.sdf")
# SMIRNOFF force field used later to parameterize the solvent box
off_forcefield = ForceField('openff-1.0.0.offxml')

### Create pdb file from the sdf file

In [11]:
from openbabel import openbabel


# Convert the SDF to a PDB file with openbabel
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats("sdf", "pdb")
mol = openbabel.OBMol()
#mol.SetTitle('RES1')
# Open Babel will uncompress automatically
# ReadFile reports failure through its boolean return value; stop here rather
# than writing an empty PDB file that would break the packing step.
if not obConversion.ReadFile(mol, f"CID_{cid}.sdf"):
    raise IOError(f"Open Babel could not read CID_{cid}.sdf")
# Kept as the final expression so the cell still shows WriteFile's result.
obConversion.WriteFile(mol, f"CID_{cid}.pdb")




True

## Setting up a Solvent box of DMF

In [12]:
#Pubchem id for DMF is 6228

cid2 = '6228'
url2 = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/{cid2}/record/SDF/?record_type=3d&response_type=save"

# Download the 3D record. Fail loudly on an HTTP error (or a hung connection)
# instead of silently writing an error page into the .sdf file.
response2 = requests.get(url2, timeout=60)
response2.raise_for_status()
with open(f"CID_{cid2}.sdf", "w") as f2:
    f2.write(response2.text)

# Reuse the sdf -> pdb converter and the OBMol object created for DBU.
# NOTE(review): this relies on ReadFile resetting `mol` before filling it;
# the atom counts printed by the topology check below are consistent with that.
if not obConversion.ReadFile(mol, f"CID_{cid2}.sdf"):
    raise IOError(f"Open Babel could not read CID_{cid2}.sdf")
# Kept as the final expression so the cell still shows WriteFile's result.
obConversion.WriteFile(mol, f"CID_{cid2}.pdb")



True

### Using the SolvationToolkit package to solvate the system (https://github.com/MobleyLab/SolvationToolkit); https://github.com/openforcefield/open-forcefield-tools

In [None]:
# Disabled alternative: build the box with SolvationToolkit. The snippet is kept
# inside a string literal so that it is not executed. The final call is made on
# `solvent_box` (the object that is actually created) and `number` is an int for
# both components, so the snippet works as written if it is re-enabled.
'''from solvationtoolkit.solvated_mixtures import *
#specify the path to save the data and setup system
sb_name = 'dbu_dmf'
solvent_box = MixtureSystem(f"./{sb_name}")

solvent_box.addComponent(label='dmf',smiles='CN(C)C=O',number=100)
solvent_box.addComponent(label='dbu',smiles='C1CCC2=NCCCN2CC1',number=1)
solvent_box.build()
'''

In [14]:
from openmoltools import packmol
import mdtraj as md
''' I manually adjusted the packmol.py file (in ~/anaconda3/envs/openmm/lib/python3.7/site-packages/openmoltools) because each bond is defined as a 1 x 4 array
in which the bond type and order should be in the third and fourth columns; this is, however, not the case here.
Therefore these two columns are set to zero at the end, which corresponds to a bond order and type of None, although antechamber
will normally assign these correctly based on the bonds. Alternatively, n_atoms can be added after each molecule
to bonds_i[0] and bonds_i[1] instead, to avoid a potential loss of information.

    #cast atom type and order to 0
    if len(bonds[0])==4:
        for idx,frame in enumerate(bonds):
          # for idx2,bond in enumerate(frame):
               bonds[idx][2] = 0
               bonds[idx][3] = 0


TODO: Report to openmoltools
'''
# Solvent-box settings: which single-molecule PDB files to pack and how many
# copies of each (same order in both lists).
sb_name = 'dbu_dmf'
pdb_filenames = [f"CID_{cid}.pdb", f"CID_{cid2}.pdb"]
n_molecules = [1, 50]

# Quick sanity check of the single-molecule topologies before packing.
# Note the order: top1 is read from the cid2 file and top2 from the cid file.
top1, top2 = (md.load_pdb(f'CID_{c}.pdb').topology for c in (cid2, cid))
print(f'topology 1:{top1}, topology2:{top2}')

# Pack the box; pack_box returns an mdtraj trajectory for the packed system,
# whose topology is printed and whose coordinates are saved for later cells.
md_traj_trajectory = packmol.pack_box(
    pdb_filenames,
    n_molecules,
    box_size=20.0,
)
print(md_traj_trajectory.topology)
md_traj_trajectory.save_pdb(f'{sb_name}_openmoltools.pdb')


topology 1:<mdtraj.Topology with 1 chains, 1 residues, 12 atoms, 11 bonds>, topology2:<mdtraj.Topology with 1 chains, 1 residues, 27 atoms, 28 bonds>

# Mixture 

tolerance 2.000000
filetype pdb
output /tmp/tmpbxpqjz2v/tmpvgrb3kza.pdb
add_amber_ter


structure CID_81184.pdb
  number 1 
  inside box 0. 0. 0. 18.000000 18.000000 18.000000
end structure

structure CID_6228.pdb
  number 50 
  inside box 0. 0. 0. 18.000000 18.000000 18.000000
end structure

[array([[ 0.,  2.,  0.,  0.],
       [ 0.,  4.,  0.,  0.],
       [ 0.,  6.,  0.,  0.],
       [ 1.,  4.,  0.,  0.],
       [ 1., 10.,  0.,  0.],
       [ 2.,  3.,  0.,  0.],
       [ 2., 11.,  0.,  0.],
       [ 2., 12.,  0.,  0.],
       [ 3.,  5.,  0.,  0.],
       [ 3., 13.,  0.,  0.],
       [ 3., 14.,  0.,  0.],
       [ 4.,  7.,  0.,  0.],
       [ 5.,  8.,  0.,  0.],
       [ 5., 15.,  0.,  0.],
       [ 5., 16.,  0.,  0.],
       [ 6.,  9.,  0.,  0.],
       [ 6., 17.,  0.,  0.],
       [ 6., 18.,  0.,  0.],
       [ 7.,  8.,  0

### Using Packmol via "subprocess and os" (hardcoding)

In [25]:
import tempfile
import os
import subprocess
import numpy as np

def add_structure(file,structure,number,dimension):
    """Append one Packmol ``structure ... end structure`` section to *file*.

    Parameters
    ----------
    file : writable text file object
        Open Packmol input file; only ``file.write`` is used.
    structure : str
        Name/path of the coordinate file of the molecule to pack.
    number : int
        Number of copies to place. Exactly one copy is assumed to be the
        solute and is confined to the centre of the box; any other count is
        spread over the whole box.
    dimension : float
        Edge length of the cubic box, in the units Packmol is run with.

    Returns
    -------
    None
    """
    file.write(f'structure {structure}\n')
    file.write(f'\t number {number}\n')
    if number == 1:  # assuming this is the solute
        # Keep the solute in the middle of the box, not on the outside.
        # Packmol's "inside cube xmin ymin zmin d" takes the SIDE LENGTH d as
        # its last value (not the maximum coordinate), so the central third of
        # the box starts at dimension/3 and has side dimension/3.
        third = dimension / 3
        file.write(f'\t inside cube {third} {third} {third} {third}\n')
    else:
        file.write(f'\t inside cube 0.0 0.0 0.0 {dimension}\n')
    #other options can/should be added here
    file.write('end structure \n')
    return

#packmol settings
sb_name = 'dbu_dmf'
packmol_format = 'pdb'
cube_dimensions = 30.0  #write function based on rdf and rho?
tolerance = 3.0
output_structure_file = f"{sb_name}.{packmol_format}"
output_filename = 'packmol.out'  # receives packmol's stdout

# Map each input structure file to the number of copies to pack.
# NOTE(review): the *_ac.pdb files are not created in the visible part of this
# notebook; they must already exist in the working directory.
packmol_info = dict()
#packmol_info[f"CID_{cid}_uniqRes.pdb"] = 1
#packmol_info[f"CID_{cid2}_uniqRes.pdb"] = 1
packmol_info[f"CID_{cid}_ac.pdb"] = 1
packmol_info[f"CID_{cid2}_ac.pdb"] = 2
print(packmol_info)

# Write the complete packmol input file and close it, so that everything is on
# disk before packmol reads it.
with open('packmol.inp','w') as packmol_inputfile:
    packmol_inputfile.write(f'filetype {packmol_format}\n')
    packmol_inputfile.write(f'tolerance {tolerance}\n')
    packmol_inputfile.write(f'output {output_structure_file}\n\n')
    for key,value in packmol_info.items():
        add_structure(packmol_inputfile,key,value,cube_dimensions)
        packmol_inputfile.write('\n')

# Run packmol. It must be installed on the computer on which the script is run.
# The first argument is the name of the packmol executable; the input file is
# passed on stdin and packmol's stdout is captured in `output_filename`.
with open('packmol.inp') as packmol_inputfile, open(output_filename,'w') as outfile:
    code = subprocess.call("packmol",stdin=packmol_inputfile,stdout=outfile)

# Do not continue with a missing or incomplete box if packmol reported an error.
if code != 0:
    raise RuntimeError(
        f"packmol exited with status {code}; see {output_filename} for details"
    )
        


{'CID_81184_ac.pdb': 1, 'CID_6228_ac.pdb': 2}


Visualize everything to check the structures

In [15]:
import nglview
import ipywidgets

# Show the packed solvent box that the openmoltools/packmol cell saved to disk.
# As the last expression of the cell, the returned widget is displayed.
nglview.show_file(f"{sb_name}_openmoltools.pdb")

NGLWidget()

In [21]:
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
from openforcefield.tests.utils import get_data_file_path, get_packmol_pdb_file_path
import mdtraj as md

from openforcefield.topology import Molecule, Topology

# Reference files (sdf, smiles or mol format) for every distinct molecule in
# the solvent box. With an OpenEye licence other formats can be used as well.
monomer_names = (f'CID_{cid2}.sdf', f'CID_{cid}.sdf')
unique_molecules = list(map(Molecule.from_file, monomer_names))

# Packed box produced by the openmoltools/packmol cell.
pdb_2 = PDBFile(f"{sb_name}_openmoltools.pdb")
omm_topology = pdb_2.getTopology()
box_positions = pdb_2.getPositions()

# Match the OpenMM topology against the reference molecules, then let the
# OpenFF force field build the parameterized OpenMM system for the whole box.
off_box_topology = Topology.from_openmm(
    omm_topology,
    unique_molecules=unique_molecules,
)
off_box_system = off_forcefield.create_openmm_system(off_box_topology)





### 

## Performing an energy calculation

In [22]:
import numpy as np

# Single-point potential energy of the packed box. The integrator is only
# needed to construct the Context; no steps are taken in this cell.
integrator = VerletIntegrator(1.0 * femtoseconds)
context = Context(off_box_system, integrator)
context.setPositions(box_positions)

# Request only the energy from the context and convert it to kJ/mol.
state = context.getState(getEnergy=True)
potential = state.getPotentialEnergy()
energy = potential.in_units_of(kilojoules_per_mole)

print(f"Original energy: {energy}")

Original energy: 7032.411703329492 kJ/mol


## Performing a molecular dynamics simulation for the new compound

In [23]:
from sys import stdout
import time

# Target temperature, shared by the thermostat and the barostat so that the two
# cannot drift apart when the value is edited.
sim_temperature = 600*kelvin

#thermostat
integrator = LangevinIntegrator(sim_temperature, 1/picosecond, 2*femtoseconds)

#for isobaric simulation
# The system object is shared across cells; add the barostat only once so that
# re-running this cell does not stack several barostats on the same system.
# (An existing barostat keeps the settings it was created with.)
if not any(isinstance(force, MonteCarloBarostat) for force in off_box_system.getForces()):
    off_box_system.addForce(MonteCarloBarostat(1*bar, sim_temperature))

#setting up the simulation
simulation = Simulation(pdb_2.topology, off_box_system, integrator)
simulation.context.setPositions(box_positions)
simulation.minimizeEnergy()

# Save the minimized structure, i.e. the actual starting point of the dynamics.
# Reading pdb_2.positions here would write the pre-minimization coordinates.
minimized_positions = simulation.context.getState(getPositions=True).getPositions()
with open("init.pdb", "w") as f:
    PDBFile.writeFile(simulation.topology, minimized_positions, f)

# Trajectory every 10 steps; progress on stdout and scalars to CSV every 100.
simulation.reporters = []
simulation.reporters.append(DCDReporter('traj.dcd', 10))
simulation.reporters.append(StateDataReporter(stdout, 100, step=True,
                                              temperature=True, elapsedTime=True))
simulation.reporters.append(StateDataReporter("scalars.csv", 100, time=True,
                                              potentialEnergy=True, totalEnergy=True, temperature=True))

print("Starting simulation")
# Wall-clock timer: process_time() would count CPU time only, which is not the
# elapsed time when the work runs on several threads or on a GPU.
start = time.perf_counter()

simulation.step(10000)

end = time.perf_counter()
print("Elapsed time %.2f seconds" % (end-start))
print("Done!")


Starting simulation
#"Step","Temperature (K)","Elapsed Time (s)"
100,110.46843955724496,0.0004265308380126953
200,208.31290280901484,0.22046637535095215
300,299.3010036571291,0.427501916885376
400,363.306994552848,0.6685197353363037
500,386.2956910550528,0.8867383003234863
600,434.96334949762627,1.0937793254852295
700,445.1117451350331,1.3039963245391846
800,470.98682389521815,1.5528504848480225
900,479.0039221838321,1.7993004322052002
1000,495.8970297828185,2.013545036315918
1100,512.5588832896391,2.3064351081848145
1200,489.10033095475603,2.543405055999756
1300,513.9534177975352,3.0567173957824707
1400,558.1624718685638,3.2776682376861572
1500,579.0892243654,3.480391502380371
1600,533.6972388222322,3.694087266921997
1700,581.6636947891159,3.896881580352783
1800,578.0175354746037,4.103826522827148
1900,590.8242143209113,4.311960220336914
2000,600.2401959061195,4.525664329528809
2100,590.3252580135463,4.734823942184448
2200,594.0928149048498,4.941352844238281
2300,596.1526709904447,5.1

## Visualize the output

In [24]:

# Load the saved trajectory, using init.pdb to supply the topology.
traj = md.load('traj.dcd', top='init.pdb')
# Align every frame onto the first frame of the same trajectory (in place).
traj.superpose(traj, frame=0)
# Last expression of the cell, so the widget is displayed.
nglview.show_mdtraj(traj)

NGLWidget(count=1000)