In [1]:
# Import general modules
import pandas as pd

# Import chemistry-related modules
import oenotebook as oenb
from openeye.oechem import *
from openmoltools.openeye import *

In [2]:
# Load the pickled table of systems. Later cells read its Solute_SMILES,
# Solvent_SMILES and "Temperature, K" columns.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# if it comes from a trusted source.
data = pd.read_pickle("correct_systems_and_thermoML_activities.pickle")

**Strategy:**

1. Each oeb file will have all molecules in a certain solvent at a certain temperature and pressure.
2. Create list of solvents.
3. Create sub-dataframes per solvent.
4. For each solvent, check temperatures and create sub-dataframes for each.
5. For each final sub-dataframe, create one oeb file.

**Dr Mobley's comment:** Initially try to make a single oeb file with all the compounds. Dividing too much defeats the purpose of using Orion.

**How to calculate infinite dilution activity coefficients:**

$k_BT \ln{\gamma_{solute}^{\infty}} = \mu_{solute}^{excess} - \mu_{solute\;molecule}^{excess,\;bulk\,solute} \Leftrightarrow $

$k_BT \ln{\gamma_{solute}^{\infty}} = \Delta G_{solute}^{solvation} - \Delta G_{solute\;molecule}^{solvation,\;bulk\,solute}$

**Important:**

Create mol objects containing the atoms of the molecule in question (force-field details will be dealt with later), attach the SD tags listed below to them, and write them to an `.oeb` file through an output molecule stream. Instructions can be found at:

`https://docs.eyesopen.com/toolkits/python/oechemtk/molreadwrite.html`

`https://docs.eyesopen.com/toolkits/python/oechemtk/moltaggeddata.html`

**Tags:**

`tags = ["solvents","molar_fractions","density","temperature","pressure"]`

SD tags already added to oemol objects are erased in `oenb.write_dataframe_to_file(*args)`. Instead, create a new dataframe in which the oemol objects are in the first column and the SD tags are in the remaining columns.

In [3]:
# Input table for the solute-in-solvent runs: the molecule object sits in the
# first column and the tag values occupy the remaining columns.
# Every column is filled from a plain list rather than a Series, so rows are
# matched by position regardless of the index carried by `data`.
n_rows = len(data.Solvent_SMILES)

solutes = pd.DataFrame()
solutes["Molecule"] = list(map(smiles_to_oemol, data.Solute_SMILES))
solutes["solvents"] = list(data.Solvent_SMILES)
# The same constant string '1.0' is stored for every row in these three
# columns (presumably placeholder values -- TODO confirm).
solutes["molar_fractions"] = ['1.0'] * n_rows
solutes["density"] = ['1.0'] * n_rows
solutes["temperature"] = list(data["Temperature, K"])
solutes["pressure"] = ['1.0'] * n_rows

In [4]:
# Water arrives as the bare SMILES 'O'; respell it with explicit hydrogens
# ('[H]O[H]', the OpenEye format). Every other solvent SMILES is kept as is.
solvs = ['[H]O[H]' if smiles == 'O' else smiles for smiles in solutes.solvents]
solutes["solvents"] = solvs

# Build a single-conformer structure for each solute molecule.
mols = []
for raw_mol in solutes.Molecule:
    # Name the atoms first, then pass the molecule through the openmoltools
    # helpers (presumably each returns a processed copy -- TODO confirm).
    OETriposAtomNames(raw_mol)
    prepared = normalize_molecule(raw_mol)
    prepared = generate_conformers(prepared, max_confs=800, strictStereo=False)
    # Keep only the first conformer that was generated; drop all the others.
    for position, conformer in enumerate(prepared.GetConfs()):
        if position == 0:
            continue
        prepared.DeleteConf(conformer)
    mols.append(prepared)
solutes["Molecule"] = mols

In [5]:
# Input table for the second simulation, in which every solute is solvated in
# itself: the "solvents" column therefore repeats the solute SMILES.
# NOTE(review): the 'O' -> '[H]O[H]' water respelling applied to
# solutes["solvents"] is not applied here -- confirm whether it is needed.
n_rows = len(data.Solute_SMILES)

self_solv = pd.DataFrame()
self_solv["Molecule"] = list(map(smiles_to_oemol, data.Solute_SMILES))
self_solv["solvents"] = list(data.Solute_SMILES)
# The same constant string '1.0' is stored for every row in these three
# columns (presumably placeholder values -- TODO confirm).
self_solv["molar_fractions"] = ['1.0'] * n_rows
self_solv["density"] = ['1.0'] * n_rows
self_solv["temperature"] = list(data["Temperature, K"])
self_solv["pressure"] = ['1.0'] * n_rows

In [6]:
# Build a single-conformer structure for each molecule of the self-solvation
# table, using the same recipe as for the `solutes` table.
mols = []
for raw_mol in self_solv.Molecule:
    OETriposAtomNames(raw_mol)
    prepared = generate_conformers(
        normalize_molecule(raw_mol), max_confs=800, strictStereo=False
    )
    # Only the first conformer is retained; every later one is deleted.
    for position, conformer in enumerate(prepared.GetConfs()):
        if position >= 1:
            prepared.DeleteConf(conformer)
    mols.append(prepared)
self_solv["Molecule"] = mols

In [7]:
# Write each input table twice: as an .oeb molecule file and as a pandas
# pickle. For each table the .oeb file is written first, then the pickle.
outputs = (
    (solutes, "solute_solvation_input.oeb", "solute_solvation_input.pickle"),
    (self_solv, "solute_self_solvation_input.oeb", "solute_self_solvation_input.pickle"),
)
for frame, oeb_path, pickle_path in outputs:
    oenb.write_dataframe_to_file(frame, oeb_path)
    frame.to_pickle(pickle_path)

In [7]:
# Preview the first rows of the self-solvation table as a sanity check.
self_solv.head()

Unnamed: 0,Molecule,solvents,molar_fractions,density,temperature,pressure
0,<oechem.OEMol; proxy of <Swig Object of type '...,Cc1ccccc1,1.0,1.0,288.15,1.0
1,<oechem.OEMol; proxy of <Swig Object of type '...,CCOC(C)(C)C,1.0,1.0,288.15,1.0
2,<oechem.OEMol; proxy of <Swig Object of type '...,COC(C)(C)C,1.0,1.0,288.15,1.0
3,<oechem.OEMol; proxy of <Swig Object of type '...,CCOC(C)(C)CC,1.0,1.0,288.15,1.0
4,<oechem.OEMol; proxy of <Swig Object of type '...,CCC(C)(C)OC,1.0,1.0,288.15,1.0


In [11]:
# Spot-check four rows of the solute table, selected by position.
solutes.iloc[[4,50,110,200]]

Unnamed: 0,Molecule,solvents,molar_fractions,density,temperature,pressure
4,<oechem.OEMol; proxy of <Swig Object of type '...,[H]O[H],1.0,1.0,288.15,1.0
50,<oechem.OEMol; proxy of <Swig Object of type '...,c1ccncc1,1.0,1.0,298.15,1.0
110,<oechem.OEMol; proxy of <Swig Object of type '...,CCCCCCCCCCCCCC(O)=O,1.0,1.0,314.1,1.0
200,<oechem.OEMol; proxy of <Swig Object of type '...,Cc1cccc(C)c1,1.0,1.0,270.1,1.0


In [12]:
# Keep the same four positional rows of `solutes` in a small frame named `test`.
test = solutes.iloc[[4,50,110,200]]