**Important imports**

In [28]:
import os
import openmoltools
from openeye import oechem, oeomega, oeiupac
from oeommtools.utils import openmmTop_to_oemol
import time
from chemper.smirksify import SMIRKSifier
import random

## Follow Open Force Field examples

I will use the examples in the `openforcefield` toolkit as a guide for parameterizing a protein and then extracting its parameters by fragment.

### Parameterize an example protein 

In the `openforcefield` toolkit there is an example, [`mixedFF_structure`](insert url). I will follow it to see whether I can type my tiny protein this way, or alternatively the T4 protein that is also provided in the `openforcefield` data files. 

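**Note:** the Open Force Field example does not run as published and needs to be tested upstream. I had to make the following fixes to get it working: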

1. app is imported after first use
    - error: `NameError: name 'app' is not defined`
    - fix: move line `from simtk.openmm import app` to top of file
2. proteinpdb variable isn't defined, but used in the protein system definition
    - error: `NameError: name 'proteinpdb' is not defined`
    - fix: rename protein_pdbfile to proteinpdb
3. parmed is not defined
    - error: `NameError: name 'parmed' is not defined`
    - fix: import parmed

In [5]:
import simtk
from simtk import unit, openmm
from simtk.openmm import app
import parmed

# Input structure. The T4 protein is the one parameterized below; the other
# inputs that were tried are listed here for reference:
#   - get_data_filename('proteins/T4-protein.pdb')  (copy provided with openforcefield)
#   - 'mol_files/Val_Thr.pdb'
protein_pdb_filename = "mol_files/T4protein.pdb"

# Parse the PDB file once. Both names are kept bound to the same object because
# the example this cell follows uses `protein_pdbfile` and `proteinpdb`.
proteinpdb = app.PDBFile(protein_pdb_filename)
protein_pdbfile = proteinpdb

# Load the AMBER protein force field, along with a solvent force field
protein_forcefield = 'amber99sbildn.xml'
solvent_forcefield = 'tip3p.xml'
forcefield = app.ForceField(protein_forcefield, solvent_forcefield)

# Parameterize the protein
protein_system = forcefield.createSystem(proteinpdb.topology)

# Create a ParmEd Structure for the protein from the topology, the
# parameterized system, and the coordinates read from the PDB file
protein_structure = parmed.openmm.load_topology(proteinpdb.topology,
                                                protein_system,
                                                xyz=proteinpdb.positions)
print('Protein:', protein_structure)

Protein: <Structure 2634 atoms; 164 residues; 2654 bonds; parametrized>


### Find parameters/energies in openMM system 

In [6]:
# Each dictionary groups atom-index tuples by the parameters assigned to them.
# The key is the parameters rendered as a string, and the value has the form
#   {'parameters': (<parameter values>), 'atoms': [(atom indices), ...]}
bond_dict = dict()
angle_dict = dict()
torsion_dict = dict()
improper_dict = dict()  # not populated in this cell
lj_dict = dict()        # not populated in this cell

for force in protein_system.getForces():
    if isinstance(force, openmm.PeriodicTorsionForce):
        # The same four atoms can appear in several torsion entries, so first
        # collect every (period, phase, k) term listed for each atom tuple.
        terms_by_atoms = dict()
        for tidx in range(force.getNumTorsions()):
            a1, a2, a3, a4, period, phase, k = force.getTorsionParameters(tidx)
            terms_by_atoms.setdefault((a1, a2, a3, a4), list()).append((period, phase, k))

        # Then invert the mapping: atom tuples sharing an identical list of
        # terms end up under the same key.
        for atoms, param_list in terms_by_atoms.items():
            new_params = tuple(param_list)
            str_params = str(new_params)
            entry = torsion_dict.setdefault(
                str_params, {'parameters': new_params, 'atoms': list()})
            entry['atoms'].append(atoms)

    elif isinstance(force, openmm.HarmonicBondForce):
        for bidx in range(force.getNumBonds()):
            a1, a2, length, k = force.getBondParameters(bidx)
            str_params = str(length) + '; ' + str(k)
            entry = bond_dict.setdefault(
                str_params, {'parameters': (length, k), 'atoms': list()})
            entry['atoms'].append((a1, a2))

    elif isinstance(force, openmm.HarmonicAngleForce):
        # Accumulate into the angle_dict created above rather than resetting
        # it here, so angles are handled the same way as bonds and torsions.
        for aidx in range(force.getNumAngles()):
            a1, a2, a3, angle, k = force.getAngleParameters(aidx)
            str_param = str(angle) + '; ' + str(k)
            entry = angle_dict.setdefault(
                str_param, {'parameters': (angle, k), 'atoms': list()})
            entry['atoms'].append((a1, a2, a3))

    elif isinstance(force, openmm.NonbondedForce):
        # Only a reference to the force is kept; its per-particle parameters
        # are not extracted in this cell.
        lj_force = force
        print("found LJ")

found LJ


# Make OEMol with oeommtools

In [10]:
# Convert the OpenMM topology and coordinates read from the PDB file into an
# OpenEye molecule using the oeommtools helper.
protein_topology = proteinpdb.topology
protein_positions = proteinpdb.positions
mol = openmmTop_to_oemol(protein_topology, protein_positions)

# Use ChemPer to make Bonds?

In [14]:
# One (label, [atom index tuples]) entry per distinct bond parameter set. The
# atom list is wrapped in an outer list -- presumably one inner list per
# molecule, with a single molecule used here; confirm against ChemPer's docs.
bond_clusters = [
    (param_label, [param_entry['atoms']])
    for param_label, param_entry in bond_dict.items()
]

In [45]:
def _bond_cluster_size(cluster):
    """Return the total number of atom tuples held by a (label, atom lists) cluster."""
    total = 0
    for atom_list in cluster[1]:
        total += len(atom_list)
    return total


# Time the SMIRKS generation for bonds, handing ChemPer the largest clusters first.
init = time.time()
bond_clusters = sorted(bond_clusters, key=_bond_cluster_size, reverse=True)
sm = SMIRKSifier([mol], bond_clusters, max_layers=10, strict_smirks=False)
end = time.time()
elapsed_minutes = (end - init) / 60.
print("took %.3f minutes" % elapsed_minutes)


 Label                | SMIRKS 
 zz_0.109 nm; 284512.0 kJ/(nm**2 mol) | [#6H1,#6H2,#6H3;!r;+0;X4;x0;A:1](-;!@[#16H0X2,#16H1X2,#6H0X3,#6H1X4,#6H2X4,#6H3X4,#8H1X2;!r;+0;x0;A]-;!@[#1H0X1,#6H0X3,#6H1X3,#6H1X4,#6H2X4,#6H3X4,#7H1X3,#7H2X3,#7H3X4,#8H0X1;!r;+0;x0;A])(-;!@[#1H0X1,#6H1X4,#6H2X4,#6H3X4;!r;+0;x0;A])(-;!@[#1H0X1,#6H3X4,#7H0X3,#7H1X3,#7H3X4;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A:2] 
--------------------------------------------------------------------------------
 zz_0.1526 nm; 259408.0 kJ/(nm**2 mol) | [#6H1,#6H2,#6H3;!r;+0;X4;x0;A:1](-;!@[#1H0X1,#6H0X3,#6H1X4,#6H2X4;!r;+0;x0;A])(-;!@[#1H0X1,#6H3X4,#7H0X3,#7H1X3,#7H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])-;!@[#6H1,#6H2;!r;+0;X4;x0;A:2](-;!@[#16H0X2,#16H1X2,#6H0X3,#6H1X4,#6H2X4,#6H3X4,#7H3X4,#8H1X2;!r;+0;x0;A]-;!@[#1H0X1,#6H1X3,#6H3X4,#8H0X1;!r;+0;x0;A])(-;!@[#1H0X1,#6H1X4,#6H3X4,#7H1X3;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A] 
--------------------------------------------------------------------------------
 zz_0.101 nm; 363171.2 kJ/(nm**2 mol) | [#1H0

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
took 1.305 minutes


In [40]:
# Retry SMIRKS generation with the bond clusters in a different random order on
# each trial, to see whether any ordering lets the SMIRKSifier succeed.
N_SHUFFLE_TRIALS = 100
SHUFFLE_SEED = 0  # fixed so the sequence of orderings is the same on every run

# A dedicated generator keeps the trials reproducible without touching the
# state of the global `random` module.
shuffle_rng = random.Random(SHUFFLE_SEED)

sms_checks = list()  # value of `sm.checks` for every trial
sms = list()         # the SMIRKSifier object built in every trial
for i in range(N_SHUFFLE_TRIALS):
    # Shuffles `bond_clusters` in place, so the list is left in the order of
    # the final trial once the loop finishes.
    shuffle_rng.shuffle(bond_clusters)
    # Build a new OEGraphMol from `mol` for every trial.
    mols = [oechem.OEGraphMol(mol)]
    sm = SMIRKSifier(mols, bond_clusters, max_layers=10, strict_smirks=False, verbose=False)
    sms_checks.append(sm.checks)
    sms.append(sm)
    # NOTE(review): `sm.checks` is presumably truthy when valid SMIRKS were
    # produced -- confirm against the ChemPer documentation.
    if sm.checks:
        print("Found some SMIRKS")

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMI

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMI

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
                      SMIRKSifier was not able to create SMI

In [42]:
# Show the distinct `checks` outcomes recorded across all shuffled trials.
{*sms_checks}

{False}

# Use ChemPer to make Angles?

In [43]:
# One (label, [atom index tuples]) entry per distinct angle parameter set,
# built in the same shape as the bond clusters.
angle_clusters = [
    (param_label, [param_entry['atoms']])
    for param_label, param_entry in angle_dict.items()
]

In [44]:
def _angle_cluster_size(cluster):
    """Return the total number of atom tuples held by a (label, atom lists) cluster."""
    total = 0
    for atom_list in cluster[1]:
        total += len(atom_list)
    return total


# Time the SMIRKS generation for angles, handing ChemPer the largest clusters first.
init = time.time()
angle_clusters = sorted(angle_clusters, key=_angle_cluster_size, reverse=True)
sm = SMIRKSifier([mol], angle_clusters, max_layers=10, strict_smirks=False)
end = time.time()
elapsed_minutes = (end - init) / 60.
print("took %.3f minutes" % elapsed_minutes)


 Label                | SMIRKS 
 zz_1.91113553093 rad; 418.4 kJ/(mol rad**2) | [#16H0X2,#16H1X2,#1H0X1,#6H0X3,#6H1X4,#6H2X4,#6H3X4,#7H0X3,#7H1X3,#7H3X4;!r;+0;x0;A:1]-;!@[#6H1,#6H2,#6H3,#7H3;!r;+0;X4;x0;A:2](-;!@[#16H0X2,#16H1X2,#1H0X1,#6H1X4,#6H2X4,#6H3X4,#7H0X3,#7H1X3,#7H3X4,#8H1X2;!r;+0;x0;A])(-;!@[#1H0X1,#6H0X3,#6H1X4,#6H2X4,#6H3X4;!r;+0;x0;A])-;!@[#1H0X1,#8H1X2;!r;+0;x0;A:3] 
--------------------------------------------------------------------------------
 zz_1.91113553093 rad; 292.88 kJ/(mol rad**2) | [#1H0X1x0!r+0A:1]-;!@[#6H2,#6H3,#7H3;!r;+0;X4;x0;A:2](-;!@[#16H0X2,#16H1X2,#6H0X3,#6H1X4,#6H2X4;!r;+0;x0;A]-;!@[#1H0X1,#6H0X3,#6H1X3,#6H1X4,#6H2X4,#6H3X4,#7H0X3,#7H1X3,#7H3X4,#8H0X1;!r;+0;x0;A])(-;!@[#1H0X1,#6H0X3,#6H1X4,#6H2X4,#6H3X4,#7H0X3,#7H1X3,#7H3X4,#8H1X2;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A:3] 
--------------------------------------------------------------------------------
 zz_2.09439510239 rad; 418.4 kJ/(mol rad**2) | [#6H0X3,#6H1X3,#7H0X2,#7H1X3;!r;+0;x0;A:1](-;!@[#6H0X3,#6H1X3

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
took 2.334 minutes
