# Fitting Proteins

In [6]:
import os
import itertools
import glob
import parmed
from simtk.openmm import app
from simtk import unit
from oeommtools import utils as oeo_utils
from chemper.smirksify import SMIRKSifier

In [7]:
residues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS',
            'LEU', 'MET', 'PRO', 'THR', 'TYR',
            'ARG', 'ASP', 'GLN', 'GLY', 'ILE',
            'LYS', 'PHE', 'SER', 'TRP', 'VAL']

In [8]:
from openeye import oechem
import openeye.oedepict as oedepict
import IPython
from IPython.display import display, Image

def depictMatch(mol, match=None, supH = True, idx_atoms=list(), width=500, height=200, fn=None,
               color1=oechem.OELightBlue, color2=oechem.OELightSalmon):
    
    atom_bond_set = oechem.OEAtomBondSet()
    for atom in mol.GetAtoms():
        if atom.GetIdx() in idx_atoms:
            atom_bond_set.AddAtom(atom)
            for bond in atom.GetBonds():
                nbr_atom = bond.GetNbr(atom)
                if (nbr_atom.GetIdx() in idx_atoms) and nbr_atom.GetIdx() > atom.GetIdx():
                    atom_bond_set.AddBond(bond)
    
    dopt = oedepict.OEPrepareDepictionOptions()
    dopt.SetDepictOrientation( oedepict.OEDepictOrientation_Horizontal)
    dopt.SetSuppressHydrogens(supH)
    oedepict.OEPrepareDepiction(mol, dopt)
    
    opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
    opts.SetAtomColorStyle(oedepict.OEAtomColorStyle_WhiteMonochrome)
    disp = oedepict.OE2DMolDisplay(mol, opts)
    
    # Highlight indexed atoms
    hstyle = oedepict.OEHighlightStyle_Cogwheel
    hcolor = oechem.OEColor(color1)
    if not atom_bond_set.IsEmpty():
        oedepict.OEAddHighlighting(disp, hcolor, hstyle, atom_bond_set)
    
    if match is not None:
        # Highlight whole match
        hstyle = oedepict.OEHighlightStyle_BallAndStick
        hcolor = oechem.OEColor(color2)
        oedepict.OEAddHighlighting(disp, hcolor, hstyle, match)
    
    img = oedepict.OEImage(width, height)
    oedepict.OERenderMolecule(img,disp)
    
    if fn is not None:
        ext = oechem.OEGetFileExtension(fn)
        if oedepict.OEIsRegisteredImageFile(ext):
            ofs = oechem.oeofstream()
            if not ofs.open(fn):
                print("OE could not open file due to error %s" % fn)
                ofs.close()
            else:
                oedepict.OERenderMolecule(ofs, ext, disp)
                ofs.close()
        else:
            print("OE could not handle extension on %s" % fn)
    return img

## Dipeptides

I realized in this process that we don't need a separate `leap.in` file for each "protein"
so instead I will make one master input file that will parameterize all of the molecules 

In [9]:
def make_leap_str_dipeps(pairs, directory):
    """
    Parameters
    ----------
    residues: list like 
        contains 2 of the three letter residue codes
    directory: str
        path to where the output files should be stored
    
    Returns
    -------
    leap_lines: a string for the lines for the list of 
    """
    leap_str = """
divaline = sequence {N%s C%s}
savepdb divaline %s.pdb
protein = loadpdb %s.pdb
saveamberparm protein %s.prmtop %s.inpcrd
"""
    p0, p1 = pairs
    # fill in leap.in format
    fn = "%s/%s_%s" % (directory, p0, p1)
    out = leap_str % (p0, p1, fn, fn, fn, fn)

    leap_str4 = """
divaline = sequence {NALA %s %s CALA}
savepdb divaline %s.pdb
protein = loadpdb %s.pdb
saveamberparm protein %s.prmtop %s.inpcrd
"""
    fn4 = "%s/%s_%s_tetra" % (directory, p0, p1)
    out4 = leap_str4 % (p0, p1, fn4, fn4, fn4, fn4)
    
    return out, out4

def make_leap_str_tripeps(triplets, directory):
    """
    Parameters
    ----------
    residues: list like 
        contains 3 of the three letter residue codes
    directory: str
        path to where the output files should be stored
    
    Returns
    -------
    leap_lines: a string for the lines for the list of 
    """
    leap_str = """
divaline = sequence {N%s %s C%s}
savepdb divaline %s.pdb
protein = loadpdb %s.pdb
saveamberparm protein %s.prmtop %s.inpcrd
"""
    p0, p1, p2 = triplets
    # fill in leap.in format
    fn = "%s/%s_%s_%s" % (directory, p0, p1, p2)
    out = leap_str % (p0, p1, p2, fn, fn, fn, fn)
    return out

In [10]:
# create a list of all dipeptides
di_peps = itertools.combinations(residues, 2)
tri_peps = itertools.combinations(residues, 3)

# create input file
input_leap = "leap.in"
leap_in = open(input_leap, 'w')
leap_in.write("source oldff/leaprc.ff99SB")
# loop over all pairs, making input files 
for pair in di_peps:
    out, out4 = make_leap_str_dipeps(pair, "./mol_files")
    # make the *.in file
    leap_in.write(out)
    leap_in.write(out4)

for triplets in tri_peps:
    out = make_leap_str_tripeps(triplets, "./mol_files")
    leap_in.write(out)
    
leap_in.close()
os.system('tleap -f %s' % input_leap)

256

# Load systems into openMM systems

Here is what I would like to track and my planned structure:

What information do we need:

* quantitative parameters:
    - with units and "full values"
    - rounded within 0.001? to identify when parameters should be treated the same
* atom indices which will translate into OEmols with `oeommtools`
    - by parameter type
* parmed/openmm systems to make the oemols

#### What is the best way to store this data?

How to store this data isn't necessarily obvious, but this is what I'm thinking right now:

Dictionaries by parameter type with the setup:

* keys: string of quantitative parameters rounded to assigned values with two sub dictionaries:
    - 'atom indices': dictioanry with form {mol_num/id: list of atom indice tuples}
        * I think it makes more sense to keep these as dictionaries for now so that we can also have a {mol_num/id: molecule} dictionary that can be translated into the chemper format later
    - 'parameters': set of tuples so that if there are differences in rounding to the top string we can track them down and so we can store the units that go with each component

Separate dictionary with the molecules stored with the form `{mol_num/id: { 'oemol': oemol, 'parmed': parmed_system} }`. I think for a first pass it is worth storing both objects just in case, but ultimately we only really need to the oemol after the first set of dictionaries is done.

In [11]:
class parameter_dict:
    
    def __init__(self):
        self.d = dict()
    
    def items(self):
        return self.d.items()
    
    def add_key(self, key):
        if key not in self.d:
            self.d[key] = {'atom_indices': dict(), 'parameters': set(), 'units': None}
    
    def add_atoms(self, key, mol_id, atom_tuple):
        self.add_key(key)
        if mol_id not in self.d[key]['atom_indices']:
            self.d[key]['atom_indices'][mol_id] = list()
        self.d[key]['atom_indices'][mol_id].append(tuple(atom_tuple))
    
    def add_param(self, key, params):
        self.add_key(key)
        new_tuple = [x._value for x in params]
        self.d[key]['parameters'].add(tuple(new_tuple))
        self.d[key]['units'] = tuple([x.unit for x in params])

In [28]:
class parameter_system(object):
    
    def __init__(self):
        self.lj_dict = parameter_dict()
        self.charge_dict = parameter_dict()
        self.bond_dict = parameter_dict()
        self.angle_dict = parameter_dict()
        self.proper_dict = parameter_dict()
        self.improper_dict = parameter_dict()
        self.mol_dict = dict()
    
    def add_amber_system(self, prm_file=None, inp_file=None):
        """
        Adds an amber system to the clusters of parameters stored here.
        All parameters are added to relevant dictionary and returns the system
        
        Parameters
        ----------
        prm_file: path to AMBER prmtop file
        inp_file: path to AMBER inpcrd file
    
        Returns
        -------
        sys: parmed system created from these files
        """
        parm = parmed.load_file(prm_file, inp_file)
        base = prm_file.split('.')[0]
        mol_id = base.split('/')[-1]
        self.mol_dict[mol_id] = {'parmed': parm, 
                                   'oemol': oeo_utils.openmmTop_to_oemol(parm.topology, parm.positions)}
        self.add_nonbonds(parm, mol_id)
        self.add_bonds(parm, mol_id)
        self.add_angles(parm, mol_id)
        self.add_torsions(parm, mol_id)
        return parm
    
    def add_molecule(self, sys, mol_id):
        self.mol_dict[mol_id] = {'parmed': sys, 
                                   'oemol': oeo_utils.openmmTop_to_oemol(sys.topology, sys.positions)}
        
    def add_parmed_system(self, sys):
        base = sys.name.split('.')[0]
        mol_id = base.split('/')[-1]
        if mol_id not in self.mol_dict:
            self.add_molecule(sys, mol_id)
        self.add_nonbonds(sys, mol_id)
        self.add_bonds(sys, mol_id)
        self.add_angles(sys, mol_id)
        self.add_torsions(sys, mol_id)
        
    def add_nonbonds(self, sys, mol_id):
        """
        Cluster atoms based on their partial charge

        Parameters
        ----------
        sys: list like of parmed system
        charge_dict: dictionary to store data that will be updated in this function
        lj_dict: dictionary to store LJ parameters for this molecule
        mol_id: key for this system to store data in the dictionaries

        Returns
        -------
        clusters: dictionary with the form
                  {string parameter: {'atom_idices': {}}
        """    
        if mol_id not in self.mol_dict:
            self.add_molecule(sys, mol_id)
            
        for a in sys.atoms:
            # Update charge dictionary:
            charge_str = "%.3f" % a.charge
            charge_param = [a.ucharge]
            self.charge_dict.add_param(charge_str, charge_param)
            self.charge_dict.add_atoms(charge_str, mol_id, [a.idx])

            # Update LJ dictionary
            lj_str = "%.3f\t%.3f" % (a.epsilon, a.rmin)
            lj_params = [a.uepsilon, a.urmin]
            self.lj_dict.add_param(lj_str, lj_params)
            self.lj_dict.add_atoms(lj_str, mol_id, [a.idx])

    def add_bonds(self, sys, mol_id):
        if mol_id not in self.mol_dict:
            self.add_molecule(sys, mol_id)
        for b in sys.bonds:
            bond_str = "%.3f\t%.3f" % (b.type.k, b.type.req)
            bond_params = [b.type.uk, b.type.ureq]
            self.bond_dict.add_param(bond_str, bond_params)
            self.bond_dict.add_atoms(bond_str, mol_id, [b.atom1.idx, b.atom2.idx])
        
    def add_angles(self, sys, mol_id):
        if mol_id not in self.mol_dict:
            self.add_molecule(sys, mol_id)
        for an in sys.angles:
            angle_str = "%.3f\t%.3f" % (an.type.k, an.type.theteq)
            angle_params = [an.type.uk, an.type.utheteq]
            self.angle_dict.add_param(angle_str, angle_params)
            self.angle_dict.add_atoms(angle_str, mol_id, [an.atom1.idx, an.atom2.idx, an.atom3.idx])
    
    def convert_for_smirksifying(self, param_type=None):
        """
        param_type: string specifying the parameter you want clusters for
        must chose from ['lj', 'charge', 'proper_torsion', 'improper_torsion', 'angle', 'bond']
        
        Returns
        -------
        - list of molecules
        - either dictionary or list of clustered atomic indices
        
        """
        idx_list = list()
        mol_list = list()
        cluster_types = dict()
        
        dictionaries = {
            'lj': self.lj_dict,
            'charge': self.charge_dict,
            'proper_torsion': self.proper_dict,
            'improper_torsion': self.improper_dict,
            'angle': self.angle_dict,
            'bond': self.bond_dict,
        }
        
        if param_type is not None:
            if param_type.lower() not in dictionaries.keys():
                return cluster_types
            dictionaries = {param_type.lower(): dictionaries[param_type.lower()]}
        
        for idx, me in self.mol_dict.items():
            idx_list.append(idx)
            mol_list.append(me['oemol'])
        
        for label, par_dict in dictionaries.items():
            cluster_types[label] = list()
            for cluster_label, entry in par_dict.items():
                atom_list = list()
                for idx in idx_list:
                    if idx in entry['atom_indices']:
                        atom_list.append(entry['atom_indices'][idx])
                    else:
                        atom_list.append(list())
                cluster_types[label].append((cluster_label, atom_list))

        if param_type is None:
            return mol_list, cluster_types
        
        return mol_list, cluster_types[param_type.lower()]

    def add_torsions(self, sys, mol_id):
        if mol_id not in self.mol_dict:
            self.add_molecule(sys, mol_id)
        temp_dict = dict()
        for d in sys.dihedrals:
            if d.improper:
                imp_str = "%.3f\t%.3f\t%.3f" % (d.type.phi_k, d.type.phase, d.type.per)
                imp_params = [d.type.uphi_k, d.type.uphase, unit.Quantity(d.type.per)]
                self.improper_dict.add_param(imp_str, imp_params)
                self.improper_dict.add_atoms(imp_str, mol_id, [d.atom1.idx, d.atom3.idx, d.atom2.idx, d.atom4.idx])
            else:
                atoms = tuple([d.atom1.idx, d.atom2.idx, d.atom3.idx, d.atom4.idx])
                params = (d.type.uphi_k, d.type.uphase, unit.Quantity(d.type.per))
                if atoms not in temp_dict:
                    temp_dict[atoms] = list()
                temp_dict[atoms].append(params)

        for atoms, param_list in temp_dict.items():
            new_params = [p for t in param_list for p in t]
            prop_str = '\t'.join(['%.3f' % p._value for p in new_params])
            self.proper_dict.add_param(prop_str, new_params)
            self.proper_dict.add_atoms(prop_str, mol_id, atoms)

# Save molecules and atom indices for tetra and dipeptides

In [29]:
cwd = os.getcwd()
tetras = glob.glob('%s/mol_files/*tetra.pdb' % cwd)

tetras_store_data = parameter_system()

for pdb in tetras:
    base = pdb.split('.')[0]
    mol_id = base.split('/')[-1]
    prmFile = os.path.join('%s.prmtop' % base)
    inpFile = os.path.join('%s.inpcrd' % base)
    parm = tetras_store_data.add_amber_system(prmFile, inpFile)

In [30]:
mols, cluster_types = tetras_store_data.convert_for_smirksifying()

In [31]:
cwd = os.getcwd()
all_pdb = glob.glob('%s/mol_files/*.pdb' % cwd)
dis = [s for s in all_pdb if s.count('_') == 3]

dis_store_data = parameter_system()

for pdb in dis:
    base = pdb.split('.')[0]
    mol_id = base.split('/')[-1]
    prmFile = os.path.join('%s.prmtop' % base)
    inpFile = os.path.join('%s.inpcrd' % base)
    parm = dis_store_data.add_amber_system(prmFile, inpFile)

dis_mols, dis_cluster_types = dis_store_data.convert_for_smirksifying()

for label, clusters in dis_cluster_types.items():
    print(label, len(dis_mols), len(clusters), len(clusters[0][1]))

proper_torsion 190 29 190
improper_torsion 190 3 190
lj 190 14 190
bond 190 33 190
angle 190 42 190
charge 190 335 190


# Visualize dipeptide torsions and impropers

In [33]:
#depictMatch(mol, match=None, supH = True, idx_atoms=list(), width=500, height=200, fn=None,
#               color1=oechem.OELightBlue, color2=oechem.OELightSalmon)

for (p_lab, cluster) in dis_cluster_types['proper_torsion']:
    print(p_lab)
    for mol_idx, atom_indice_list in enumerate(cluster):
        if len(atom_indice_list) > 0:
            print('mol', mol_idx)

5.375	180.000	2.000
mol 26
mol 30
mol 34
mol 42
mol 47
mol 56
mol 59
mol 61
mol 62
mol 66
mol 91
mol 111
mol 135
mol 137
mol 149
mol 170
mol 184
mol 186
mol 188
0.250	0.000	1.000	0.000	0.000	3.000
mol 29
mol 31
mol 33
mol 39
mol 44
mol 56
mol 59
mol 65
mol 68
mol 70
mol 83
mol 87
mol 89
mol 92
mol 95
mol 96
mol 97
mol 102
mol 104
mol 106
mol 107
mol 110
mol 117
mol 118
mol 130
mol 140
mol 147
mol 150
mol 153
mol 156
mol 159
mol 171
mol 173
mol 177
mol 180
0.800	0.000	1.000	0.000	0.000	2.000	0.080	180.000	3.000
mol 10
mol 11
mol 13
mol 18
mol 20
mol 22
mol 31
mol 35
mol 38
mol 43
mol 44
mol 45
mol 57
mol 58
mol 60
mol 66
mol 67
mol 68
mol 73
mol 74
mol 78
mol 88
mol 93
mol 99
mol 100
mol 103
mol 107
mol 108
mol 121
mol 127
mol 128
mol 133
mol 149
mol 155
mol 165
mol 172
mol 182
0.156	0.000	3.000
mol 0
mol 1
mol 2
mol 3
mol 4
mol 5
mol 6
mol 7
mol 8
mol 9
mol 10
mol 11
mol 12
mol 13
mol 14
mol 15
mol 16
mol 17
mol 18
mol 19
mol 20
mol 21
mol 22
mol 23
mol 24
mol 25
mol 26
mol 27
mol 28
m

mol 104
mol 105
mol 106
mol 107
mol 108
mol 109
mol 110
mol 111
mol 112
mol 113
mol 114
mol 115
mol 116
mol 117
mol 118
mol 119
mol 120
mol 121
mol 122
mol 123
mol 124
mol 125
mol 126
mol 127
mol 128
mol 129
mol 130
mol 131
mol 132
mol 133
mol 134
mol 135
mol 136
mol 137
mol 138
mol 139
mol 140
mol 141
mol 142
mol 143
mol 144
mol 145
mol 146
mol 147
mol 148
mol 149
mol 150
mol 151
mol 152
mol 153
mol 154
mol 155
mol 156
mol 157
mol 158
mol 159
mol 160
mol 161
mol 162
mol 163
mol 164
mol 165
mol 166
mol 167
mol 168
mol 169
mol 170
mol 171
mol 172
mol 173
mol 174
mol 175
mol 176
mol 177
mol 178
mol 179
mol 180
mol 181
mol 182
mol 183
mol 184
mol 185
mol 186
mol 187
mol 188
mol 189
0.000	0.000	3.000
mol 13
mol 21
mol 28
mol 49
mol 54
mol 58
mol 76
mol 77
mol 90
mol 111
mol 112
mol 116
mol 129
mol 130
mol 143
mol 163
mol 173
mol 185
mol 187
2.300	180.000	2.000
mol 6
mol 12
mol 32
mol 46
mol 77
mol 86
mol 92
mol 125
mol 126
mol 136
mol 139
mol 158
mol 166
mol 169
mol 172
mol 176
mol 180
mol

# Try making SMIRKS for all dipeptides

In [34]:
smirs_dis = dict()
for label, clusters in dis_cluster_types.items():
    print(label)
    try: 
        smirs_dis[label] = SMIRKSifier(dis_mols, clusters, max_layers=10, strict_smirks=False)
    except Exception as e: 
        smirs_dis[label] = e

proper_torsion

 Label                | SMIRKS 
 zz_5.375	180.000	2.000 | [#6H2X4,#7H0X2;!r;+0;x0;A:1](-;!@[#6X3,#6X4;!r;+0;H1;x0;A]-;!@[#1H0X1x0!r+0A])-;!@[#6H0X3x0!r+0A:2](-;!@[#6H2X4,#7H0X2;!r;+0;x0;A]-;!@[#6X3,#6X4;!r;+0;H1;x0;A]-;!@[#1H0X1x0!r+0A])-;!@[#6H1X3x0!r+0A:3]-;!@[#1H0X1,#7H1X3;!r;+0;x0;A:4] 
--------------------------------------------------------------------------------
 zz_0.250	0.000	1.000	0.000	0.000	3.000 | [#1H0X1x0!r+0A:1]-;!@[#6H1,#6H3;!r;+0;X4;x0;A:2](-;!@[#1X1,#6X3;!r;+0;H0;x0;A])(-;!@[#1H0X1,#7H1X3;!r;+0;x0;A])-;!@[#6H1,#6H2;!r;+0;X4;x0;A:3](-;!@[#1H0X1,#6H1X4,#6H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])-;!@[#8H1X2x0!r+0A:4]-;!@[#1H0X1x0!r+0A] 
--------------------------------------------------------------------------------
 zz_0.800	0.000	1.000	0.000	0.000	2.000	0.080	180.000	3.000 | [#1H0X1x0!r+0A:1]-;!@[#6H2X4x0!r+0A:2](-;!@[#1H0X1x0!r+0A])(-;!@[#6H1,#6H2;!r;+0;X4;x0;A](-;!@[#1H0X1,#7H1X3,#7H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])-;!@[#6H0X3,#6H1X4;!r;+0;x0;A](-;

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
improper_torsion

 Label                | SMIRKS 
 zz_10.500	180.000	2.000 | [#6H1X4,#6H2X4,#7H2X3;!r;+0;x0;A:1](-;!@[#1H0X1,#7H0X3,#7H1X3,#7H2X4,#7H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])-;!@[#6H0X3x0!r+0A:2](-;!@[#7H0X3,#7H1X3,#7H2X3,#8H0X1;!r;+0;x0;A:3])-;!@[#7H1X3,#8H0X1;!r;+0;x0;A:4] 
--------------------------------------------------------------------------------
 zz_1.000	180.000	2.000 | [#6H0,#6H1;!r;+0;X3;x0;A:1](-;!@[#1H0X1,#6H0X3,#7H2X3,#8H0X1;!r;+0;x0;A])(-;!@[#6H0X3,#6H1X3,#6H1X4,#6H2X4,#7H0X2,#7H1X3,#7H2X3;!r;+0;x0;A]-;!@[#1H0X1,#6H0X3,#6H1X3,#6H1X4,#6H2X4;!r;+0;x0;A])-;!@[#7H0,#7H1,#7H2;!r;+0;X3;x0;A:2](-;!@[#1H0X1,#6H2X4;!r;+0;x0;A:3])-;!@[#1H0X1,#6H1X3,#6H1X4;!r;+0;x0;A:4] 
-------------------------------------------------------------------

angle

 Label                | SMIRKS 
 zz_80.000	120.400    | [#6H1,#6H2;!r;+0;X4;x0;A:1](-;!@[#1H0X1,#7H2X4,#7H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])(-;!@[#6H1,#6H2,#6H3,#7H3;!r;+0;X4;x0;A](-;!@[#16H1X2,#1H0X1,#6H0X3,#6H1X4,#6H2X4,#6H3X4;!r;+0;x0;A])(-;!@[#1H0X1,#6H3X4,#7H1X3,#7H3X4,#8H1X2;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A])-;!@[#6H0X3x0!r+0A:2](-;!@[#7H0,#7H1,#7H2;!r;+0;X3;x0;A](-;!@[#1H0X1,#6H1X4,#6H2X4;!r;+0;x0;A])-;!@[#1H0X1,#6H2X4;!r;+0;x0;A])-;!@[#8H0X1x0!r+0A:3] 
--------------------------------------------------------------------------------
 zz_50.000	120.000    | [#6H0X3,#6H1X3,#7H0X2,#7H1X3;!r;+0;x0;A:1](-;!@[#1H0X1,#6H0X3,#6H1X3,#7H0X2,#7H2X3,#8H0X1,#8H1X2;!r;+0;x0;A])-;!@[#6H1,#7H1,#7H2;!r;+0;X3;x0;A:2](-;!@[#1H0X1,#6H0X3,#6H1X3,#6H1X4,#6H2X4,#7H0X2,#7H1X3;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A:3] 
--------------------------------------------------------------------------------
 zz_50.000	123.200    | [#6H2X4x0!r+0A:1](-;!@[#1H0X1x0!r+0A])(-;!@[#1H0X1x0!r+0A])(-;!@[#6H2X4x0!r+0A](-;

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
charge

 Label                | SMIRKS 
 zz_-0.786            | [#8H0X1x0!r+0A:1]-;!@[#6H0X3x0!r+0A](-;!@[#6H2X4x0!r+0A](-;!@[#1H0X1x0!r+0A])(-;!@[#1H0X1x0!r+0A])-;!@[#7H1X3x0!r+0A](-;!@[#1H0X1x0!r+0A])-;!@[#6H0X3x0!r+0A](-;!@[#6H1X4x0!r+0A](-;!@[#1H0X1x0!r+0A])(-;!@[#6H1,#6H2,#6H3;!r;+0;X4;x0;A](-;!@[#16H1X2,#1H0X1,#6H0X3,#6H1X4,#6H2X4,#6H3X4;!r;+0;x0;A])(-;!@[#1H0X1,#8H1X2;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A])-;!@[#7H2,#7H3;!r;+0;X4;x0;A](-;!@[#1H0X1,#6H2X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])-;!@[#1H0X1x0!r+0A])-;!@[#8H0X1x0!r+0A])-;!@[#8H0X1x0!r+0A] 
--------------------------------------------------------------------------------
 zz_0.038             | [#1H0X1x0!r+0A:1]-;!@[#6H2X4x0!r+0A](-;!@[#1H0X1x0!r+0A])(-;!@[#6H1X4x0!r+0A](-;!@[#1H0X1x0!r+0A])(-;!@[#6H

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      


In [35]:
for label, output in smirs_dis.items():
    if isinstance(output, Exception):
        print(label, 'error')
        continue
    if not output.checks:
        print(label, 'failed to make smirks')
        continue
    print(label, 'passes')

bond passes
improper_torsion failed to make smirks
angle failed to make smirks
proper_torsion failed to make smirks
lj passes
charge failed to make smirks


# Try to make SMIRKS for tetrapeptides

In [36]:
for label, clusters in cluster_types.items():
    print(label, len(mols), len(clusters), len(clusters[0][1]))

proper_torsion 190 30 190
improper_torsion 190 3 190
lj 190 14 190
bond 190 33 190
angle 190 42 190
charge 190 181 190


In [None]:
smirs = dict()
for label, clusters in cluster_types.items():
    print(label)
    try: 
        smirs[label] = SMIRKSifier(mols, clusters, max_layers=10, strict_smirks=False)
    except Exception as e: 
        smirs[label] = e

proper_torsion

 Label                | SMIRKS 
 zz_5.375	180.000	2.000 | [#1H0X1,#6H2X4,#7H0X2;!r;+0;x0;A:1]-;!@[#6H0,#6H1;!r;+0;X3;x0;A:2](-;!@[#6H2X4,#7H0X2,#7H1X3;!r;+0;x0;A]-;!@[#6X3,#6X4;!r;+0;H1;x0;A]-;!@[#1H0X1x0!r+0A])-;!@[#6H0,#6H1;!r;+0;X3;x0;A:3](-;!@[#1H0X1,#6H2X4,#7H1X3;!r;+0;x0;A])-;!@[#1H0X1,#7H0X2,#7H1X3;!r;+0;x0;A:4] 
--------------------------------------------------------------------------------
 zz_0.250	0.000	1.000	0.000	0.000	3.000 | [#1H0X1x0!r+0A:1]-;!@[#6H1,#6H3;!r;+0;X4;x0;A:2](-;!@[#1X1,#6X3;!r;+0;H0;x0;A])(-;!@[#1H0X1,#7H1X3;!r;+0;x0;A])-;!@[#6H1,#6H2;!r;+0;X4;x0;A:3](-;!@[#1H0X1,#6H1X4,#6H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])-;!@[#8H1X2x0!r+0A:4]-;!@[#1H0X1x0!r+0A] 
--------------------------------------------------------------------------------
 zz_0.800	0.000	1.000	0.000	0.000	2.000	0.080	180.000	3.000 | [#1H0X1x0!r+0A:1]-;!@[#6H1,#6H2;!r;+0;X4;x0;A:2](-;!@[#1H0X1,#7H0X3,#7H1X3;!r;+0;x0;A])(-;!@[#6H1X4,#6H2X4,#6H3X4,#7H1X3;!r;+0;x0;A](-;!@[#1H0X1,#6H0X3,

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
improper_torsion

 Label                | SMIRKS 
 zz_10.500	180.000	2.000 | [#6H1X4,#6H2X4,#7H2X3;!r;+0;x0;A:1](-;!@[#1H0X1,#7H0X3,#7H1X3,#7H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])-;!@[#6H0X3x0!r+0A:2](-;!@[#7H0X3,#7H1X3,#7H2X3,#8H0X1;!r;+0;x0;A:3])-;!@[#7H1X3,#8H0X1;!r;+0;x0;A:4] 
--------------------------------------------------------------------------------
 zz_1.000	180.000	2.000 | [#6H0,#6H1;!r;+0;X3;x0;A:1](-;!@[#1H0X1,#6H1X3,#7H1X3,#7H2X3,#8H0X1;!r;+0;x0;A])(-;!@[#6H0X3,#6H1X4,#6H2X4,#7H0X2,#7H2X3;!r;+0;x0;A]-;!@[#1H0X1,#6H0X3,#6H2X4;!r;+0;x0;A])-;!@[#7H0,#7H1,#7H2;!r;+0;X3;x0;A:2](-;!@[#1H0X1,#6H1X3,#6H2X4;!r;+0;x0;A:3])-;!@[#1H0X1,#6H1X4;!r;+0;x0;A:4] 
--------------------------------------------------------------------------------
 zz_1.100	180.

angle

 Label                | SMIRKS 
 zz_80.000	120.400    | [#6H1,#6H2;!r;+0;X4;x0;A:1](-;!@[#1H0X1,#7H0X3,#7H1X3,#7H3X4;!r;+0;x0;A])(-;!@[#1H0X1x0!r+0A])(-;!@[#6H1X4,#6H2X4,#6H3X4,#7H1X3;!r;+0;x0;A](-;!@[#1H0X1,#6H0X3,#6H1X4,#6H2X4,#6H3X4,#8H1X2;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A])-;!@[#6H0X3x0!r+0A:2](-;!@[#7H0,#7H1,#7H2;!r;+0;X3;x0;A](-;!@[#1H0X1,#6H1X4,#6H2X4;!r;+0;x0;A])-;!@[#1H0X1,#6H2X4;!r;+0;x0;A])-;!@[#8H0X1x0!r+0A:3] 
--------------------------------------------------------------------------------
 zz_50.000	120.000    | [#6H0X3,#6H1X3,#7H0X2,#7H1X3;!r;+0;x0;A:1](-;!@[#1H0X1,#6H0X3,#6H2X4,#7H2X3,#8H0X1,#8H1X2;!r;+0;x0;A])-;!@[#6H1,#7H1,#7H2;!r;+0;X3;x0;A:2](-;!@[#1H0X1,#6H0X3,#6H1X3,#6H1X4,#6H2X4,#7H0X2,#7H1X3;!r;+0;x0;A])-;!@[#1H0X1x0!r+0A:3] 
--------------------------------------------------------------------------------
 zz_50.000	123.200    | [#6H2X4x0!r+0A:1](-;!@[#1H0X1x0!r+0A])(-;!@[#1H0X1x0!r+0A])(-;!@[#6H2X4x0!r+0A](-;!@[#1H0X1x0!r+0A])(-;!@[#1H0X1x0!r+0A])-;!@[#6H2X

                      SMIRKSifier was not able to create SMIRKS for the provided
                      clusters with 10 layers. Try increasing the number of layers
                      or changing your clusters
                      
charge
