# Fitting Proteins

In [1]:
import os
import copy
import glob
import itertools
from chemper.chemper_utils import check_smirks_to_reference, get_typed_molecules, create_tuples_for_clusters
from making_proteins import everything_from_fastas, print_order_type_data, at_least_one_passed, ParameterSystem, by_biggest_size, by_biggest_smirks
import cmiles
import json
from parmed.modeller import ResidueTemplate
from chemper.smirksify import Reducer, print_smirks

In [2]:
from openeye import oechem, oedepict
import IPython
from IPython.display import display, Image

# Alpha values applied, in this order, to every OpenEye contrast color.
alpha_list = [255, 51, 153, 117]


def _copy_with_alpha(color, alpha):
    """Return a copy of ``color`` with its alpha set to ``alpha`` via ``SetA``."""
    tinted = color.CreateCopy()
    tinted.SetA(alpha)
    return tinted


# One block of contrast colors per alpha value, in alpha_list order.
final_colors = []
for alpha in alpha_list:
    final_colors.extend(
        _copy_with_alpha(color, alpha) for color in oechem.OEGetContrastColors()
    )

def depictMatch(mol, match=None, supH = True, idx_atoms=list(), width=500, height=200, fn=None,
               color1=oechem.OELightBlue, color2=oechem.OELightSalmon, include_bonds=True):
    """
    Render a 2D depiction of ``mol`` with optional highlighting.

    Parameters
    ----------
    mol : molecule passed to the oedepict preparation and display calls
    match : optional object handed to ``OEAddHighlighting`` and drawn in the
        ball-and-stick style with ``color2``; skipped when None
    supH : value forwarded to ``SetSuppressHydrogens``
    idx_atoms : container of atom indices; atoms whose ``GetIdx()`` is in it
        are highlighted in the cogwheel style with ``color1``
    width, height : size used for both the display options and the image
    fn : optional output file name; the depiction is written there when
        OpenEye recognises the file extension as an image format
    color1, color2 : colors for the indexed atoms and for ``match``
    include_bonds : when True, bonds joining two indexed atoms are
        highlighted along with the atoms

    Returns
    -------
    img : the ``oedepict.OEImage`` the molecule was rendered into
    """
    # Gather the indexed atoms (and optionally the bonds between them).
    highlighted = oechem.OEAtomBondSet()
    for atom in mol.GetAtoms():
        if atom.GetIdx() not in idx_atoms:
            continue
        highlighted.AddAtom(atom)
        if include_bonds:
            for bond in atom.GetBonds():
                neighbor = bond.GetNbr(atom)
                # Only add the bond from its lower-indexed end so that each
                # bond between two indexed atoms is added once.
                if (neighbor.GetIdx() in idx_atoms) and neighbor.GetIdx() > atom.GetIdx():
                    highlighted.AddBond(bond)

    # Prepare the 2D layout.
    prep_opts = oedepict.OEPrepareDepictionOptions()
    prep_opts.SetDepictOrientation( oedepict.OEDepictOrientation_Horizontal)
    prep_opts.SetSuppressHydrogens(supH)
    oedepict.OEPrepareDepiction(mol, prep_opts)

    display_opts = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
    display_opts.SetAtomColorStyle(oedepict.OEAtomColorStyle_WhiteMonochrome)
    disp = oedepict.OE2DMolDisplay(mol, display_opts)

    # Indexed atoms get the cogwheel highlight.
    cogwheel_color = oechem.OEColor(color1)
    if not highlighted.IsEmpty():
        oedepict.OEAddHighlighting(
            disp, cogwheel_color, oedepict.OEHighlightStyle_Cogwheel, highlighted)

    # The whole match, when given, gets the ball-and-stick highlight.
    if match is not None:
        stick_color = oechem.OEColor(color2)
        oedepict.OEAddHighlighting(
            disp, stick_color, oedepict.OEHighlightStyle_BallAndStick, match)

    img = oedepict.OEImage(width, height)
    oedepict.OERenderMolecule(img,disp)

    # Optionally write the depiction to disk as well.
    if fn is not None:
        ext = oechem.OEGetFileExtension(fn)
        if not oedepict.OEIsRegisteredImageFile(ext):
            print("OE could not handle extension on %s" % fn)
        else:
            ofs = oechem.oeofstream()
            if ofs.open(fn):
                oedepict.OERenderMolecule(ofs, ext, disp)
            else:
                print("OE could not open file due to error %s" % fn)
            ofs.close()
    return img

# ==================================================

# Reparsing json system

In [3]:
def convert_json_and_oeb(json_file, mol_dir='./mol_files/'):
    """
    Reload a stored parameter system from its JSON description.

    The JSON file must contain the keys ``'mol_files'``, ``'smirks_lists'``
    and ``'clusters'``. Every entry of ``'mol_files'`` is a file name
    resolved relative to ``mol_dir`` and read with OpenEye.

    Parameters
    ----------
    json_file : str
        path to the JSON file describing the system
    mol_dir : str
        directory holding the molecule files named in the JSON

    Returns
    -------
    mols : list of oechem.OEMol
        a copy of every molecule read from the listed files, in file order
    smirks_lists : the ``'smirks_lists'`` entry of the JSON, unchanged
    clusters : the ``'clusters'`` entry of the JSON, unchanged
    """
    with open(json_file, 'r') as inputf:
        d = json.load(inputf)

    mol_dir = os.path.abspath(mol_dir)
    mols = list()
    for mol_name in d['mol_files']:
        mol_file = os.path.join(mol_dir, mol_name)
        ifs = oechem.oemolistream()
        if not ifs.open(mol_file):
            # Say so explicitly: a missing file would otherwise just
            # contribute zero molecules without any hint of why.
            print("OE could not open molecule file %s" % mol_file)
            continue
        try:
            mol = oechem.OEMol()
            while oechem.OEReadMolecule(ifs, mol):
                # Copy, because `mol` is reused as the read buffer.
                mols.append(oechem.OEMol(mol))
        finally:
            # Release the stream even if reading raises.
            ifs.close()

    return mols, d['smirks_lists'], d['clusters']

In [4]:
# Load the "big" and "small" Lennard-Jones (lj) JSON files as a quick check
# that convert_json_and_oeb works. Molecule files are looked up in the
# function's default mol_dir.
json_test = './all_aminos_together/allIn1_big_99sbildn_lj_1mols.json'
json_test2 = './all_aminos_together/allIn1_small_99sbildn_lj_1mols.json'
# NOTE: mols, dsmirks and dclusters are reassigned for every file inside the
# reduction loop further down, so these values do not persist.
mols, dsmirks, dclusters = convert_json_and_oeb(json_test)
mols2, dsmirks2, dclusters2 = convert_json_and_oeb(json_test2)

# Final Dictionary 

The final results I want are:

the initial SMIRKS and the reduced SMIRKS (`None` if the reduction failed), for each of the four cluster orderings and for every fragment type, so the dictionary will have:

**Fragment**
* ordering 
    - 'initial smirks'
    - 'final smirks' 
    

In [6]:
# Results container, filled as final_dict[fragment][ordering][key].
# Re-running this cell discards everything collected so far.
final_dict = {}

In [14]:
# Each entry pairs a file-name label with the cluster orderings stored in
# the JSON files carrying that label.
file_keys = [
    ('big', ['big_smirks', 'biggest_size']),
    ('small', ['small_smirks', 'small_size'])
]

# Keys marking a (fragment, ordering) entry as finished. 'output_1k' is what
# this cell writes on success, so it has to be listed here; otherwise every
# completed reduction is recomputed when the cell is re-run after an interrupt.
finished_keys = ('output', 'output_1k', 'output_10k')

for fn_label, cluster_orders in file_keys:
    # Sorted so the processing order (and so the printed output and the
    # insertion order of final_dict) does not depend on the file system.
    fns = sorted(glob.glob('./all_aminos_together/allIn1_%s_99sbildn_*_1mols.json' % fn_label))
    print('='*80)
    print(' '*20,fn_label)
    print('='*80)
    for f in fns:
        # The fragment type is the second-to-last '_'-separated field of the
        # file name, e.g. 'lj' in allIn1_big_99sbildn_lj_1mols.json.
        frag = f.split('_')[-2]
        if frag == 'charge':
            continue
        if frag == 'torsion':
            # Torsion files carry an extra prefix field before 'torsion';
            # keep it as part of the fragment name.
            prefix = f.split('_')[-3]
            frag = '%s_%s' % (prefix, frag)
        print('-'*80)
        print(' '*30,frag)
        print('-'*80)
        mols, dsmirks, dclusters = convert_json_and_oeb(f)

        if frag not in final_dict:
            final_dict[frag] = dict()

        for order in cluster_orders:
            if order in final_dict[frag]:
                if any(key in final_dict[frag][order] for key in finished_keys):
                    print('Already in dictionary ', frag, order)
                    continue

            # Start (or restart) this entry; a partially filled entry left by
            # an interrupted run is overwritten here.
            final_dict[frag][order] = dict()
            d = dsmirks[order][frag]
            # JSON turns the (label, smirks) pairs into lists; make them
            # tuples again.
            type_list = [(l, s) for l,s in d['type_list']]
            final_dict[frag][order]['initial'] = type_list

            print('ORIGINAL', order)
            print_smirks(type_list)

            if not d['checked']:
                # The stored SMIRKS were not marked as checked, so there is
                # nothing to reduce; record that explicitly.
                final_dict[frag][order]['output_10k'] = None
                continue

            red = Reducer(type_list, mols, verbose=False)
            final_dict[frag][order]['output_1k'] = red.run(1000)
            print('REDUCED 1k ', order)
            print_smirks(final_dict[frag][order]['output_1k'])

            # Longer follow-up run, currently disabled:
            #final_dict[frag][order]['output_10k'] = red.run(9000)
            #print('REDUCED 10k ', order)
            #print_smirks(final_dict[frag][order]['output_10k'])

                     big
--------------------------------------------------------------------------------
                               angle
--------------------------------------------------------------------------------
ORIGINAL big_smirks

 Label                | SMIRKS 
 zz_50.000	109.500    | [#16!r+0H0X2x0A,#16!r+0H1X2x0A,#6!r+0H0X3x0A,#6!r+0H1X4x0A,#6!r+0H2X4x0A,#6!r+0H3X4x0A,#6+0H0X3r5x2A,#6+0H0X3r6x2a,#6+0H1X4r5x2A,#6+0H2X4r5x2A,#7!r+0H1X3x0A,#7!r+1H3X4x0A,#7+0H0X3r5x2A,#8!r+0H1X2x0A:1](-,:,=[#1!r+0H0X1x0A,#6!r+0H0X3x0A,#6!r+0H2X4x0A,#6!r+0H3X4x0A,#6+0H1X3r5x2A,#6+0H1X3r6x2a,#7!r+0H1X3x0A,#7!r+0H2X3x0A,#7+0H0X3r5x2A,#7+0H1X3r5x2A,#8!r-1H0X1x0A])-[#6!r+0H1x0,#6!r+0H2x0,#6!r+0H3x0,#6+0H1r5x2,#6+0H2r5x2,#7!r+1H3x0;X4;A:2](-[#1!r+0H0X1x0,#16!r+0H0X2x0,#16!r+0H1X2x0,#6!r+0H1X4x0,#6!r+0H2X4x0,#6!r+0H3X4x0,#6+0H1X4r5x2,#6+0H2X4r5x2,#7!r+0H1X3x0,#7!r+1H3X4x0,#7+0H0X3r5x2;A])(-[#1!rH0X1x0A,#6!rH0X3x0A,#6!rH1X4x0A,#6!rH2X4x0A,#6!rH3X4x0A,#6H0X3r5x2A,#6H0X3r6x2a,#6H2X4r5x2A,#8!rH1X2x0A

REDUCED 1k  big_smirks

 Label                | SMIRKS 
 zz_50.000	109.500    | [*:1]~[*:2]-;!@[*:3] 
--------------------------------------------------------------------------------
 zz_70.000	120.000    | [*;A:1](~[#1x0X1H0,#6X3H1x2;+0])~[*x0AH0!r,*r5AH0x2,*aH0r6x2,*r5H1x2:2]-,:,=[#6,#6,#7A,#7A,#7A,#7A:3] 
--------------------------------------------------------------------------------
 zz_50.000	120.000    | [*x0AH0!r,*r5x3H0a,*aH0r6x2,*r5H1Ax2,*:1]-,:,=[#6r5H1Ax2+0,#6+0H1r6x2a,#7x0H1A!r,#7x0A!rH2+0,#7x0+1A!rH2,#7r5H1Ax2+0:2](-,:,=[#1x0AH0!rX1,#6x0X4H1A!r,#6x0X4A!rH2,#6r5X3AH0x2,#6r5X3x3H0a,#6X3aH0r6x2,#6r5X3H1Ax2,#6aX3H1r6x2,#7r5AH0x2X2,#7r5X3H1Ax2])~;!@[#1x0AH0!rX1:3] 
--------------------------------------------------------------------------------
 zz_40.000	109.500    | [#6x0!r,#6x0!r,#6:1]~[*;X4;A:2]-[#6,#6!rH2,#6!rH3,#6r5H1,#6r5H2:3] 
--------------------------------------------------------------------------------
 zz_80.000	109.700    | [#7x0H1!r,#7r5H0x2;+0;X3;A:1](-[#1x0X1H

REDUCED 10k  big_smirks

 Label                | SMIRKS 
 zz_50.000	109.500    | [*:1]~[*:2]~[*:3] 
--------------------------------------------------------------------------------
 zz_70.000	120.000    | [*:1](~[*H0,*x2:2]~[#6,#7:3])~[*] 
--------------------------------------------------------------------------------
 zz_50.000	120.000    | [*:1]~[#6r5H1,#6H1r6,#7!rH2,#7H1:2](~[#1,#6x0X4,#6H0x2,#6X3x3H0,#6X3H0x2,#6H1x2,#7X2H0x2,#7X3H1x2])~;!@[*:3] 
--------------------------------------------------------------------------------
 zz_40.000	109.500    | [#6:1]~[*;X4:2]~[#6:3] 
--------------------------------------------------------------------------------
 zz_80.000	109.700    | [#7:1]~[*;X4:2]~[*:3]~[*] 
--------------------------------------------------------------------------------
 zz_63.000	111.100    | [#6H0!r:1]~[*:2]~[#6:3] 
--------------------------------------------------------------------------------
 zz_70.000	116.600    | [#6:1]~[#6x0H0:2]~[*:3] 
------------------------

REDUCED 1k  big_smirks

 Label                | SMIRKS 
 zz_1.100	180.000	2.000	6 | [*:1]~[*:2](~[*:3])~[*:4] 
--------------------------------------------------------------------------------
 zz_1.000	180.000	2.000	7 | [*:1]~[#7:2](~[*:3])~[*:4] 
--------------------------------------------------------------------------------
 zz_10.500	180.000	2.000	6 | [*:1]~[*:2](=[*+1,*X1:3])~[*:4] 
--------------------------------------------------------------------------------
 zz_1.100	180.000	2.000	7 | [*:1]~[*:2](~[*:4])~[*;x0:3]~[#6H0] 
--------------------------------------------------------------------------------



KeyboardInterrupt: 

In [None]:
import pickle

# Save the collected reductions. The with-block guarantees the file is
# flushed and closed, which a bare open(...) passed to pickle.dump does not.
with open('./all_aminos_together/reduced_smirks_dict.p', 'wb') as pickle_file:
    pickle.dump(final_dict, pickle_file)

In [63]:
# Show what is stored under the 'output' key for every fragment / ordering.
major_rule = '=' * 80
minor_rule = '-' * 80

for frag, frag_dict in final_dict.items():
    print(major_rule)
    print(' '*30, frag)
    print(major_rule)
    for order, order_dict in frag_dict.items():
        print(minor_rule)
        print(' '*20, order)
        print(minor_rule)
        if 'output' not in order_dict:
            # Nothing recorded under this key for this ordering.
            continue
        reduced = order_dict['output']
        if reduced is None:
            print("No Reduction")
            continue
        print('REDUCED')
        print_smirks(reduced)
            


                               angle
--------------------------------------------------------------------------------
                     big_smirks
--------------------------------------------------------------------------------
REDUCED

 Label                | SMIRKS 
 zz_50.000	109.500    | [*:1]~[*:2]~[*:3] 
--------------------------------------------------------------------------------
 zz_70.000	120.000    | [*:1](~[*:2]~[#6x2,#7:3])~[*] 
--------------------------------------------------------------------------------
 zz_50.000	120.000    | [*:1]~[#6x2H1,#7H1,#7H2,#7x2H1:2](~[#1:3])~[#1x0H0,#6X4x0H1,#6H2x0,#6X3H0x3,#6x2H0,#6X3x2,#6x2H1,#7X2x2H0,#7x2H1] 
--------------------------------------------------------------------------------
 zz_40.000	109.500    | [#6:1]~[#6:2](~[#6:3])~;!@[*] 
--------------------------------------------------------------------------------
 zz_80.000	109.700    | [*;X3:1](~[#6:2]~[*:3]-[*])~;!@[*]~[*]~[#7] 
-------------------------------------------

In [13]:
# Show the stored reductions for every fragment / ordering.
# Both 'output_10k' and 'output' are handled the same way: a stored list is
# printed and a stored None is reported as "No Reduction". Previously an
# 'output' entry was always reported as "No Reduction", even when it held a
# reduced SMIRKS list.
# NOTE(review): the reduction cell stores its results under 'output_1k',
# which this cell does not display -- confirm whether it should.
for frag, frag_dict in final_dict.items():
    print('='*80)
    print(' '*30, frag)
    print('='*80)
    for order, order_dict in frag_dict.items():
        print('-'*80)
        print(' '*20, order)
        print('-'*80)
        for key in ('output_10k', 'output'):
            if key not in order_dict:
                continue
            if order_dict[key] is not None:
                print('REDUCED')
                print_smirks(order_dict[key])
            else:
                print("No Reduction")

                               angle
--------------------------------------------------------------------------------
                     big_smirks
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
                     biggest_size
--------------------------------------------------------------------------------
No Reduction
                               bond
--------------------------------------------------------------------------------
                     big_smirks
--------------------------------------------------------------------------------
REDUCED

 Label                | SMIRKS 
 zz_310.000	1.526     | [*:1]~[*:2] 
--------------------------------------------------------------------------------
 zz_434.000	1.010     | [*:1]~[#1:2] 
--------------------------------------------------------------------------------
 zz_340.000	1.090     | [#6:1]~[#1:2] 
-------------

In [16]:
# Print the 'output_10k' reductions as SMIRKS tables and dump the raw value
# of any 'output' entry for inspection.
heavy_rule = '=' * 80
light_rule = '-' * 80

for frag, frag_dict in final_dict.items():
    print(heavy_rule)
    print(' '*30, frag)
    print(heavy_rule)
    for order, order_dict in frag_dict.items():
        print(light_rule)
        print(' '*20, order)
        print(light_rule)
        if 'output_10k' in order_dict:
            reduced_10k = order_dict['output_10k']
            if reduced_10k is None:
                print("No Reduction")
            else:
                print('REDUCED')
                print_smirks(reduced_10k)
        if 'output' in order_dict:
            # Raw value, not formatted with print_smirks.
            print(order_dict['output'])

                               angle
--------------------------------------------------------------------------------
                     big_smirks
--------------------------------------------------------------------------------
REDUCED

 Label                | SMIRKS 
 zz_50.000	109.500    | [*:1]~[*:2]~[*:3] 
--------------------------------------------------------------------------------
 zz_70.000	120.000    | [*:1](~[*H0,*x2:2]~[#6,#7:3])~[*] 
--------------------------------------------------------------------------------
 zz_50.000	120.000    | [*:1]~[#6r5H1,#6H1r6,#7!rH2,#7H1:2](~[#1,#6x0X4,#6H0x2,#6X3x3H0,#6X3H0x2,#6H1x2,#7X2H0x2,#7X3H1x2])~;!@[*:3] 
--------------------------------------------------------------------------------
 zz_40.000	109.500    | [#6:1]~[*;X4:2]~[#6:3] 
--------------------------------------------------------------------------------
 zz_80.000	109.700    | [#7:1]~[*;X4:2]~[*:3]~[*] 
---------------------------------------------------------------------