In [1]:
from chemper.mol_toolkits import mol_toolkit
from chemper.graphs.cluster_graph import ClusterGraph
from chemper.graphs.fragment_graph import ChemPerGraphFromMol
from chemper.chemper_utils import create_dictionaries_for_clusters

In [2]:
from chemper import chemper_utils
from chemper.optimize_smirks.reducer import Reducer

In [3]:
# Resolve the location of the MiniDrugBank mol2 file through chemper's data-path helper.
mol2 = chemper_utils.get_data_path('molecules/MiniDrugBank_tripos.mol2')
# Read the molecules from that file.
# NOTE(review): `mols_fom_mol2` looks like a typo for "from", but the recorded output
# shows the call succeeds, so this is the name chemper_utils exposes here.
mols = chemper_utils.mols_fom_mol2(mol2)
# Report how many molecules were read.
print(len(mols))

363


# Practice clusters

In [4]:
# (label, SMIRKS) pairs describing the two-atom bond patterns to cluster by.
smirks_labels = [
    ('wild', '[*:1]~[*:2]'),
    ('double', '[*:1]=[*:2]'),
    ('triple', '[*:1]#[*:2]'),
    ('aromatic', '[*:1]:[*:2]'),
    ('carbon_sing', '[#6:1]-[#6:2]'),
]
# Keep the experiment small: only the leading ten molecules are used.
small_mols = mols[:10]
# Build the cluster structure consumed by the Reducer below.
cluster_list = create_dictionaries_for_clusters(smirks_labels, small_mols)

In [5]:
# Quick shape check of the structure returned by create_dictionaries_for_clusters.
# Number of top-level entries (one per SMIRKS label according to the recorded output of 5).
print(len(cluster_list))
# Size of the first entry.
print(len(cluster_list[0]))
# Size of the first item inside the second element of that entry.
# NOTE(review): presumably per-molecule data for the first label — confirm in chemper_utils.
print(len(cluster_list[0][1][0]))

5
2
10


In [6]:
# Build a Reducer from the molecule subset and the cluster structure created above.
# NOTE(review): the meaning of layers=0 is not visible in this notebook — see help(Reducer).
red = Reducer(small_mols, cluster_list, layers=0)

making graph:  10
adding first molecule
making graph:  10
adding first molecule
making graph:  10
adding first molecule
making graph:  10
adding first molecule
making graph:  10
adding first molecule


In [7]:
# Run the Reducer's best-match comparison and display its return value.
# NOTE(review): judging by the recorded output, the result is a pair of
# ([(current label, reference label, count), ...], total) — confirm against chemper.
red.best_match_reference()

Molecule  0
Molecule  1
Molecule  2
Molecule  3
Molecule  4
Molecule  5
Molecule  6
Molecule  7
Molecule  8
Molecule  9
Creating graph matching current types with reference types...

{0: <chemper.chemper_utils.ValenceDict object at 0x11ab7bcf8>, 1: <chemper.chemper_utils.ValenceDict object at 0x11ab7be10>, 2: <chemper.chemper_utils.ValenceDict object at 0x11ab7be48>, 3: <chemper.chemper_utils.ValenceDict object at 0x11ab7be80>, 4: <chemper.chemper_utils.ValenceDict object at 0x11ab7bc50>, 5: <chemper.chemper_utils.ValenceDict object at 0x11ab7bd68>, 6: <chemper.chemper_utils.ValenceDict object at 0x11ab7bda0>, 7: <chemper.chemper_utils.ValenceDict object at 0x11ab7bc18>, 8: <chemper.chemper_utils.ValenceDict object at 0x11ab7bbe0>, 9: <chemper.chemper_utils.ValenceDict object at 0x11ab7bdd8>}
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
zz_double
Graph 

([('zz_double', 'double', 19)], 19)

In [None]:
# Interactive lookup of the Reducer API; exploratory only, can be dropped from a final notebook.
help(Reducer)

In [None]:
import networkx as nx
# Scratch graph: two nodes joined by a single edge that carries a `weight` attribute.
g = nx.Graph()
g.add_nodes_from([1, 2])
g.add_edge(1, 2, weight=0)

# Print every edge as a (node, node, attribute-dict) tuple.
for e in g.edges(data=True):
    print(e)

In [None]:
from chemper.chemper_utils import ValenceDict, ImproperDict

def get_smirks_matches(mol, smirks):
    """
    Gets atom indices for a smirks string in a given molecule

    Parameters
    ----------
    mol : a chemper Mol
    smirks : str
             SMIRKS pattern being matched to the molecule

    Returns
    -------
    matches: list of tuples
        atom indices for the labeled atoms in the smirks, one tuple per
        match, ordered by increasing SMIRKS label
    """
    from chemper.optimize_smirks.environment import ChemicalEnvironment

    # Improper torsions are collected in an ImproperDict, every other
    # environment type in a ValenceDict.
    # NOTE(review): presumably these dict types normalize equivalent index
    # orderings so duplicates collapse — confirm in chemper_utils.
    env = ChemicalEnvironment(smirks)
    if env.getType().lower() == 'impropertorsion':
        matches = ImproperDict()
    else:
        matches = ValenceDict()

    for match in mol.smirks_search(smirks):
        # Each match maps a SMIRKS label to an atom; order atoms by label.
        smirks_indices = sorted(match.keys())
        atom_indices = tuple(match[s].get_index() for s in smirks_indices)
        # Only the keys matter; the value is a placeholder.
        matches[atom_indices] = ''

    return list(matches.keys())

def get_typed_molecules(smirks_list, molecules):
    """
    Assign a type label to every SMIRKS match in every molecule.

    Parameters
    ----------
    smirks_list : list of (label, smirks) pairs
        Applied in order; when several patterns match the same atom
        indices, the label of the last matching pattern is kept.
    molecules : list of chemper Mols

    Returns
    -------
    type_dict : dict of dicts
        Outer keys are the positions of the molecules in `molecules`;
        inner keys are the atom-index tuples returned by
        `get_smirks_matches`, each mapped to its assigned label.
    """
    typed = {}
    for position, molecule in enumerate(molecules):
        assignments = {}
        typed[position] = assignments
        for label, smirks in smirks_list:
            # Later patterns overwrite labels set by earlier ones.
            for atom_indices in get_smirks_matches(molecule, smirks):
                assignments[atom_indices] = label

    return typed

In [None]:
# Label every molecule held by the Reducer using its current (label, SMIRKS) list,
# via the get_typed_molecules helper defined in this notebook.
get_typed_molecules(red.current_smirks, red.molecules)

In [None]:
# Inspect one case by hand: show the SMIRKS string of the first current entry ...
print(red.current_smirks[0][1])
# ... and list the atom-index tuples it matches in the first molecule.
get_smirks_matches(red.molecules[0], red.current_smirks[0][1])

In [None]:
# Re-run the Reducer's best-match comparison (same call as the earlier executed cell).
red.best_match_reference()

In [None]:
# NOTE(review): duplicate of the earlier help(Reducer) lookup; one of the two can be removed.
help(Reducer)