-
Notifications
You must be signed in to change notification settings - Fork 5
/
ambit_tautomer.py
executable file
·156 lines (117 loc) · 5.22 KB
/
ambit_tautomer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import os
import jpype
import jpype.imports
from jpype.types import *
from rdkit.Chem import MolFromSmiles, MolToSmiles
# Both jar paths resolve to "../libs/<jar>" relative to the *current working
# directory*, so the interpreter has to be launched from a directory whose
# parent contains "libs/". If you run from somewhere else, adjust the relative
# prefix (or simply use "libs/whatever.jar" when "libs/" is in the working
# directory itself).
# NOTE(review): the original note said this suits running from
# reac-space-exp/main/<reaction_name> -- confirm against your checkout layout.
_CLASSPATH = [
    os.path.join('../', 'libs/ambit-tautomers-2.0.0-SNAPSHOT.jar'),
    os.path.join('..', 'libs/cdk-2.3.jar'),
]
jpype.startJVM(classpath=_CLASSPATH, convertStrings=True)

# Java classes and packages are looked up only after startJVM has been called,
# which is why these statements sit here instead of with the imports above.
from java.lang import System

java = jpype.JPackage("java")
ambit2 = jpype.JPackage("ambit2")
cdk = jpype.JPackage("org").openscience.cdk

# Module-wide Java objects shared by every function in this file.
tautomerManager = JClass('ambit2.tautomers.TautomerManager')()
silentChemObjectBuilder = JClass(
    'org.openscience.cdk.silent.SilentChemObjectBuilder'
).getInstance()
def smilesToMolecule(smiles):
    """
    Parse a SMILES string with CDK's SmilesParser.

    Keyword arguments:
    smiles -- the SMILES string to parse

    Returns: the parsed CDK molecule object, or None when CDK raises
    InvalidSmilesException (the error is then printed via Java's System.err).
    """
    try:
        parser = cdk.smiles.SmilesParser(silentChemObjectBuilder)
        return parser.parseSmiles(smiles)
    except cdk.exception.InvalidSmilesException as err:
        # Invalid input is reported on the JVM's stderr instead of being
        # re-raised; the caller receives None and must cope with it.
        System.err.println('An error occured while parsing the SMILES')
        err.printStackTrace()
    return None
def smiles_from_molecule(molecule):
    """
    Turn a CDK molecule object into a SMILES string canonicalised by RDKit.

    Keyword arguments:
    molecule -- the CDK molecule object to serialise

    Returns: the SMILES string produced by RDKit's MolToSmiles
    """
    # CDK writes the first SMILES. The intention was to construct the
    # SmilesGenerator with SmiFlavor.Unique, but that could not be made to
    # work through JPype, so the generator is built with True instead.
    cdk_smiles = cdk.smiles.SmilesGenerator(True).createSMILES(molecule)
    # The CDK output may not be canonical, so it is round-tripped through
    # RDKit and RDKit's rendering is what callers get back.
    return MolToSmiles(MolFromSmiles(cdk_smiles))
def generateTautomers(smiles, mode="CMI"):
    """
    Generate the possible tautomers of a molecule.

    Keyword arguments:
    smiles -- the SMILES string of the molecule
    mode -- the generation algorithm to use (default: "CMI")
        Available algorithms:
        "CM" for Simple Combinatorial,
        "CMI" for Improved Combinatorial,
        "IA-DFS" for Incremental Algorithm - Depth First Search,
        "combined" is intended as a combination of CMI and IA-DFS, but it
        is not complete yet and may produce nothing.
        See https://onlinelibrary.wiley.com/doi/abs/10.1002/minf.201200133
        for a detailed discussion of the algorithms.

    Returns: a tuple of SMILES strings of the possible tautomers; the
    RDKit-canonical SMILES of the input molecule is always included.

    Raises: NameError if mode is not one of the names listed above. The check
    happens before the shared tautomerManager is modified.
    """
    # Dispatch table: mode name -> bound TautomerManager method.
    generators = {
        "IA-DFS": tautomerManager.generateTautomersIncrementaly,
        "combined": tautomerManager.generateTautomersCombinedApproach,
        "CMI": tautomerManager.generateTautomers_ImprovedCombApproach,
        "CM": tautomerManager.generateTautomers,
    }
    # Fail fast: a mistyped mode must not leave the module-wide manager
    # pointing at a new structure. The isinstance guard keeps the exception
    # type NameError even for unhashable arguments.
    if not isinstance(mode, str) or mode not in generators:
        raise NameError("Invalid generation algorithm mode: {0}, please correct typos".format(mode))

    mol = smilesToMolecule(smiles)
    tautomerManager.setStructure(mol)
    if mode == "combined":
        # Note: "combined" doesn't produce anything at the moment
        print("WARNING: Combined approach is not fully complete yet, it may produce nothing")
    tautomers = generators[mode]()

    smiles_to_return = [smiles_from_molecule(taut) for taut in tautomers]
    # In case the post filter removes the original molecule
    # TODO: Carbons with two double bonds (and other substructures) should be
    # forbidden at the time of reaction itself, then this shouldn't happen
    # Canonicalise the input once; each call crosses into the JVM and RDKit.
    original_smiles = smiles_from_molecule(mol)
    if original_smiles not in smiles_to_return:
        smiles_to_return.append(original_smiles)
    return tuple(smiles_to_return)
def setNumBackTracks(num):
    """
    Sets the maximum number of back tracks the IA-DFS algorithm is allowed to perform.
    Keyword arguments:
    num -- the new limit; stored in the maxNumOfBackTracks field of the
           module-wide tautomerManager, so it applies to all later calls
    """
    tautomerManager.maxNumOfBackTracks = num
def setMaxTautomerRegistrations(num):
    """
    Set the maxNumOfTautomerRegistrations field of the module-wide tautomerManager.
    Keyword arguments:
    num -- the new value for the field
    """
    # NOTE(review): presumably an upper bound on how many tautomers the
    # manager registers during generation -- confirm against the Ambit docs.
    tautomerManager.maxNumOfTautomerRegistrations = num
def maxSubCombinations(num):
    """
    Maximum number of sub-combinations the improved combinatorial methods is allowed to work with
    See their paper for details on what a sub-combination is.
    Keyword arguments:
    num -- the new limit; stored on the module-wide tautomerManager
    """
    # NOTE(review): "maxNumOfSubCombiations" looks misspelled, but it must
    # match the Java field name exactly -- verify against Ambit before changing.
    tautomerManager.maxNumOfSubCombiations = num
def toCalculateCACTVSEnergyRank(flag):
    """
    Set the FlagCalculateCACTVSEnergyRank field of the module-wide tautomerManager.
    Keyword arguments:
    flag -- the new value for the field (presumably True or False)
    """
    tautomerManager.FlagCalculateCACTVSEnergyRank = flag
def use13Rules(flag):
    """
    Enable or disable the 1,3 shift tautomer rules.

    Keyword arguments:
    flag -- True or False; written to FlagUse13Shifts on the knowledge base
            of the module-wide tautomerManager
    """
    knowledge_base = tautomerManager.getKnowledgeBase()
    knowledge_base.FlagUse13Shifts = flag
def use15Rules(flag):
    """
    Enable or disable the 1,5 shift tautomer rules.

    Keyword arguments:
    flag -- True or False; written to FlagUse15Shifts on the knowledge base
            of the module-wide tautomerManager
    """
    knowledge_base = tautomerManager.getKnowledgeBase()
    knowledge_base.FlagUse15Shifts = flag
def use17Rules(flag):
    """
    Enable or disable the 1,7 shift tautomer rules.

    Keyword arguments:
    flag -- True or False; written to FlagUse17Shifts on the knowledge base
            of the module-wide tautomerManager
    """
    knowledge_base = tautomerManager.getKnowledgeBase()
    knowledge_base.FlagUse17Shifts = flag
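# TODO: expose the rule selector's selection mode. The sketch below is not
# usable yet: RSM is not defined or imported anywhere in this module.
#def selectRules(args):
#    tautomerManager.getRuleSelector().setSelectionMode(RSM.valueOf(args))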
def setRuleNumberLimit(limit):
    """
    Forward a rule number limit to the tautomerManager's rule selector.

    Keyword arguments:
    limit -- the value passed to the rule selector's setRuleNumberLimit
    """
    rule_selector = tautomerManager.getRuleSelector()
    rule_selector.setRuleNumberLimit(limit)
def useDuplicationIsomorphismCheck(flag):
    """
    Toggle the isomorphism-based duplication check on the tautomer filter.

    Keyword arguments:
    flag -- the value passed to setFlagApplyDuplicationCheckIsomorphism
    """
    # TODO: find out and document what this check actually does
    tautomer_filter = tautomerManager.tautomerFilter
    tautomer_filter.setFlagApplyDuplicationCheckIsomorphism(flag)
def useDuplicationCheckInChI(flag):
    """
    Toggle the InChI-based duplication check on the tautomer filter.

    Keyword arguments:
    flag -- the value passed to setFlagApplyDuplicationCheckInChI
    """
    # TODO: find out and document what this check actually does
    tautomer_filter = tautomerManager.tautomerFilter
    tautomer_filter.setFlagApplyDuplicationCheckInChI(flag)
#jpype.shutdownJVM()