In [2]:
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.MolStandardize import rdMolStandardize
from rdkit.Chem.Descriptors import ExactMolWt
# Shared RDKit tautomer enumerator; the get_canonical function/method below read this
# module-level name to compute canonical tautomers.
enumerator = rdMolStandardize.TautomerEnumerator()
import os 

In [None]:
 def get_canonical(network_iteration):
     """Return the canonical-tautomer SMILES of every molecule in one iteration.

     Standalone variant of ``Tautomer.get_canonical``; it relies on the
     module-level ``Chem`` and ``enumerator`` names.

     Parameters
     ----------
     network_iteration : iterable of str
         SMILES strings belonging to one network iteration.

     Returns
     -------
     list of str
         One canonical SMILES per input molecule, in input order.

     Raises
     ------
     ValueError
         If a SMILES string cannot be parsed by RDKit.
     """
     tautomer = []
     for molecule in network_iteration:
         # The enumerator only works on Mol objects, so parse the SMILES first.
         can_mol = Chem.MolFromSmiles(molecule)
         if can_mol is None:
             # MolFromSmiles signals a parse failure with None; fail here with the
             # offending string instead of an opaque error inside Canonicalize.
             raise ValueError(f"Could not parse SMILES: {molecule!r}")

         taut_mol = enumerator.Canonicalize(can_mol)
         # Back to SMILES, then through CanonSmiles so the result is written the
         # same way as the SMILES it will be compared against.
         taut_can = Chem.CanonSmiles(Chem.MolToSmiles(taut_mol))
         tautomer.append(taut_can)

     return tautomer

In [37]:
class Tautomer:
    """Compare every molecule of a reaction network with its canonical tautomer.

    Intended call order (each step stores the attribute the next one reads):
    ``dic_by_iterations`` -> ``tautomers_transformation`` ->
    ``compare_network`` -> ``status`` / ``export2txt``.

    The RDKit-based methods rely on the module-level names ``Chem`` and
    ``enumerator``.
    """

    def __init__(self, name, file):
        """Store the display name of the network and the path of its input file."""
        self.name = name
        self.file = file

    ### Get dictionary of iterations ###
    def dic_by_iterations(self):
        """Read ``self.file`` into ``{iteration: [canonical SMILES, ...]}``.

        Each non-blank line must hold exactly two whitespace-separated fields,
        the iteration label and a SMILES string (e.g. ``G1    CCO``).
        Blank lines are skipped.

        Returns
        -------
        dict
            The mapping, also stored as ``self.network``.

        Raises
        ------
        ValueError
            If a non-blank line does not contain exactly two fields.
        """
        network = dict()

        with open(self.file, "r") as handler:
            for line_number, line in enumerate(handler, start=1):
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                if len(fields) != 2:
                    raise ValueError(
                        f"{self.file}, line {line_number}: expected "
                        f"'<iteration> <smiles>', got {line.rstrip()!r}"
                    )
                iteration, molecule = fields

                # setdefault creates the list on first sight of an iteration; an
                # error from CanonSmiles now propagates without touching the
                # molecules already collected for that iteration.
                network.setdefault(iteration, []).append(Chem.CanonSmiles(molecule))

        ### Save dict in object and return it for convenience ###
        self.network = network
        return self.network

    ### Canonical tautomers of one iteration ###
    def get_canonical(self, network_iteration):
        """Return the canonical-tautomer SMILES of every molecule in one iteration.

        Parameters
        ----------
        network_iteration : iterable of str
            SMILES strings belonging to one network iteration.

        Returns
        -------
        list of str
            One canonical SMILES per input molecule, in input order.

        Raises
        ------
        ValueError
            If a SMILES string cannot be parsed by RDKit.
        """
        tautomer = []
        for molecule in network_iteration:
            # The enumerator only works on Mol objects, so parse the SMILES first.
            can_mol = Chem.MolFromSmiles(molecule)
            if can_mol is None:
                # MolFromSmiles signals a parse failure with None; report the
                # offending string instead of failing inside Canonicalize.
                raise ValueError(f"Could not parse SMILES: {molecule!r}")

            taut_mol = enumerator.Canonicalize(can_mol)
            # Back to SMILES, then through CanonSmiles so the result is written
            # the same way as the entries of self.network it is compared with.
            tautomer.append(Chem.CanonSmiles(Chem.MolToSmiles(taut_mol)))

        return tautomer

    ### Get dictionary of iterations by tautomers ###
    def tautomers_transformation(self):
        """Build ``{iteration: [canonical tautomer SMILES, ...]}`` from ``self.network``.

        Returns
        -------
        dict
            The mapping, also stored as ``self.Network_tatuomers``.
        """
        self.Network_tatuomers = {
            iterations: self.get_canonical(molecules)
            for iterations, molecules in self.network.items()
        }
        return self.Network_tatuomers

    def compare_network(self):
        """Split the molecules of each iteration into matched and missed.

        A molecule is *matched* when its SMILES equals the SMILES of its
        canonical tautomer, and *missed* otherwise. An iteration only appears
        as a key in a result dict if it has at least one molecule of that kind.

        Returns
        -------
        tuple of (dict, dict)
            ``(matched, missed)``, also stored as ``self.matched`` and
            ``self.missed``.
        """
        matched = dict()
        missed = dict()
        for iterations in self.network.keys():
            print(f" Currently working on Iteration: {iterations}")
            pairs = zip(self.network[iterations], self.Network_tatuomers[iterations])
            for mol, taut in pairs:
                # Pick the bucket, then append; the list is created on demand.
                bucket = matched if mol == taut else missed
                bucket.setdefault(iterations, []).append(mol)

        self.missed = missed
        self.matched = matched

        return self.matched, self.missed

    def status(self):
        """Print, per iteration, the percentage of matched and missed molecules."""
        print()
        print(f"{self.name} Summary")
        for iterations in self.network.keys():
            total = len(self.network[iterations])
            # An iteration with no matches (or no misses) has no key in the
            # corresponding dict, so fall back to an empty list.
            n_matched = len(self.matched.get(iterations, []))
            n_missed = len(self.missed.get(iterations, []))
            print(f"Iteration {iterations}")
            print("Tautomers Matches: ", n_matched / total * 100, "%")
            print("Tautomers Missed: ", n_missed / total * 100, "%")
            print()

    @staticmethod
    def _write_pairs(path, by_iteration):
        """Write one ``<iteration>    <smiles>`` line per molecule to ``path``."""
        with open(path, "w") as handler:
            for iterations, smiles_list in by_iteration.items():
                for smiles in smiles_list:
                    handler.write(iterations + "    " + smiles + "\n")

    def export2txt(self, prefix):
        """Write matched and missed molecules to two text files.

        Parameters
        ----------
        prefix : str
            Path prefix; the files are ``<prefix>_matches.txt`` and
            ``<prefix>_missed.txt``.
        """
        file1 = prefix + "_matches.txt"
        self._write_pairs(file1, self.matched)
        print()
        print(f"Network {self.name} Matched Tautomers stored at {file1}")

        file2 = prefix + "_missed.txt"
        self._write_pairs(file2, self.missed)

        print(f"Network {self.name} Missed Tautomers stored at {file2}")


In [38]:
# Run the whole pipeline on the glucose-degradation network. The order matters:
# each call stores the attribute that the next call reads.
Network = Tautomer('Glucose Degradation 5th Iteration','glucose_degradation_output.txt')
Network.dic_by_iterations()          # parse the file into {iteration: [canonical SMILES]}
Network.tautomers_transformation()   # canonical tautomer SMILES for every molecule
Network.compare_network()            # split molecules into matched / missed per iteration
Network.status()                     # print the per-iteration percentages
# NOTE(review): "Degdatation" looks like a typo of "Degradation"; left unchanged
# because it determines the names of the output files.
Network.export2txt("Glucose_Degdatation")

 Currently working on Iteration: G1
 Currently working on Iteration: G2
 Currently working on Iteration: G3
 Currently working on Iteration: G4
 Currently working on Iteration: G5

Glucose Degradation 5th Iteration Summary
Iteration G1
Tautomers Matches:  37.5 %
Tautomers Missed:  62.5 %

Iteration G2
Tautomers Matches:  45.370370370370374 %
Tautomers Missed:  54.629629629629626 %

Iteration G3
Tautomers Matches:  50.3858875413451 %
Tautomers Missed:  49.61411245865491 %

Iteration G4
Tautomers Matches:  58.039594398841146 %
Tautomers Missed:  41.96040560115886 %

Iteration G5
Tautomers Matches:  60.33567677216772 %
Tautomers Missed:  39.66432322783227 %


Network Glucose Degradation 5th Iteration Matched Tautomers stored at Glucose_Degdatation_matches.txt
Network Glucose Degradation 5th Iteration Missed Tautomers stored at Glucose_Degdatation_missed.txt
