In [33]:
# Load RDKit modules
from rdkit import Chem

# For Error Logs
from io import StringIO
from contextlib import redirect_stderr
import sys
from rdkit import rdBase
from rdkit import RDLogger

# For changing directories
import os

# For csv import
import csv

# RDKit Version
# Print the RDKit release string so the notebook output records which version
# produced the results. rdBase is already imported above; the repeated import
# is harmless.
from rdkit import rdBase
print(rdBase.rdkitVersion)

2023.09.3


In [34]:
# 1.0 Load SMILES validation data
# Each row of the tab-separated file is read as: column 0 = SMILES, column 1 = identifier.
# The result maps identifier -> {'validation_smiles': SMILES}; a repeated identifier
# overwrites the earlier entry.
smiles_data = {}
# newline='' is what the csv module documentation recommends for files handed to csv.reader
with open('test.smi', 'r', newline='') as infile:
    reader = csv.reader(infile, delimiter='\t')

    # Loop through each row in the file
    for row in reader:
        # csv.reader yields an empty list for a blank line (e.g. a trailing empty
        # line at the end of the file); skip it instead of failing on row[1]
        if not row:
            continue

        # A non-blank row without an identifier column cannot be keyed; fail with
        # the offending line number rather than a bare IndexError
        if len(row) < 2:
            raise ValueError(
                f"test.smi line {reader.line_num}: expected two tab-separated "
                f"columns (SMILES, identifier), got {row!r}"
            )

        # Add each pair to the dictionary
        smiles_data[row[1]] = {'validation_smiles': row[0]}

In [35]:
# 2.0 Calculate InChI from SMILES
# SMILES → RDKit Molecular Object → InChI v1.06
#
# For every entry in smiles_data this cell writes
#   * one line to 'test.inchi': "<InChI or placeholder>\t<key>"
#   * one entry to 'test.log':  "<key>" alone when nothing was captured on stderr,
#                               otherwise "<key>\t<captured messages>"

# Redirect RDKit logging to Python's standard error so it can be captured per molecule
rdBase.LogToPythonStderr()
# Not needed by the loop below (redirect_stderr restores the stream itself);
# kept so that any other cell referencing this name keeps working
original_stderr = sys.stderr

# Loop through SMILES and calculate InChI
# NOTE(review): the log is opened in append mode while the InChI file is overwritten,
# so re-running this cell adds another copy of the entries to test.log — confirm intended.
with open("test.log", "a") as log, open('test.inchi', 'w') as out:

    for key, value in smiles_data.items():

        # Capture everything written to stderr while this molecule is processed.
        # redirect_stderr puts the previous sys.stderr back on exit, even if
        # RDKit raises.
        sio = StringIO()
        inchi = None
        with redirect_stderr(sio):
            mol = Chem.MolFromSmiles(value['validation_smiles'])
            if mol is not None:
                # Compute InChI
                inchi = Chem.MolToInchi(mol)

        # Build the log message only AFTER both steps have run, so that messages
        # emitted during InChI generation are included as well as those from
        # SMILES parsing.
        captured = sio.getvalue()
        message = str(key) + "\t" + captured

        # write the errors to file when mol is None
        if mol is None:
            out.write("Error with processing, see log" + "\t" + str(key) + "\n")
            log.write(message + "\n")
            continue

        # capture an empty string returned from the InChI calculation
        # Here is an example: [C@H]1([C@H](C1C2[C@@H]([C@@H]2C(=O)O)C(=O)O)C(=O)O)C(=O)O
        if not inchi:
            out.write("InChI=1S//" + "\t" + str(key) + "\n")
            log.write(message + "\n")
            continue

        # write InChI to file
        out.write(inchi + "\t" + str(key) + "\n")

        # write any warnings to file
        # If none, the captured text is an empty string and only the key is logged
        if captured == "":
            log.write(str(key) + "\n")
        else:
            # Make sure the entry is newline-terminated so the next key always
            # starts on its own line
            log.write(message if message.endswith("\n") else message + "\n")

# Enable the 'rdApp.info' RDKit log channel.
# NOTE(review): this does not undo LogToPythonStderr() above; RDKit messages
# presumably keep going to Python's stderr afterwards — confirm that is intended.
RDLogger.EnableLog('rdApp.info')