In [1]:
#Major dependencies:
#pandas, numpy, seaborn, matplotlib
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sys
import os
import networkx as nx   
import rdkit 
from rdkit import Chem
from rdkit.Chem import rdmolops
from rdkit.Chem import Draw

In [81]:
# Define the SMILES strings for the molecules
smi_1 = "OCC1OC(O)C(O)C(O)C1O"
smi_2 = 'O=CC(O)C(O)C(O)C(O)CO'
smi_3 = 'O=CC(O)C(O)CO'
smi_4 = 'OCC(O)C1OC1O'
smi_5 = 'O=CCO'
smiles_list = [smi_1, smi_2, smi_3, smi_4, smi_5]

In [91]:
import pubchempy as pcp

def smiles_to_name_pubchem(smiles):
    """
    Convert SMILES to a tuple: (IUPAC name, list of synonyms) using PubChem.
    
    Returns:
        (iupac_name, synonyms) or (None, None) if not found
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None, None

    try:
        inchikey = inchi.MolToInchiKey(mol)
        compounds = pcp.get_compounds(inchikey, 'inchikey')
        if compounds:
            c = compounds[0]
            iupac = c.iupac_name
            synonyms = c.synonyms or []
            return iupac, synonyms
    except Exception as e:
        print(f"Error: {e}")
        return None, None


In [92]:
for smi in smiles_list:
    name = smiles_to_name_pubchem(smi)
    if name:
        print(f"SMILES: {smi} -> Name: {name}")
    else:
        print(f"SMILES: {smi} -> Name: Not found")

SMILES: OCC1OC(O)C(O)C(O)C1O -> Name: ('6-(hydroxymethyl)oxane-2,3,4,5-tetrol', ['Hexopyranose', 'Hexose', 'hexopyranoside', '42752-07-8', 'DTXSID80858960', 'Unknown Hexose', 'Unknown Hexopyranose', 'Unknown Hexopyranoside', 'CHEBI:18133', 'DTXCID10196988', 'DTXCID40202844', 'WURCS=2.0/1,1,0/(axxxxh-1x_1-5_2*NCC/3=O)/1/', '45009-62-9', 'dtxsid70860057', 'HEX', 'HEX (SNFG)', 'SNFG:HEX', '6-(hydroxymethyl)oxane-2,3,4,5-tetrol', 'D-[4,6-13C2]glucose', 'D-Mannose-6-13C', "D-[6,6'-2H2]galactose", 'ZYMOSAN', 'D-(+)-Mannose', 'D-Mannose-2-13C', 'b-d-glucose', 'D-Mannose-1-C-d', 'D-[2,5-13C2]glucose', 'D-[4,5-13C2]glucose', 'D-[1-13C]gulose', 'D-[2-13C]gulose', 'D-[1-13c,1-2h]glucose', 'ALPHA-D-GLUCOSEANHYDROUS', 'D-[11-13C]TALOSE', 'NSC8102', 'Glucose Oxidase from Aspergillus niger,', 'D-[2-2H]galactose', 'D-[4-2H]galactose', '.beta.-D-Glucopyranose', 'B-D-ALLOPYRANOSE', 'D-[1-18O]glucose', 'D-[2-13C]talose', 'D-[3-13C]glucose', 'L-[2-13C]glucose', 'D-[3-13C]galactose', 'D-[5-13C]galactose', 

In [86]:
import requests

def smiles_to_name_cactus(smiles):
    """
    Convert a SMILES string to a chemical name using NIH CACTUS.
    """
    url = f"https://cactus.nci.nih.gov/chemical/structure/{smiles}/iupac_name"
    try:
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            return response.text.strip()
    except requests.exceptions.RequestException:
        pass
    return None

In [87]:
for smi in smiles_list:
    name = smiles_to_name_cactus(smi)
    if name:
        print(f"SMILES: {smi} -> Name: {name}")
    else:
        print(f"SMILES: {smi} -> Name: Not found")

SMILES: OCC1OC(O)C(O)C(O)C1O -> Name: 6-(hydroxymethyl)oxane-2,3,4,5-tetrol
SMILES: O=CC(O)C(O)C(O)C(O)CO -> Name: 2,3,4,5,6-pentahydroxyhexanal
SMILES: O=CC(O)C(O)CO -> Name: 2,3,4-trihydroxybutanal
SMILES: OCC(O)C1OC1O -> Name: Not found
SMILES: O=CCO -> Name: Not found


In [88]:
from rdkit import Chem
from rdkit.Chem import inchi
import pubchempy as pcp

def smiles_to_name(smiles):
    """
    Convert a SMILES string to a chemical name using InChI + PubChem.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    try:
        inchi_str = inchi.MolToInchi(mol)
        compounds = pcp.get_compounds(inchi_str, namespace='inchi')
        if compounds:
            c = compounds[0]
            return c.iupac_name or c.synonyms[0]
    except Exception:
        return None


In [100]:
for smi in smiles_list:
    name = smiles_to_name(smi)
    if name:
        print(f"SMILES: {smi} -> Name: {name}")
    else:
        print(f"SMILES: {smi} -> Name: Not found")




SMILES: OCC1OC(O)C(O)C(O)C1O -> Name: 6-(hydroxymethyl)oxane-2,3,4,5-tetrol





SMILES: O=CC(O)C(O)C(O)C(O)CO -> Name: 2,3,4,5,6-pentahydroxyhexanal





SMILES: O=CC(O)C(O)CO -> Name: 2,3,4-trihydroxybutanal





SMILES: OCC(O)C1OC1O -> Name: Not found
SMILES: O=CCO -> Name: 2-hydroxyacetaldehyde


In [90]:
if __name__ == "__main__":
    smi_1 = "OCC1OC(O)C(O)C(O)C1O"
    name = smiles_to_name(smi_1)
    print(f"{smi_1} → {name}")




OCC1OC(O)C(O)C(O)C1O → 6-(hydroxymethyl)oxane-2,3,4,5-tetrol


In [101]:
import re
from rdkit import Chem
from rdkit.Chem import inchi
import pubchempy as pcp

def smiles_to_best_name(smiles):
    """
    Convert SMILES to a best-guess common or IUPAC name using PubChem.
    
    Returns:
        A single human-readable name (common name preferred, fallback to IUPAC).
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None

    try:
        inchikey = inchi.MolToInchiKey(mol)
        compounds = pcp.get_compounds(inchikey, 'inchikey')
        if not compounds:
            return None

        c = compounds[0]
        iupac = c.iupac_name or ""

        if c.synonyms:
            # Heuristic filter: exclude CAS numbers and long technical names
            common_candidates = [
                s for s in c.synonyms
                if re.match(r"^[A-Za-z \-]+$", s) and len(s.split()) <= 3
            ]
            # Further prioritize known common names
            for name in common_candidates:
                if name.lower() in ['glucose', 'ethanol', 'benzene', 'aspirin']:
                    return name
            if common_candidates:
                return common_candidates[0]

        return iupac if iupac else None
    except Exception as e:
        print(f"Error: {e}")
        return None


In [102]:
if __name__ == "__main__":
    test_smiles = [
        "CCO",                         # ethanol
        "C1=CC=CC=C1",                 # benzene
        "OCC1OC(O)C(O)C(O)C1O",        # glucose
        "CC(=O)OC1=CC=CC=C1C(=O)O",    # aspirin
    ]

    for smi in test_smiles:
        best_name = smiles_to_best_name(smi)
        print(f"{smi} → {best_name}")


CCO → ethanol
C1=CC=CC=C1 → benzene
OCC1OC(O)C(O)C(O)C1O → Hexopyranose
CC(=O)OC1=CC=CC=C1C(=O)O → aspirin


In [103]:
for smi in smiles_list:
    name = smiles_to_best_name(smi)
    if name:
        print(f"SMILES: {smi} -> Name: {name}")
    else:
        print(f"SMILES: {smi} -> Name: Not found")

SMILES: OCC1OC(O)C(O)C(O)C1O -> Name: Hexopyranose
SMILES: O=CC(O)C(O)C(O)C(O)CO -> Name: Aldohexose
SMILES: O=CC(O)C(O)CO -> Name: tetrose
SMILES: OCC(O)C1OC1O -> Name: Not found
SMILES: O=CCO -> Name: glycolaldehyde


In [104]:
from rdkit.Chem import inchi

def canonicalize_smiles(smiles):
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return None
    return Chem.MolToSmiles(mol, canonical=True)


In [105]:
canonical = canonicalize_smiles("C1(C(C(CO)O)O1)O")
print(canonical)  # Should give a more typical SMILES string


OCC(O)C1OC1O


In [108]:
print(smiles_to_name(canonical))  # Should give a more typical name
print(smiles_to_best_name(canonical))  # Should give a more typical name
print(smiles_to_name_pubchem(canonical))  # Should give a more typical name
print(smiles_to_name_cactus(canonical))  # Should give a more typical name




None
None
None
None


In [110]:

smiles = "OCC(O)C1OC1O"
mol = Chem.MolFromSmiles(smiles)

if mol:
    inchikey = inchi.MolToInchiKey(mol)
    print(f"InChIKey: {inchikey}")
else:
    print("Invalid SMILES.")

InChIKey: CXZZRQVTLHCGPB-UHFFFAOYSA-N
