In [1]:
import logging
import os
from datetime import datetime

import pandas as pd
import pubchempy as pcp

In [2]:
def _empty_compound_record(status):
    """Build a placeholder record: ``status`` in the name field, None elsewhere."""
    # NOTE: "compoud name" is misspelled, but the key is kept byte-for-byte
    # because callers may rely on it (e.g. as an output column name).
    return {
        "compoud name": status,
        "CID": None,
        "molecular formula": None,
        "molecular weight": None,
        "Canonical SMILES": None,
    }


def get_compound_info(smiles):
    """Look up a compound on PubChem by SMILES and return a flat record.

    Args:
        smiles: SMILES string identifying the compound. Non-string values
            (e.g. the float NaN pandas produces for blank cells) and
            blank strings are rejected without contacting PubChem.

    Returns:
        A dict with the keys "compoud name", "CID", "molecular formula",
        "molecular weight" and "Canonical SMILES". Only the first match
        returned by PubChem is used. When nothing matches, the name field
        is "not found"; when the input is invalid or the lookup raises,
        the name field is "error". In both cases every other field is None.

    This function never raises: failures are reported through the
    ``logging`` module and encoded in the returned record, so one bad row
    does not abort a batch run.
    """
    log = logging.getLogger(__name__)

    # Reject unusable input up front instead of sending it over the network.
    if not isinstance(smiles, str) or not smiles.strip():
        log.warning("Skipping PubChem lookup for invalid SMILES value: %r", smiles)
        return _empty_compound_record("error")

    try:
        matches = pcp.get_compounds(smiles, namespace='smiles')

        if not matches:
            return _empty_compound_record("not found")

        first = matches[0]
        return {
            "compoud name": first.iupac_name,
            "CID": first.cid,
            "molecular formula": first.molecular_formula,
            "molecular weight": first.molecular_weight,
            "Canonical SMILES": first.canonical_smiles,
        }
    except Exception as exc:
        # Deliberately broad (best-effort batch lookup), but no longer silent:
        # the cause is logged so "error" rows can be diagnosed afterwards.
        log.warning("PubChem lookup failed for %r: %s", smiles, exc)
        return _empty_compound_record("error")

In [3]:
def process_csv(input_file, output_file):
    """Annotate each SMILES in a CSV with PubChem data and write a new CSV.

    Args:
        input_file: Anything ``pandas.read_csv`` accepts. The table must
            contain a 'SMILES' column.
        output_file: Destination handed to ``DataFrame.to_csv``. When it is
            a filesystem path, missing parent directories are created.

    Raises:
        KeyError: If the input table has no 'SMILES' column.

    The output has one row per input row, holding the fields returned by
    ``get_compound_info`` followed by the original value in a "SMILES" column.
    """
    df = pd.read_csv(input_file)
    smiles_column = df['SMILES']

    # Create the output directory BEFORE the slow network lookups so a
    # missing folder cannot discard the whole run at the final write.
    if isinstance(output_file, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(output_file))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    # Query each distinct SMILES only once; repeated rows reuse the result.
    lookup_cache = {}
    results = []
    for smiles in smiles_column:
        if smiles not in lookup_cache:
            lookup_cache[smiles] = get_compound_info(smiles)
        # Copy so the cached record is never mutated between rows.
        results.append({**lookup_cache[smiles], "SMILES": smiles})

    results_df = pd.DataFrame(results)
    results_df.to_csv(output_file, index=False)

In [4]:
# Input table of SMILES strings and the destination for the annotated table.
input_file = "data/data_Human.csv"
output_file = "result/result_Human.csv"

# Run the lookup over every row of the input and write the result CSV.
process_csv(input_file=input_file, output_file=output_file)

In [5]:
# Stamp the notebook with the date of this run (YYYY-MM-DD).
print(f"{datetime.now():%Y-%m-%d}")

2025-05-16
