In [None]:
import requests
import pandas as pd

# protein (target): https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data/protein/P00742/JSON

# Function to convert CHEMBL ID to PubChem CID
def convert_chembl_to_pubchem(chembl_id, timeout=30):
    """Return the first PubChem CID matching a ChEMBL identifier.

    The lookup is best-effort: any failure (network error, HTTP error
    status, unexpected response layout, no match) is reported on stdout
    and ``None`` is returned instead of raising.

    Args:
        chembl_id: ChEMBL identifier string, e.g. ``"CHEMBL25"``.
        timeout: Seconds to wait for PubChem before giving up.

    Returns:
        The first CID listed in the response, or ``None`` on failure.
    """
    # Local import so the function does not depend on a file-level import.
    from urllib.parse import quote

    try:
        # PUG REST has no "chemblid" input namespace; ChEMBL identifiers are
        # searched as compound names/synonyms via the "name" namespace.
        # The identifier is percent-encoded so stray characters read from
        # the input file cannot alter the request path.
        encoded_id = quote(str(chembl_id), safe="")
        url = (
            "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/"
            f"{encoded_id}/cids/JSON"
        )

        # A timeout keeps one stalled connection from blocking the batch.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # The body is a JSON object, not a list: the CIDs are nested under
        # IdentifierList -> CID.
        payload = response.json()
        return payload["IdentifierList"]["CID"][0]

    except Exception as e:
        # Deliberately broad: callers treat None as "not found" and move on.
        print(f"Error converting {chembl_id} to PubChem: {str(e)}")
        return None

# Function to retrieve SMILES for a PubChem CID
def get_smiles_from_pubchem(cid, timeout=30):
    """Fetch the canonical SMILES string for a PubChem compound ID.

    The lookup is best-effort: any failure (network error, HTTP error
    status, unexpected response layout) is reported on stdout and ``None``
    is returned instead of raising.

    Args:
        cid: PubChem compound identifier.
        timeout: Seconds to wait for PubChem before giving up.

    Returns:
        The SMILES string from the first property record, or ``None`` on
        failure.
    """
    try:
        url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid/{cid}/property/CanonicalSMILES/JSON"

        # A timeout keeps one stalled connection from blocking the batch.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        record = response.json()["PropertyTable"]["Properties"][0]

        # NOTE(review): PubChem appears to have renamed this property in its
        # responses to "ConnectivitySMILES"; accept either label so the
        # lookup works regardless of which one the server sends - confirm
        # against the current PUG REST property table.
        for key in ("CanonicalSMILES", "ConnectivitySMILES"):
            if key in record:
                return record[key]
        raise KeyError("CanonicalSMILES")

    except Exception as e:
        # Deliberately broad: callers treat None as "not found" and move on.
        print(f"Error retrieving SMILES for CID {cid}: {str(e)}")
        return None

# Read the list of CHEMBL IDs from the input file (one ID per line).
input_file = "../unfound_drug_smiles.csv"  # Replace with the actual input file path
with open(input_file, "r") as id_file:
    # The context manager closes the file promptly; blank lines are skipped
    # so they do not trigger pointless PubChem requests.
    chembl_ids = [line.strip() for line in id_file if line.strip()]

# Convert CHEMBL IDs to PubChem CIDs and retrieve SMILES.
# IDs that cannot be converted, or whose SMILES cannot be fetched, are
# left out of the results.
results = []
for chembl_id in chembl_ids:
    pubchem_cid = convert_chembl_to_pubchem(chembl_id)
    if not pubchem_cid:
        continue
    smiles = get_smiles_from_pubchem(pubchem_cid)
    if not smiles:
        continue
    results.append([chembl_id, pubchem_cid, smiles])

# Create a DataFrame from the results
df = pd.DataFrame(results, columns=["CHEMBL_ID", "PubChem_CID", "SMILES"])

# Save the results to a CSV file
output_file = "../data/opentargets/unfound_drug_smiles.csv"  # Replace with the desired output file path
df.to_csv(output_file, index=False)

In [2]:
import pandas as pd
import pubchempy as pcp

# Read the CSV file (no header row; the CHEMBL IDs are in the first column)
df = pd.read_csv('../unfound_drug_smiles.csv', header=None)

# Look each CHEMBL ID up in PubChem by name and collect its isomeric SMILES.
# One entry is appended per input row so the output stays aligned with the
# input file.
smiles_list = []
for chembl_id in df[0]:
    try:
        compound = pcp.get_compounds(chembl_id, 'name')[0]
        smiles_list.append(compound.isomeric_smiles)
    except Exception:
        # Covers "no match" (IndexError on the empty result) and lookup
        # errors, while still letting KeyboardInterrupt/SystemExit stop
        # the loop, which a bare ``except:`` would swallow.
        smiles_list.append('Not found')

# Save the SMILES to a new CSV file (one value per line, no header or index)
df_smiles = pd.DataFrame(smiles_list)
df_smiles.to_csv('../data/opentargets/unfound_drug_smiles_pubchem.csv', index=False, header=False)