Python-Skript, um SMILES mit vorhandener CID von NIB via REST-API herunterzuladen

In [1]:
import pandas as pd
import requests

# Function to fetch SMILES strings for a given batch of CIDs
def fetch_smiles(cids, timeout=30):
    """Fetch SMILES strings for one batch of PubChem compound IDs (CIDs).

    A single PUG REST request is sent for the whole batch, asking for the
    ``CanonicalSMILES`` property in JSON format.

    Args:
        cids: Iterable of compound IDs. Each one is converted with ``str``
            and the results are joined with commas inside the request URL.
        timeout: Seconds to wait for the server; passed to ``requests.get``
            so that a stalled connection cannot block the notebook forever.

    Returns:
        A list of ``(CID, SMILES)`` tuples in the order of the response.
        The list is empty when ``cids`` is empty or when the server does not
        answer with HTTP 200 (status code and body are printed in that case).
        Records that carry no SMILES value are left out.
    """
    cids = list(cids)
    # An empty batch would produce a malformed URL ("/cid//property/..."),
    # so there is nothing useful to request.
    if not cids:
        return []

    # Base URL for PubChem PUG REST API
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid"

    # Prepare the request URL
    url = f"{base_url}/{','.join(map(str, cids))}/property/CanonicalSMILES/JSON"

    # Send the request
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is reported and treated as "no results"
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return []

    data = response.json()
    smiles_list = []
    for compound in data["PropertyTable"]["Properties"]:
        # NOTE(review): newer PubChem responses appear to label this value
        # 'ConnectivitySMILES' even when 'CanonicalSMILES' was requested;
        # accept either key instead of failing with a KeyError.
        smiles = compound.get('CanonicalSMILES')
        if smiles is None:
            smiles = compound.get('ConnectivitySMILES')
        if smiles is None:
            continue
        smiles_list.append((compound['CID'], smiles))
    return smiles_list

# Function to split list into batches
def split_into_batches(lst, batch_size):
    """Lazily yield consecutive slices of ``lst`` holding up to ``batch_size`` items.

    The final slice is shorter when ``len(lst)`` is not a multiple of
    ``batch_size``; an empty ``lst`` yields nothing.
    """
    yield from (
        lst[start:start + batch_size]
        for start in range(0, len(lst), batch_size)
    )

# Example usage
if __name__ == "__main__":
    # Read the semicolon-separated CSV file containing the list of CIDs
    csv_file = 'C:\\Users\\SchockWav3\\Desktop\\Masterarbeit\\AXL_Kinasen_NIB.csv'  # Update this path to your CSV file
    df = pd.read_csv(csv_file, delimiter=';')  # Specify the correct delimiter

    # The identifier column may be spelled 'cid' or 'CID'; look it up once.
    cid_column = next((name for name in ('cid', 'CID') if name in df.columns), None)
    if cid_column is None:
        raise ValueError("No 'cid' or 'CID' column found in the CSV file")

    # Blank cells are read as NaN, which turns the whole column into floats.
    # Drop them and cast back to integers so the request URL contains "123"
    # rather than "123.0" or "nan" (either would make the whole batch fail).
    cids = (
        pd.to_numeric(df[cid_column], errors='coerce')
        .dropna()
        .astype('int64')
        .tolist()
    )

    # Fetch SMILES strings in batches
    batch_size = 100  # Adjust batch size as needed
    all_smiles = []
    for batch in split_into_batches(cids, batch_size):
        all_smiles.extend(fetch_smiles(batch))

    # Display the fetched SMILES strings
    for cid, smiles in all_smiles:
        print(f"CID: {cid}, SMILES: {smiles}")

    # A failed batch only prints an error, so report how many CIDs ended up
    # without a result instead of dropping them silently.
    fetched_cids = {cid for cid, _ in all_smiles}
    missing_cids = [cid for cid in cids if cid not in fetched_cids]
    if missing_cids:
        print(f"Warning: no SMILES returned for {len(missing_cids)} of {len(cids)} CIDs")

    # Save the results to a new CSV file
    output_df = pd.DataFrame(all_smiles, columns=['CID', 'SMILES'])
    output_df.to_csv('C:\\Users\\SchockWav3\\Desktop\\Masterarbeit\\AXL_Kinasen_NIB_smiles.csv', index=False)  # Update this path to your desired output file


CID: 121409318, SMILES: CN1C=C(N=C1)C2=CC(=C(N=C2)N)OCC3CCN(CC3)C4=NC(=NC(=N4)C(=O)NC56CC(C5)C6)OCC7(CC7)C#N
CID: 121409558, SMILES: CC1(CCC1)NC(=O)C2=NC(=NC(=N2)OCC3(CC3)C#N)N4CCC(CC4)COC5=C(N=CC(=C5)C6=CN(C=N6)C)N
CID: 121409206, SMILES: CC(CO)NC(=O)C1=NC(=NC(=N1)OCC2CCCO2)N3CCC(CC3)COC4=C(N=CC(=C4)C5=CN(C=N5)C)N
CID: 121409160, SMILES: CC(CO)NC(=O)C1=CC(=NC(=N1)OCC2CCCO2)N3CCC(CC3)C4=CNC5=NC=CC(=C45)OC
CID: 121409433, SMILES: CC(CO)NC(=O)C1=CC(=NC(=N1)OCC2=CC=CC=C2)N3CCC(CC3)C4=C5C(=CC=NC5=NN4)OC
CID: 121409449, SMILES: CC(C(C)(C)N)NC(=O)C1=CC(=NC(=N1)OCC2CC2)N3CCC(CC3)C4=CNC5=NC=CC(=C45)OC
CID: 121409518, SMILES: CC(C(F)(F)F)NC(=O)C1=NC(=NC(=N1)OCC2CC2)N3CCC(CC3)COC4=C(N=CC(=C4)C5=CN(C=N5)C)N
CID: 121409175, SMILES: CC(CO)NC(=O)C1=CC(=NC(=N1)OCC(C)OC)N2CCC(CC2)C3=CNC4=NC=CC(=C34)OC
CID: 121409523, SMILES: CC(C)NC(=O)C1=NC(=NC(=N1)OCC2CC2(F)F)N3CCC(CC3)COC4=C(N=CC(=C4)C5=CN(C=N5)C)N
CID: 121409045, SMILES: CC(CO)NC(=O)C1=CC(=NC(=N1)OCC2CCCO2)N3CCC(CC3)C4=C5C(=CC=NC5=NN4)OC
CID: 1214

Skript, um SMILES direkt zur CSV-Datei hinzuzufügen

In [2]:
import pandas as pd
import requests

# Function to fetch SMILES strings for a given batch of CIDs
def fetch_smiles(cids, timeout=30):
    """Fetch SMILES strings for one batch of PubChem compound IDs (CIDs).

    A single PUG REST request is sent for the whole batch, asking for the
    ``CanonicalSMILES`` property in JSON format.

    Args:
        cids: Iterable of compound IDs. Each one is converted with ``str``
            and the results are joined with commas inside the request URL.
        timeout: Seconds to wait for the server; passed to ``requests.get``
            so that a stalled connection cannot block the notebook forever.

    Returns:
        A dict mapping each returned CID to its SMILES string. The dict is
        empty when ``cids`` is empty or when the server does not answer with
        HTTP 200 (status code and body are printed in that case). Records
        that carry no SMILES value are left out.
    """
    cids = list(cids)
    # An empty batch would produce a malformed URL ("/cid//property/..."),
    # so there is nothing useful to request.
    if not cids:
        return {}

    # Base URL for PubChem PUG REST API
    base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/cid"

    # Prepare the request URL
    url = f"{base_url}/{','.join(map(str, cids))}/property/CanonicalSMILES/JSON"

    # Send the request
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is reported and treated as "no results"
    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        return {}

    data = response.json()
    smiles_dict = {}
    for compound in data["PropertyTable"]["Properties"]:
        # NOTE(review): newer PubChem responses appear to label this value
        # 'ConnectivitySMILES' even when 'CanonicalSMILES' was requested;
        # accept either key instead of failing with a KeyError.
        smiles = compound.get('CanonicalSMILES')
        if smiles is None:
            smiles = compound.get('ConnectivitySMILES')
        if smiles is None:
            continue
        smiles_dict[compound['CID']] = smiles
    return smiles_dict

# Function to split list into batches
def split_into_batches(lst, batch_size):
    """Generate successive chunks of ``lst`` with at most ``batch_size`` items each.

    Chunks are produced lazily, in order; only the last one can be shorter
    than ``batch_size``.
    """
    total = len(lst)
    for offset in range(0, total, batch_size):
        stop = offset + batch_size
        yield lst[offset:stop]

# Example usage
if __name__ == "__main__":
    # Read the semicolon-separated CSV file containing the list of CIDs
    csv_file = 'C:\\Users\\SchockWav3\\Desktop\\Masterarbeit\\AXL_Kinasen_NIB.csv'  # Update this path to your CSV file
    df = pd.read_csv(csv_file, delimiter=';')  # Specify the correct delimiter

    # The identifier column may be spelled 'cid' or 'CID'; look it up once
    # and reuse the name for both the download and the final mapping.
    cid_column = next((name for name in ('cid', 'CID') if name in df.columns), None)
    if cid_column is None:
        raise ValueError("No 'cid' or 'CID' column found in the CSV file")

    # Blank cells are read as NaN, which turns the whole column into floats.
    # Work on a numeric view of the column: rows without a usable CID become
    # NaN and are skipped for the download, and the remaining IDs are cast to
    # integers so the URL contains "123" rather than "123.0" or "nan".
    cid_values = pd.to_numeric(df[cid_column], errors='coerce')
    # The results are keyed by CID, so each distinct CID is requested once.
    cids = cid_values.dropna().astype('int64').drop_duplicates().tolist()

    # Fetch SMILES strings in batches
    batch_size = 100  # Adjust batch size as needed
    all_smiles = {}
    for batch in split_into_batches(cids, batch_size):
        all_smiles.update(fetch_smiles(batch))

    # A failed batch only prints an error, so report how many CIDs ended up
    # without a result instead of leaving unexplained gaps in the column.
    missing_cids = [cid for cid in cids if cid not in all_smiles]
    if missing_cids:
        print(f"Warning: no SMILES returned for {len(missing_cids)} of {len(cids)} CIDs")

    # Add the SMILES strings to the DataFrame; rows whose CID is missing or
    # was not resolved get an empty (NaN) SMILES value.
    df['SMILES'] = cid_values.map(all_smiles)

    # NOTE: this overwrites the input CSV in place. Keep a backup, or point
    # to_csv at a different path, if the original file must be preserved.
    df.to_csv(csv_file, index=False, sep=';')

    print(f"Updated data saved to {csv_file}")


Updated data saved to C:\Users\SchockWav3\Desktop\Masterarbeit\AXL_Kinasen_NIB.csv
