### Automated Retrieval and Saving of Approved Drug Data from the ChEMBL API

In [5]:
import requests
import csv
import time

def get_all_approved_drugs(*, page_size=1000, pause_seconds=0.2, timeout=30):
    """Download all approved molecules that have a SMILES string from ChEMBL.

    Pages through the ChEMBL ``molecule`` endpoint filtered on
    ``max_phase=4`` and keeps only molecules carrying a non-empty
    canonical SMILES.

    Args:
        page_size: Records requested per page (sent as the ``limit`` query
            parameter).
        pause_seconds: Delay, in seconds, between consecutive page requests.
        timeout: Seconds to wait for each HTTP response; without it a stalled
            connection would block the call indefinitely.

    Returns:
        A list of dicts with the keys ``"name"``, ``"chembl_id"`` and
        ``"smiles"``. If a page request answers with a non-200 status, the
        status is printed and the records collected so far are returned.
    """
    base_url = "https://www.ebi.ac.uk/chembl/api/data/molecule.json"
    query_params = {
        "max_phase": 4,       # Phase 4 is the filter value used for approved drugs
        "limit": page_size,   # Page size
        "offset": 0,          # Index of the first record on the current page
    }

    all_drugs = []

    while True:
        print(f"Fetching offset {query_params['offset']}...")
        response = requests.get(base_url, params=query_params, timeout=timeout)
        if response.status_code != 200:
            # Best effort: report the failure and keep what was fetched so far.
            print("Request error:", response.status_code)
            break

        data = response.json()
        molecules = data.get("molecules") or []

        for mol in molecules:
            # "molecule_structures" may be null; treat that like "no SMILES".
            structures = mol.get("molecule_structures") or {}
            smiles = structures.get("canonical_smiles")
            if smiles:  # Skip records whose SMILES is missing, None or empty
                all_drugs.append({
                    "name": mol.get("pref_name"),
                    "chembl_id": mol.get("molecule_chembl_id"),
                    "smiles": smiles,
                })

        # Stop when the API reports no further page. An empty page also ends
        # the loop so a stale "next" link can never cause endless requests.
        page_meta = data.get("page_meta") or {}
        if not molecules or not page_meta.get("next"):
            break

        query_params["offset"] += query_params["limit"]
        time.sleep(pause_seconds)  # Be polite to the public API between pages

    return all_drugs


# Function to save the collected drug data to CSV file
def save_to_csv(drugs, filename="approved_drugs.csv"):
    """Write drug records to a UTF-8 CSV file and print a summary line.

    Args:
        drugs: Sequence of dicts with the keys ``"name"``, ``"chembl_id"``
            and ``"smiles"``.
        filename: Path of the CSV file to create or overwrite.
    """
    columns = ["name", "chembl_id", "smiles"]
    # newline="" hands line-ending control to the csv module.
    with open(filename, "w", newline="", encoding="utf-8") as out:
        table = csv.DictWriter(out, fieldnames=columns)
        table.writeheader()
        for record in drugs:
            table.writerow(record)
    total = len(drugs)
    print(f"✅ {total} molecules saved to '{filename}'")

# Run the pipeline: download the approved-drug records from the ChEMBL API
# (requires network access), then write them to the default output file
# 'approved_drugs.csv' in the current working directory.
approved_drugs = get_all_approved_drugs()
save_to_csv(approved_drugs)

Fetching offset 0...
Fetching offset 1000...
Fetching offset 2000...
Fetching offset 3000...
Fetching offset 4000...
✅ 3594 molecules saved to 'approved_drugs.csv'
