In [61]:
from datetime import timedelta
import os
from requests_cache import CachedSession
from tqdm.notebook import tqdm

# HTTP session with a local response cache: entries expire after one day, and
# POST is listed alongside GET so the POST-based search requests are cached too.
session = CachedSession(
    allowable_methods=["GET", "POST"],
    expire_after=timedelta(days=1),
)

# API key is taken from the environment; this is None when the variable is unset.
key = os.environ.get("CTS_V2_API_KEY")

In [62]:
def get_ctsapi_trials(start: int, size: int = 50, timeout: float = 30.0):
    """Fetch one page of trials from the NCI Clinical Trials Search API (v2).

    The query is restricted to trials whose trial status and site recruitment
    status are in the lists below and that have a site with postal code
    "20892". Only the fields named in ``include`` are requested.

    Args:
        start: Zero-based offset of the first trial to return (sent as "from").
        size: Number of trials requested per page (sent as "size").
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON response body. Callers in this notebook read its
        "total" and "data" keys.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    res = session.post(
        "https://clinicaltrialsapi.cancer.gov/api/v2/trials",
        json={
            "current_trial_status": [
                "Active",
                "Approved",
                "Enrolling by Invitation",
                "In Review",
                "Temporarily Closed to Accrual",
                "Temporarily Closed to Accrual and Intervention",
            ],
            "include": [
                "nci_id",
                "nct_id",
                "brief_title",
                "sites.org_name",
                "sites.org_postal_code",
                "current_trial_status",
                "sites.org_va",
                "sites.org_country",
                "sites.org_state_or_province",
                "sites.org_city",
                "sites.org_coordinates",
                "sites.recruitment_status",
            ],
            "sites.org_postal_code": "20892",
            "from": start,
            "sites.recruitment_status": [
                "active",
                "approved",
                "enrolling_by_invitation",
                "in_review",
                "temporarily_closed_to_accrual",
            ],
            "size": size,
        },
        headers={"X-API-KEY": key},
        timeout=timeout,
    )
    # Surface HTTP errors here rather than as a confusing KeyError downstream.
    res.raise_for_status()
    return res.json()


# Download every matching trial by walking the result set page by page.
page = get_ctsapi_trials(start=0)
total = page["total"]
# Copy the first page so extending `trials` does not mutate the response payload.
trials = list(page["data"])

# The context manager closes the progress bar even if a request fails midway.
with tqdm(total=total) as pbar:
    pbar.update(len(trials))
    while len(trials) < total:
        # The number of trials collected so far is the offset of the next page.
        next_page = get_ctsapi_trials(start=len(trials))
        batch = next_page["data"]
        if not batch:
            # An empty page before reaching `total` would otherwise make this
            # loop spin forever; stop loudly instead of exporting partial data.
            raise RuntimeError(
                f"API returned an empty page after {len(trials)} of {total} trials"
            )
        trials.extend(batch)
        pbar.update(len(batch))

  0%|          | 0/267 [00:00<?, ?it/s]

In [63]:
import pandas as pd

# One row per trial record collected from the API.
df = pd.DataFrame(data=trials)
# Expand the list-like "sites" column so each site gets its own row
# (the trial-level columns are repeated for every site).
df2 = df.explode(column="sites")

In [64]:
# Flatten the exploded frame into one plain row per (trial, site) pair.
# Trial-level columns are copied as-is; site-level fields are pulled out of the
# nested dict stored in the "sites" column.
trial_fields = ["nct_id", "brief_title", "current_trial_status"]
site_fields = [
    "org_state_or_province",
    "org_city",
    "org_va",
    "org_country",
    "org_name",
    "org_postal_code",
    "recruitment_status",
]

new_rows = []
# itertuples avoids building a Series per row, which is what makes iterrows slow.
for trial_values, site in zip(
    df2[trial_fields].itertuples(index=False, name=None), df2["sites"]
):
    if not isinstance(site, dict):
        # explode() yields NaN for a trial with no sites; keep the trial row
        # with empty site fields instead of raising TypeError.
        site = {}
    new_row = dict(zip(trial_fields, trial_values))
    # .get() tolerates site records that omit an optional field.
    new_row.update((field, site.get(field)) for field in site_fields)
    new_rows.append(new_row)

# Pin the column order so the export has a stable header even with no rows.
df_final = pd.DataFrame(new_rows, columns=trial_fields + site_fields)
df_final

Unnamed: 0,nct_id,brief_title,current_trial_status,org_state_or_province,org_city,org_va,org_country,org_name,org_postal_code,recruitment_status
0,NCT05554367,Palbociclib and Binimetinib in RAS-Mutant Canc...,Active,IL,Crystal Lake,False,United States,AMG Crystal Lake - Oncology,60014,ACTIVE
1,NCT05554367,Palbociclib and Binimetinib in RAS-Mutant Canc...,Active,IL,Libertyville,False,United States,AMG Libertyville - Oncology,60048,ACTIVE
2,NCT05554367,Palbociclib and Binimetinib in RAS-Mutant Canc...,Active,IL,Libertyville,False,United States,Condell Memorial Hospital,60048,ACTIVE
3,NCT05554367,Palbociclib and Binimetinib in RAS-Mutant Canc...,Active,IL,Ottawa,False,United States,Illinois CancerCare-Ottawa Clinic,61350,ACTIVE
4,NCT05554367,Palbociclib and Binimetinib in RAS-Mutant Canc...,Active,IL,Canton,False,United States,Illinois CancerCare-Canton,61520,ACTIVE
...,...,...,...,...,...,...,...,...,...,...
3171,NCT00923065,"Data Collection, Clinical Care and Interventio...",Enrolling by Invitation,MD,Bethesda,False,United States,National Institutes of Health Clinical Center,20892,ENROLLING_BY_INVITATION
3172,NCT00923026,Follow Up Protocol for Subjects Previously Enr...,Enrolling by Invitation,MD,Bethesda,False,United States,National Institutes of Health Clinical Center,20892,ENROLLING_BY_INVITATION
3173,NCT00068003,Harvesting Cells for Experimental Cancer Treat...,Enrolling by Invitation,MD,Bethesda,False,United States,National Institutes of Health Clinical Center,20892,ENROLLING_BY_INVITATION
3174,NCT05237986,Cognitive Aftereffects of Neurotoxicity in Chi...,Temporarily Closed to Accrual,MD,Bethesda,False,United States,National Institutes of Health Clinical Center,20892,TEMPORARILY_CLOSED_TO_ACCRUAL


In [65]:
# Write the flattened table to disk without the index column. "utf_8_sig"
# prepends a UTF-8 byte-order mark (presumably so spreadsheet tools detect the
# encoding -- confirm with the consumers of this file).
df_final.to_csv(
    path_or_buf="nih_onsite_trials_20250218.csv",
    encoding="utf_8_sig",
    index=False,
)