In [4]:
import warnings
import pandas as pd
import requests



In [5]:
# Initial URL for the first API call
base_url = "https://clinicaltrials.gov/api/v2/studies"

# Initialize an empty list to store the data
data_list = []
params = {
}

In [6]:
# Loop until there is no nextPageToken
while True:
    # Print the current URL (for debugging purposes)
    print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))

    # Send a GET request to the API
    response = requests.get(base_url, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()  # Parse JSON response
        studies = data.get('studies', [])  # Extract the list of studies

        # Loop through each study and extract specific information
        for study in studies:
            # Safely access nested keys
                responsibleParty = study['protocolSection']['sponsorCollaboratorsModule'].get('responsibleParty', {}).get('type', 'Unknown')
                orgFullName = study['protocolSection']['identificationModule'].get('organization', {}).get('fullName', 'Unknown')
                orgClass = study['protocolSection']['identificationModule'].get('organization',{}).get('class', 'Unknown')
                overallStatus = study['protocolSection']['statusModule'].get('overallStatus', 'Unknown')
            
                startDate = study['protocolSection']['statusModule'].get('startDateStruct', {}).get('date', 'Unknown')
                if 'conditionsModule' in study['protocolSection']:
                  conditions = ', '.join(study['protocolSection']['conditionsModule'].get('conditions', ['Unknown']))
                else:
                  conditions = 'No conditions listed'
                briefTitle = study['protocolSection']['identificationModule'].get('briefTitle', 'Unknown')
                fullTitle = study['protocolSection']['identificationModule'].get('officialTitle', 'Unknown')
            # Extract interventions safely
                interventions_list = study['protocolSection'].get('armsInterventionsModule', {}).get('interventions', [])
                interventions = ', '.join([intervention.get('name', 'Unknown') for intervention in interventions_list]) if interventions_list else "Unknown"
                interventionDesc_list = study['protocolSection'].get('armsInterventionsModule', {}).get('interventions',[])
                interventionDesc = ', '.join([interventionDesc.get('description', 'Unknown') for interventionDesc in interventionDesc_list]) if interventionDesc_list else "Unknown"

            # Extract dates and phases
                if 'designModule' in study['protocolSection']:
                  studyType = study['protocolSection']['designModule'].get('studyType', 'Unknown')
                else:
                  studyType = 'Unknown'
                if 'designModule' in study['protocolSection']:
                  phases = ', '.join(study['protocolSection']['designModule'].get('phases', ['Unknown']))
                else:
                  phases = 'No phases listed'
                outcome_list = study.get('protocolSection', {}).get('outcomesModule', {}).get('primaryOutcomes', [])
                if outcome_list:
                  outcomes = ' '.join([outcome.get('measure', 'Unknown') for outcome in outcome_list])
                else:
                  outcomes = "Unknown"
                standardAge_list = study['protocolSection'].get('eligibilityModule', {}).get('stdAges',[])
                standardAge = ' '.join(standardAge_list)
                if 'designModule' in study['protocolSection']:
                  if 'designInfo' in study['protocolSection']['designModule']:
                    primaryPurpose = study['protocolSection']['designModule']['designInfo'].get('primaryPurpose', 'Unknown')
                  else:
                    primaryPurpose = 'Unknown'
                else:
                    primaryPurpose = 'Unknown'
            # Append the data to the list as a dictionary
                data_list.append({
                    "Organization Full Name": orgFullName,
                    "Organization Class": orgClass,
                    "Responsible Party": responsibleParty,
                    "Brief Title": briefTitle,
                    "Full Title": fullTitle,
                    "Overall Status": overallStatus,
                    "Start Date": startDate,
                    "Standard Age": standardAge,
                    "Conditions": conditions,
                    "Primary Purpose": primaryPurpose,
                    "Interventions": interventions,
                    "Intervention Description": interventionDesc,
                    "Study Type": studyType,
                    "Phases": phases,
                    "Outcome Measure": outcomes,
                })

        # Check for nextPageToken and update the params or break the loop
        nextPageToken = data.get('nextPageToken')
        if nextPageToken:
            params['pageToken'] = nextPageToken  # Set the pageToken for the next request
        else:
            break  # Exit the loop if no nextPageToken is present
    else:
        print("Failed to fetch data. Status code:", response.status_code)
        break


Fetching data from: https://clinicaltrials.gov/api/v2/studies?
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1565qGkvYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1-7ZKFlvYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV177ZqDlfg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV146JWClvYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1_55GEk_Yg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1_55WDlfYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV167ZGBmvYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV146JWOkfYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1_65WClPYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1975ODlfYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1-65aOlvYg
Fet

Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1955OGlPYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV167JeAl_Yg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1955aElPYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1-75OPkfYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV156ZqGkPYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV196pSCkPYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV196peBlPYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1_7ZaPm_Yg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV196JaOmvYg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV197JOBk_Yg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pageToken=KV1_65eHm_Yg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?pag

KeyboardInterrupt: 

In [None]:
clin_trials = pd.DataFrame(data_list)
clin_trials.to_csv('clin_trials.csv')
clin_trials.head(1)