In [46]:
import requests
import pandas as pd

# Function to fetch clinical trials based on user input
def fetch_clinical_trials():
    # Prompt user for the keyword
    keyword = input("Enter a keyword to search for clinical trials: ")

    # Define base URL and query parameters
    base_url = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.titles": keyword,  # Use the keyword for the search
        "pageSize": 100  # Fetch up to 100 records at a time
    }

    # Initialize an empty list to store the results
    results = []

    # Loop to handle pagination
    while True:
        print(f"Fetching data for keyword: {keyword}")  # Debugging output

        # Send a GET request to the API
        response = requests.get(base_url, params=params)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the JSON response
            data = response.json()
            studies = data.get('studies', [])

            # Extract relevant fields from each study
            for study in studies:
                nct_id = study['protocolSection']['identificationModule'].get('nctId', 'Unknown')
                title = study['protocolSection']['identificationModule'].get('officialTitle', 'Unknown')
                outcomes = study['protocolSection']['outcomesModule'].get('primaryOutcomeMeasures', [])

                # Extract outcomes and measures safely
                outcome_measures = ', '.join([outcome.get('description', 'No outcome description') for outcome in outcomes]) if outcomes else "No outcomes listed"

                # Append to results
                results.append({
                    "Trial Identifier (NCTId)": nct_id,
                    "Study Name": title,
                    "Overall Status": overallStatus,
                    "Start Date": startDate,
                    "Conditions": conditions,
                    "Interventions": interventions,
                    "Locations": locations,
                    "Primary Completion Date": primaryCompletionDate,
                    "Study First Post Date": studyFirstPostDate,
                    "Last Update Post Date": lastUpdatePostDate,
                    "Study Type": studyType,
                    "Phases": phases
                })

            # Check if there is a nextPageToken
            next_page_token = data.get('nextPageToken')
            if next_page_token:
                params['pageToken'] = next_page_token  # Update for the next page
            else:
                break  # Exit the loop if no more pages are available
        else:
            print("Failed to fetch data. Status code:", response.status_code)
            break

    # Convert the results to a DataFrame for tabulation
    df = pd.DataFrame(results)

    # Display the DataFrame
    print("\nClinical Trials Data:")
    print(df)

    # Save the data to a CSV file
    df.to_csv("clinical_trials_results.csv", index=False)
    print("\nData saved to 'clinical_trials_results.csv'.")

# Run the function
if __name__ == "__main__":
    fetch_clinical_trials()


SyntaxError: invalid syntax (1693416230.py, line 40)

In [None]:
# Initial URL for the first API call
base_url = "https://clinicaltrials.gov/api/v2/studies"
params = {
    "query.titles": "Diabetes",
    "pageSize": 100
}

# Initialize an empty list to store the data
data_list = []

# Loop until there is no nextPageToken
while True:
    # Print the current URL (for debugging purposes)
    print("Fetching data from:", base_url + '?' + '&'.join([f"{k}={v}" for k, v in params.items()]))
    
    # Send a GET request to the API
    response = requests.get(base_url, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()  # Parse JSON response
        studies = data.get('studies', [])  # Extract the list of studies

        # Loop through each study and extract specific information
        for study in studies:
            # Safely access nested keys
            nctId = study['protocolSection']['identificationModule'].get('nctId', 'Unknown')
            overallStatus = study['protocolSection']['statusModule'].get('overallStatus', 'Unknown')
            startDate = study['protocolSection']['statusModule'].get('startDateStruct', {}).get('date', 'Unknown Date')
            conditions = ', '.join(study['protocolSection']['conditionsModule'].get('conditions', ['No conditions listed']))
            acronym = study['protocolSection']['identificationModule'].get('acronym', 'Unknown')

            # Extract interventions safely
            interventions_list = study['protocolSection'].get('armsInterventionsModule', {}).get('interventions', [])
            interventions = ', '.join([intervention.get('name', 'No intervention name listed') for intervention in interventions_list]) if interventions_list else "No interventions listed"
            
            # Extract locations safely
            locations_list = study['protocolSection'].get('contactsLocationsModule', {}).get('locations', [])
            locations = ', '.join([f"{location.get('city', 'No City')} - {location.get('country', 'No Country')}" for location in locations_list]) if locations_list else "No locations listed"
            
            # Extract dates and phases
            primaryCompletionDate = study['protocolSection']['statusModule'].get('primaryCompletionDateStruct', {}).get('date', 'Unknown Date')
            studyFirstPostDate = study['protocolSection']['statusModule'].get('studyFirstPostDateStruct', {}).get('date', 'Unknown Date')
            lastUpdatePostDate = study['protocolSection']['statusModule'].get('lastUpdatePostDateStruct', {}).get('date', 'Unknown Date')
            studyType = study['protocolSection']['designModule'].get('studyType', 'Unknown')
            phases = ', '.join(study['protocolSection']['designModule'].get('phases', ['Not Available']))

            # Append the data to the list as a dictionary
            data_list.append({
                "NCT ID": nctId,
                "Acronym": acronym,
                "Overall Status": overallStatus,
                "Start Date": startDate,
                "Conditions": conditions,
                "Interventions": interventions,
                "Locations": locations,
                "Primary Completion Date": primaryCompletionDate,
                "Study First Post Date": studyFirstPostDate,
                "Last Update Post Date": lastUpdatePostDate,
                "Study Type": studyType,
                "Phases": phases
            })

        # Check for nextPageToken and update the params or break the loop
        nextPageToken = data.get('nextPageToken')
        if nextPageToken:
            params['pageToken'] = nextPageToken  # Set the pageToken for the next request
        else:
            break  # Exit the loop if no nextPageToken is present
    else:
        print("Failed to fetch data. Status code:", response.status_code)
        break

# Create a DataFrame from the list of dictionaries
df = pd.DataFrame(data_list)

# Print the DataFrame
print(df)

# Optionally, save the DataFrame to a CSV file
df.to_csv("clinical_trials_data_complete.csv", index=False)

Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGPkvgu
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JaPlfkv
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JqHlPcp
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JKFk_UoyA
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JKDm_QhxA
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JKPkPAowg
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGHm_IsyQ
Fetching data from: https://clinicaltrials.gov/api/v2/s

Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGHk_UpxQM
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGHlvApyA4
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGHlfEhwQM
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGGkvkrxAc
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGGkfchxgU
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGGlPYhxwA
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGGm_gtwQM
Fetching data from: https://clinicaltrials.gov/api/v2/studies?query.titles=Diabetes&pageSize=100&pageToken=NF0g5JGFkPIswQ4
Fetching data fr