In [1]:
# Import dependencies
import requests
import pandas as pd

In [2]:
# Base url for the api call
url = "https://clinicaltrials.gov/api/query/study_fields"

In [3]:
# Headers for json call
headers = {'Content-Type': 'application/json', 
           'Accept': 'application/json'}

In [4]:
# List of fields
fields = ['OrgStudyId',
          'BriefTitle',
          'StudyType', 
          'StartDate',
          'CompletionDate',
          'OverallStatus',  
          'ResponsiblePartyType', 
          'Gender', 
          'GenderBased', 
          'MinimumAge', 
          'MaximumAge']

In [5]:
# Create a list to hold the dictionaries from the json response
sourced_data = []

In [6]:
# Set a counter to increase the ranks with each iteration
counter = 0

while True:
    # Define the parameters of the url
    params = {'expr': 'breast cancer AND SEARCH[Location](AREA[LocationState] California)', 
              'fields': ','.join(fields), 
              'min_rnk': 1 + 1000 * counter,  
              'max_rnk': 1000 + 1000 * counter, 
              'fmt': 'json'}

    # Make the call 
    response = requests.get(url, 
                            headers = headers, 
                            params = params)


    

    # Increment the counter
    counter += 1
    
    # Check to see if more data was returned for the current response
    if response.json()['StudyFieldsResponse']['NStudiesReturned'] == 0:
        # If there was no new data returned, break out of the while loop
        break

    # Add the data from the api call to the sourced_data list
    sourced_data += response.json()['StudyFieldsResponse']['StudyFields']
    

In [9]:
print(sourced_data)

[{'Rank': 1, 'OrgStudyId': ['CCT1043'], 'BriefTitle': ['Breast Cancer Trials Education Program'], 'StudyType': ['Interventional'], 'StartDate': ['May 2007'], 'CompletionDate': ['December 2015'], 'OverallStatus': ['Completed'], 'ResponsiblePartyType': ['Principal Investigator'], 'Gender': ['Female'], 'GenderBased': [], 'MinimumAge': ['21 Years'], 'MaximumAge': []}, {'Rank': 2, 'OrgStudyId': ['150B-0158'], 'BriefTitle': ['Breast Cancer Risk Reduction: A Patient Doctor Intervention'], 'StudyType': ['Interventional'], 'StartDate': ['June 2011'], 'CompletionDate': ['December 2012'], 'OverallStatus': ['Completed'], 'ResponsiblePartyType': ['Sponsor'], 'Gender': ['All'], 'GenderBased': [], 'MinimumAge': ['40 Years'], 'MaximumAge': ['74 Years']}, {'Rank': 3, 'OrgStudyId': ['CHUM-CRYOABLATION'], 'BriefTitle': ['Anti-Tumor Immunity Induced by Cryoablation of Invasive Breast Cancers'], 'StudyType': ['Interventional'], 'StartDate': ['April 2013'], 'CompletionDate': ['October 2014'], 'OverallStatus

In [48]:
values_list = df.values.tolist()
values_list

[[['CCT1043'],
  ['Breast Cancer Trials Education Program'],
  ['Interventional'],
  ['May 2007'],
  ['December 2015'],
  ['Completed'],
  ['Principal Investigator'],
  ['Female'],
  [],
  ['21 Years'],
  []],
 [['150B-0158'],
  ['Breast Cancer Risk Reduction: A Patient Doctor Intervention'],
  ['Interventional'],
  ['June 2011'],
  ['December 2012'],
  ['Completed'],
  ['Sponsor'],
  ['All'],
  [],
  ['40 Years'],
  ['74 Years']],
 [['CHUM-CRYOABLATION'],
  ['Anti-Tumor Immunity Induced by Cryoablation of Invasive Breast Cancers'],
  ['Interventional'],
  ['April 2013'],
  ['October 2014'],
  ['Terminated'],
  ['Sponsor'],
  ['All'],
  [],
  ['18 Years'],
  []],
 [['20107812'],
  ['Monitoring and Predicting Breast Cancer Neoadjuvant Chemotherapy Response Using Diffuse Optical Spectroscopic Imaging'],
  ['Observational'],
  ['December 2010'],
  ['October 2013'],
  ['Withdrawn'],
  ['Principal Investigator'],
  ['Female'],
  [],
  ['18 Years'],
  []],
 [['234870'],
  ['Blood Glycan Biom

In [44]:
for new_list in values_list:
    print(new_list,end='')

[['CCT1043'], ['Breast Cancer Trials Education Program'], ['Interventional'], ['May 2007'], ['December 2015'], ['Completed'], ['Principal Investigator'], ['Female'], [], ['21 Years'], []][['150B-0158'], ['Breast Cancer Risk Reduction: A Patient Doctor Intervention'], ['Interventional'], ['June 2011'], ['December 2012'], ['Completed'], ['Sponsor'], ['All'], [], ['40 Years'], ['74 Years']][['CHUM-CRYOABLATION'], ['Anti-Tumor Immunity Induced by Cryoablation of Invasive Breast Cancers'], ['Interventional'], ['April 2013'], ['October 2014'], ['Terminated'], ['Sponsor'], ['All'], [], ['18 Years'], []][['20107812'], ['Monitoring and Predicting Breast Cancer Neoadjuvant Chemotherapy Response Using Diffuse Optical Spectroscopic Imaging'], ['Observational'], ['December 2010'], ['October 2013'], ['Withdrawn'], ['Principal Investigator'], ['Female'], [], ['18 Years'], []][['234870'], ['Blood Glycan Biomarkers in Women With Stage IV Breast Cancer'], ['Observational'], ['September 2006'], ['January

[['C3661001'], ['A Study of PF-06873600 in People With Cancer'], ['Interventional'], ['March 7, 2018'], ['November 28, 2024'], ['Active, not recruiting'], ['Sponsor'], ['All'], [], ['18 Years'], []][['MCC-14662'], ['Phase II Trial of SAHA & Tamoxifen for Patients With Breast Cancer'], ['Interventional'], ['February 2006'], ['August 2012'], ['Completed'], ['Sponsor'], ['All'], [], ['18 Years'], []][['VICC BRE 1287'], ['GDC-0941 and Cisplatin in Treating Patients With Androgen Receptor-Negative Triple Negative Metastatic Breast Cancer'], ['Interventional'], ['September 2013'], ['April 2015'], ['Terminated'], ['Principal Investigator'], ['All'], [], ['18 Years'], []][['201610'], ['Predicting Chronic Pain Following Breast Surgery'], ['Observational'], ['July 19, 2021'], ['December 31, 2023'], ['Recruiting'], ['Principal Investigator'], ['All'], [], ['18 Years'], []][['MC1831'], ['Estradiol in Treating Patients With ER Beta Positive, Triple Negative Locally Advanced or Metastatic Breast Can

In [49]:
import json

json.dumps(values_list).strip('[]')

'"CCT1043"], ["Breast Cancer Trials Education Program"], ["Interventional"], ["May 2007"], ["December 2015"], ["Completed"], ["Principal Investigator"], ["Female"], [], ["21 Years"], []], [["150B-0158"], ["Breast Cancer Risk Reduction: A Patient Doctor Intervention"], ["Interventional"], ["June 2011"], ["December 2012"], ["Completed"], ["Sponsor"], ["All"], [], ["40 Years"], ["74 Years"]], [["CHUM-CRYOABLATION"], ["Anti-Tumor Immunity Induced by Cryoablation of Invasive Breast Cancers"], ["Interventional"], ["April 2013"], ["October 2014"], ["Terminated"], ["Sponsor"], ["All"], [], ["18 Years"], []], [["20107812"], ["Monitoring and Predicting Breast Cancer Neoadjuvant Chemotherapy Response Using Diffuse Optical Spectroscopic Imaging"], ["Observational"], ["December 2010"], ["October 2013"], ["Withdrawn"], ["Principal Investigator"], ["Female"], [], ["18 Years"], []], [["234870"], ["Blood Glycan Biomarkers in Women With Stage IV Breast Cancer"], ["Observational"], ["September 2006"], ["

In [7]:
# Create a dataframe from the dictionaries
df = pd.DataFrame(sourced_data)

In [8]:
# Set the rank as the index
df.set_index('Rank', inplace = True)
df

Unnamed: 0_level_0,OrgStudyId,BriefTitle,StudyType,StartDate,CompletionDate,OverallStatus,ResponsiblePartyType,Gender,GenderBased,MinimumAge,MaximumAge
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,[CCT1043],[Breast Cancer Trials Education Program],[Interventional],[May 2007],[December 2015],[Completed],[Principal Investigator],[Female],[],[21 Years],[]
2,[150B-0158],[Breast Cancer Risk Reduction: A Patient Docto...,[Interventional],[June 2011],[December 2012],[Completed],[Sponsor],[All],[],[40 Years],[74 Years]
3,[CHUM-CRYOABLATION],[Anti-Tumor Immunity Induced by Cryoablation o...,[Interventional],[April 2013],[October 2014],[Terminated],[Sponsor],[All],[],[18 Years],[]
4,[20107812],[Monitoring and Predicting Breast Cancer Neoad...,[Observational],[December 2010],[October 2013],[Withdrawn],[Principal Investigator],[Female],[],[18 Years],[]
5,[234870],[Blood Glycan Biomarkers in Women With Stage I...,[Observational],[September 2006],"[January 12, 2016]",[Completed],[Sponsor],[Female],[],[18 Years],[]
...,...,...,...,...,...,...,...,...,...,...,...
1681,[S1614],[Prophylactic Antiviral Therapy in Patients Wi...,[Interventional],"[February 21, 2019]","[February 28, 2027]","[Active, not recruiting]",[Sponsor],[All],[],[18 Years],[]
1682,[NSABP MPR-1],[NSABP Patient Registry and Biospecimen Profil...,[Observational],[February 2013],[March 2022],[Completed],[Sponsor],[All],[],[18 Years],[]
1683,[NCI-2012-00107],[Study of Positron Emission Tomography and Com...,[Interventional],"[February 22, 2012]","[August 1, 2021]",[Unknown status],[Sponsor],[All],[],[18 Years],[]
1684,[302],[177Lu-DTPA-Omburtamab Radioimmunotherapy for ...,[Interventional],"[August 31, 2022]","[December 31, 2024]",[Withdrawn],[Sponsor],[All],[],[18 Years],[]


In [None]:
df.info()

In [22]:
df['OrgStudyId'] = df['OrgStudyId'].values.tolist()
print(df['OrgStudyId'])

Rank
1                 [CCT1043]
2               [150B-0158]
3       [CHUM-CRYOABLATION]
4                [20107812]
5                  [234870]
               ...         
1681                [S1614]
1682          [NSABP MPR-1]
1683       [NCI-2012-00107]
1684                  [302]
1685          [D967MC00001]
Name: OrgStudyId, Length: 1685, dtype: object


In [32]:
OrgStudyId = str(df['OrgStudyId']).replace('[', '').replace(']','')
print(OrgStudyId)

Rank
1                 CCT1043
2               150B-0158
3       CHUM-CRYOABLATION
4                20107812
5                  234870
               ...         
1681                S1614
1682          NSABP MPR-1
1683       NCI-2012-00107
1684                  302
1685          D967MC00001
Name: OrgStudyId, Length: 1685, dtype: object


In [24]:
df['BriefTitle'] = df['BriefTitle'].values.tolist()
print(df['BriefTitle'])

Rank
1                [Breast Cancer Trials Education Program]
2       [Breast Cancer Risk Reduction: A Patient Docto...
3       [Anti-Tumor Immunity Induced by Cryoablation o...
4       [Monitoring and Predicting Breast Cancer Neoad...
5       [Blood Glycan Biomarkers in Women With Stage I...
                              ...                        
1681    [Prophylactic Antiviral Therapy in Patients Wi...
1682    [NSABP Patient Registry and Biospecimen Profil...
1683    [Study of Positron Emission Tomography and Com...
1684    [177Lu-DTPA-Omburtamab Radioimmunotherapy for ...
1685    [A Study of T-DXd for the Treatment of Solid T...
Name: BriefTitle, Length: 1685, dtype: object


In [31]:
BriefTitle = str(df['BriefTitle']).replace('[', '').replace(']','')
print(BriefTitle)

Rank
1                Breast Cancer Trials Education Program
2       Breast Cancer Risk Reduction: A Patient Docto...
3       Anti-Tumor Immunity Induced by Cryoablation o...
4       Monitoring and Predicting Breast Cancer Neoad...
5       Blood Glycan Biomarkers in Women With Stage I...
                              ...                        
1681    Prophylactic Antiviral Therapy in Patients Wi...
1682    NSABP Patient Registry and Biospecimen Profil...
1683    Study of Positron Emission Tomography and Com...
1684    177Lu-DTPA-Omburtamab Radioimmunotherapy for ...
1685    A Study of T-DXd for the Treatment of Solid T...
Name: BriefTitle, Length: 1685, dtype: object


In [50]:
df['StudyType'] = x[0] 
for x in df['StudyType'] 
    if len(x)<2 
    else 

SyntaxError: invalid syntax (1921817964.py, line 2)