## Part 1 - Criteria generation setup sandbox

In [44]:
import openai
import getpass

# Read the key from a hidden prompt so it never appears in the notebook,
# keeping it both on the openai module and in `api_key` for later cells.
openai.api_key = api_key = getpass.getpass("api key:")
print('redacted')


api key:········
redacted


In [54]:
# Build the client through the `openai` module that the notebook imports;
# the bare name `OpenAI` was never imported, so on a fresh kernel the
# original line raised NameError.
client = openai.OpenAI(
    api_key=api_key,
)

# Smoke test: one small chat completion to confirm the key and client work.
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user",
            "content": "Write a haiku about recursion in programming."
        }
    ]
)

print(completion.choices[0].message)

ChatCompletionMessage(content='Functions call themselves,  \nLayers of depth intertwine,  \nTruth in loops of code.', refusal=None, role='assistant', function_call=None, tool_calls=None)


In [59]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send `prompt` as a single user message and return the reply text.

    Relies on the notebook-level `client` object for the API call.

    Args:
        prompt: Text used as the content of the one "user" message.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output, forwarded
            unchanged to the API. The default of 0 matches the value that
            was previously hard-coded.

    Returns:
        The `content` of the message in the first choice of the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    return response.choices[0].message.content

In [61]:
# Free-text request sent to the model as-is ("diease" typo corrected so the
# model receives the intended condition name).
prompt = 'find me clinical trials for patients of age > 18 yo and with heart disease'

response = get_completion(prompt, 'gpt-4o-mini')
print(response)


To find clinical trials for patients over 18 years old with heart disease, you can use several resources:

1. **ClinicalTrials.gov**: This is a database of privately and publicly funded clinical studies conducted around the world. You can search for trials by entering keywords such as "heart disease" and applying filters for age and other criteria.

   - Go to [ClinicalTrials.gov](https://clinicaltrials.gov/)
   - Use the search bar to enter "heart disease."
   - Use the filters to select age (e.g., "18 years and older").

2. **World Health Organization (WHO) International Clinical Trials Registry Platform**: This platform provides access to clinical trial information from various countries.

   - Visit the [WHO ICTRP](https://www.who.int/clinical-trials-registry-platform)
   - Search for "heart disease" and apply relevant filters.

3. **European Union Clinical Trials Register**: If you are in Europe, this site allows you to search for protocol information on interventional clinical tr

In [None]:
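# !pip install openai==0.27.6
# NOTE(review): the `OpenAI` client / `client.chat.completions.create` calls used in this notebook
# belong to the openai>=1.0 interface, so this 0.27.6 pin looks stale — confirm the kernel's version.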

In [48]:
# Ask the `openai` command-line tool for its version via a shell escape.
!openai -V


openai 0.27.6


## Part 2 - Dataset generation sandbox - Test set: get some example clinical trials focusing on FHA (functional hypothalamic amenorrhea)

In [70]:
import requests

# Base URL (v2) that every request cell below prefixes to its endpoint path.
# API reference: https://clinicaltrials.gov/data-api/api
API_SERVER = "https://clinicaltrials.gov/api/v2"

In [87]:
# Connectivity check: ask the API for its version information.

test_conn_endpoint = f"{API_SERVER}/version"

# requests has no default timeout, so bound the wait explicitly; then fail
# loudly on an HTTP error instead of parsing an error body as the payload.
version_response = requests.get(test_conn_endpoint, timeout=30)
version_response.raise_for_status()
resp_v = version_response.json()
resp_v

{'apiVersion': '2.0.3', 'dataTimestamp': '2024-08-26T11:12:53'}

In [89]:
# Fetch a single study record by its NCT id (example: NCT05410886).

nctId = 'NCT05410886'
get_by_id_endpoint = f"{API_SERVER}/studies/{nctId}"

# requests has no default timeout, so bound the wait explicitly; then fail
# loudly on an HTTP error (e.g. an unknown id) instead of parsing the error body.
study_response = requests.get(get_by_id_endpoint, timeout=30)
study_response.raise_for_status()
resp_id = study_response.json()
resp_id

{'protocolSection': {'identificationModule': {'nctId': 'NCT05410886',
   'orgStudyIdInfo': {'id': '20HH6115'},
   'organization': {'fullName': 'Imperial College London', 'class': 'OTHER'},
   'briefTitle': 'Screening Women for Functional Hypothalamic Amenorrhea (FHA)',
   'officialTitle': 'A Questionnaire-based Study to Improve the Diagnosis of Functional Hypothalamic Amenorrhea (FHA) in Women With Secondary Amenorrhea Attending Hospital'},
  'statusModule': {'statusVerifiedDate': '2024-04',
   'overallStatus': 'COMPLETED',
   'expandedAccessInfo': {'hasExpandedAccess': False},
   'startDateStruct': {'date': '2021-02-11', 'type': 'ACTUAL'},
   'primaryCompletionDateStruct': {'date': '2024-02-02', 'type': 'ACTUAL'},
   'completionDateStruct': {'date': '2024-02-02', 'type': 'ACTUAL'},
   'studyFirstSubmitDate': '2022-06-06',
   'studyFirstSubmitQcDate': '2022-06-06',
   'studyFirstPostDateStruct': {'date': '2022-06-08', 'type': 'ACTUAL'},
   'lastUpdateSubmitDate': '2024-04-26',
   'last

In [83]:
# Search studies by condition (example: FHA).
cond = 'FHA'

# Equivalent request:
# curl -X GET "https://clinicaltrials.gov/api/v2/studies?query.cond=FHA" -H "accept: application/json"
get_by_cond_endpoint = f"{API_SERVER}/studies"

# Pass the condition through `params` so requests URL-encodes it (spaces, '&',
# etc. in a condition name would corrupt a hand-built query string). Bound the
# wait with a timeout and surface HTTP errors before parsing the body.
search_response = requests.get(
    get_by_cond_endpoint,
    params={"query.cond": cond},
    timeout=30,
)
search_response.raise_for_status()
resp_cond = search_response.json()
resp_cond

{'studies': [{'protocolSection': {'identificationModule': {'nctId': 'NCT06533865',
     'orgStudyIdInfo': {'id': '2024P000273'},
     'organization': {'fullName': 'Massachusetts General Hospital',
      'class': 'OTHER'},
     'briefTitle': 'Romosozumab as an Adjunct to Physiologic Estrogen Replacement in Functional Hypothalamic Amenorrhea',
     'officialTitle': 'Romosozumab as an Adjunct to Physiologic Estrogen Replacement in Adolescents and Young Adults With Functional Hypothalamic Amenorrhea'},
    'statusModule': {'statusVerifiedDate': '2024-08',
     'overallStatus': 'NOT_YET_RECRUITING',
     'expandedAccessInfo': {'hasExpandedAccess': False},
     'startDateStruct': {'date': '2024-10-01', 'type': 'ESTIMATED'},
     'primaryCompletionDateStruct': {'date': '2028-12-01',
      'type': 'ESTIMATED'},
     'completionDateStruct': {'date': '2029-06-01', 'type': 'ESTIMATED'},
     'studyFirstSubmitDate': '2024-07-30',
     'studyFirstSubmitQcDate': '2024-07-30',
     'studyFirstPostDat

In [86]:
# Count the studies returned by the condition search. The result of that cell
# is named `resp_cond`; the previous `resp` is not defined anywhere in the
# notebook and only worked through leftover kernel state.
len(resp_cond['studies'])

5

In [None]:
    # Draft query-parameter dict: condition query "FHA" with a pageSize of 20.
    # NOTE(review): no visible cell passes this dict to a request yet — presumably
    # intended for requests.get(..., params=params); confirm.
    # NOTE(review): the commented-out title key below may need to be spelled
    # `query.titl` for the v2 API — check the API reference before enabling it.
    params = {
        "query.cond": "FHA",
        "pageSize": 20,
#        "query.titles":
    }

## Part 3 - API payload schema parsing sandbox - parse those criteria into some structured format

In [None]:
# parse those criteria into some structured format: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10797270/ 

# OMOP format: https://www.ohdsi.org/data-standardization/ 

In [None]:
# https://github.com/OHDSI/