In [1]:
# Mount Google Drive so the project files under /content/drive are reachable
# (default setup when running on Google Colab).
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [14]:
# --- Standard library ---
import importlib
import json
import sys

# --- Third-party ---
import pandas as pd
from google.colab import userdata
from openai import OpenAI

# API key used for the OpenAI calls further down
api_key = userdata.get('OPENAI_API_KEY_mT')

# Make the project folder and its src/ folder importable; adapt these paths for your own use
sys.path.extend([
  '/content/drive/MyDrive/Projects/case_study_mT/',
  '/content/drive/MyDrive/Projects/case_study_mT/src/',
])

# --- Local helpers ---
# The module is reloaded right after import so that the names pulled in below
# come from a freshly executed trialsearch_utils.
import trialsearch_utils
importlib.reload(trialsearch_utils)
from trialsearch_utils import (
  get_eligibility_text,
  load_json,
  patient_to_text,
  trial_to_text,
  build_prompt,
  call_llm,
)

In [20]:
# Read the pickled trials dataframe from Drive and preview its first rows.
# NOTE(review): unpickling can execute arbitrary code - only load pickle files you trust.
trials_pickle = "/content/drive/MyDrive/Projects/case_study_mT/data/df_rec_phases.pkl"
df_trials = pd.read_pickle(trials_pickle)
df_trials.head()

Unnamed: 0,utn,phase,title,gender,language,studies_id,study_type,maximum_age,minimum_age,interventions,...,detailed_description,countries_and_sites,publications,eligibility,additional_info,references,location,ctgov_location,duplicate_info,mesh_terms
2,NCT02935257,[Phase 1],Immunotherapy for High Risk/Relapsed CD19+ Acu...,All,,,,,16 Years,"[{'type': 'BIOLOGICAL', 'name': 'CD19CAT-41BBZ...",...,"This is a multi-centre, non-randomised, open l...","[{'country_start': None, 'country_end': None, ...",[],{'inclusion': 'Inclusion Criteria: 1. Age ≥16...,"{'sponsors': [{'name': 'University College, Lo...",[{'url': 'https://clinicaltrials.gov/study/NCT...,[{'facility': 'University College London Hospi...,[{'facility': 'University College London Hospi...,"{'nctId': 'NCT02935257', 'orgStudyIdInfo': {'i...","[{'id': 'D000008223', 'term': 'Lymphoma'}, {'i..."
6,NCT04850755,[Phase 1],A Phase I Dose Escalation Study of Selinexor P...,All,,,,99 Years,21 Years,"[{'type': 'DRUG', 'name': 'Selinexor in combin...",...,Primary Objectives\n\n* To evaluate the safety...,"[{'country_start': None, 'country_end': None, ...",[],{'inclusion': 'Inclusion Criteria: 1. Age ≥ 2...,{'sponsors': [{'name': 'National University Ho...,[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'National University Hospital, S...","[{'facility': 'National University Hospital, S...","{'nctId': 'NCT04850755', 'orgStudyIdInfo': {'i...","[{'id': 'D000009369', 'term': 'Neoplasms'}]"
7,NCT05680233,[Phase 1],A Phase 1a/1b Single Ascending and Multiple Do...,All,,,,70 Years,18 Years,"[{'type': 'DRUG', 'name': 'OA-235i (4 mg)', 'd...",...,The purpose of this study is to assess the saf...,"[{'country_start': None, 'country_end': None, ...",[],{'inclusion': 'Main Inclusion Criteria: 1. Ma...,"{'sponsors': [{'name': 'Oasis Pharmaceuticals,...",[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'Mayo Clinic', 'status': 'RECRUI...","[{'facility': 'Mayo Clinic', 'status': 'RECRUI...","{'nctId': 'NCT05680233', 'orgStudyIdInfo': {'i...","[{'id': 'D000005234', 'term': 'Fatty Liver'}, ..."
23,NCT05526924,[Phase 1],A Phase I Trial of Re-Irradiation With Concurr...,All,,,,,18 Years,"[{'type': 'RADIATION', 'name': 'Chemoradiation...",...,The purpose of this study is to evaluate the s...,"[{'country_start': None, 'country_end': None, ...",[],{'inclusion': 'Inclusion Criteria:  - Clinica...,{'sponsors': [{'name': 'University of Chicago'...,[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'The University of Chicago', 'st...","[{'facility': 'The University of Chicago', 'st...","{'nctId': 'NCT05526924', 'orgStudyIdInfo': {'i...","[{'id': 'D000002277', 'term': 'Carcinoma'}, {'..."
28,NCT03467373,[Phase 1],A Phase Ib Study Evaluating Glofitamab (RO7082...,All,,,,,18 Years,"[{'type': 'DRUG', 'name': 'Glofitamab', 'descr...",...,,"[{'country_start': None, 'country_end': None, ...",[],{'inclusion': 'Inclusion Criteria:  - Age >/=...,"{'sponsors': [{'name': 'Hoffmann-La Roche', 't...",[{'url': 'https://clinicaltrials.gov/study/NCT...,[{'facility': 'University of Alabama Medical C...,[{'facility': 'University of Alabama Medical C...,"{'nctId': 'NCT03467373', 'orgStudyIdInfo': {'i...","[{'id': 'D000008223', 'term': 'Lymphoma'}, {'i..."


### Question 1: Within the disease “Duchenne Muscular Dystrophy”, what are the common eligibility criteria?

In [21]:
# Narrow down to Duchenne Muscular Dystrophy (DMD) through the trial index:
# the indexing record file holds NCT-disease pairs.
index_file = "/content/drive/MyDrive/Projects/case_study_mT/indexing_records.csv"
df_index = pd.read_csv(index_file)

# Collect every NCT number whose disease alias is DMD
dmd_ncts = df_index.loc[
  df_index['a.alias'] == "Muscular Dystrophy, Duchenne",
  's.id',
].tolist()
print(f"Found {len(dmd_ncts)} DMD-related NCT numbers")

Found 26 DMD-related NCT numbers


In [22]:
# Keep only the dataset rows whose NCT number ('utn') is indexed under DMD
relevant_trials = df_trials.loc[df_trials['utn'].isin(dmd_ncts)]
print(f"Found {len(relevant_trials)} DMD-related trials in the dataset")
# Note from initial eyeballing of the 'eligibility' field
# (e.g. relevant_trials.iloc[14]["eligibility"]): inclusion and exclusion texts are often the same.

Found 15 DMD-related trials in the dataset


In [23]:
# Split each trial's 'eligibility' record into two text columns (inclusion and exclusion)
# using the project helper get_eligibility_text.
# NOTE(review): assumes the helper returns exactly two values per trial - confirm in trialsearch_utils.
eligibility_split = relevant_trials['eligibility'].apply(lambda x: pd.Series(get_eligibility_text(x)))
eligibility_split.columns = ['inclusion_text', 'exclusion_text']

# Drop inclusion/exclusion columns left over from an earlier run of this cell before joining:
# DataFrame.join raises on overlapping column names, so without the drop this cell
# fails when it is executed a second time on the same kernel.
relevant_trials = (
  relevant_trials
  .drop(columns=eligibility_split.columns, errors='ignore')
  .join(eligibility_split)
)

# Gather all inclusion texts into one string for prompt preparation.
# Only non-empty strings are kept, so missing values (None / NaN) cannot break the join.
all_inclusion = " ".join(
  text for text in relevant_trials['inclusion_text'] if isinstance(text, str) and text
)
# Exclusion criteria are not combined - there is no exclusion text left as they are all identical to inclusion
#all_exclusion = " ".join([text for text in relevant_trials['exclusion_text'] if text])

In [24]:
# Inspect the DMD trials, which now carry the inclusion_text / exclusion_text columns
relevant_trials

Unnamed: 0,utn,phase,title,gender,language,studies_id,study_type,maximum_age,minimum_age,interventions,...,publications,eligibility,additional_info,references,location,ctgov_location,duplicate_info,mesh_terms,inclusion_text,exclusion_text
157,NCT06114056,[Phase 1],"A Clinical Study Evaluating the Safety, Tolera...",Male,,,,10 Years,5 Years,"[{'type': 'BIOLOGICAL', 'name': 'JWK007 Single...",...,[],{'inclusion': 'Inclusion Criteria: Participan...,"{'sponsors': [{'name': 'West China Hospital', ...",[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'West China Hospital, Sichuan Un...","[{'facility': 'West China Hospital, Sichuan Un...","{'nctId': 'NCT06114056', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Inclusion Criteria: Participants meeting all o...,
12649,NCT06224660,[Phase 1],"A Phase 1b, Open-Label, Controlled Trial Evalu...",Male,,,,,18 Years,"[{'type': 'GENETIC', 'name': 'SRD-001', 'descr...",...,[],{'inclusion': 'Inclusion Criteria:  - Diagnos...,"{'sponsors': [{'name': 'Sardocor Corp.', 'type...",[{'url': 'https://clinicaltrials.gov/study/NCT...,,[],"{'nctId': 'NCT06224660', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Inclusion Criteria: - Diagnosis of DMD with co...,
1062,NCT03340675,[Phase 2],"A Randomized, Double-Blind, Placebo-Controlled...",Male,,,,,7 Years,"[{'type': 'DRUG', 'name': 'Ifetroban', 'descri...",...,[],{'inclusion': 'Inclusion criteria: 1. Males 7...,{'sponsors': [{'name': 'Cumberland Pharmaceuti...,[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'Arkansas Children's Hospital', ...","[{'facility': 'Arkansas Children's Hospital', ...","{'nctId': 'NCT03340675', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Inclusion criteria: 1. Males 7 years of age an...,
3020,NCT05996003,[Phase 2],"A Phase 2 Study to Assess the Efficacy, Safety...",Male,,,,14 Years,4 Years,"[{'type': 'DRUG', 'name': 'NS-089/NCNP-02', 'd...",...,[],{'inclusion': 'Inclusion Criteria:  - Male ≥ ...,"{'sponsors': [{'name': 'NS Pharma, Inc.', 'typ...",[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'Children's Hospital Colorado', ...","[{'facility': 'Children's Hospital Colorado', ...","{'nctId': 'NCT05996003', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Inclusion Criteria: - Male ≥ 4 years and <15 y...,
6704,NCT06013839,[Phase 2],"A Phase 2, Single-Arm, Open-Label, Multi-Cente...",Male,,,,,16 Years,"[{'type': 'DRUG', 'name': 'talfirastide', 'des...",...,[],{'inclusion': 'Inclusion Criteria: 1. Male su...,{'sponsors': [{'name': 'Constant Therapeutics ...,[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'Hadassah Medical Center', 'stat...","[{'facility': 'Hadassah Medical Center', 'stat...","{'nctId': 'NCT06013839', 'orgStudyIdInfo': {'i...","[{'id': 'D000009202', 'term': 'Cardiomyopathie...",Inclusion Criteria: 1. Male subjects 16 years ...,
7409,NCT05938023,[Phase 2],"A Multicentre, Randomised, Double-blind, Place...",Male,,,,17 Years,10 Years,"[{'type': 'DRUG', 'name': 'ATL1102 25mg', 'des...",...,[],{'inclusion': 'Key Inclusion Criteria:  - Has...,{'sponsors': [{'name': 'Percheron Therapeutics...,[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'Royal Childrens Hospital', 'sta...","[{'facility': 'Royal Childrens Hospital', 'sta...","{'nctId': 'NCT05938023', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Key Inclusion Criteria: - Has a clinical diagn...,
7528,NCT05185622,[Phase 2],"A Phase II Open-Label, Multiple Dose Study to ...",Male,,,,17 Years,2 Years,"[{'type': 'DRUG', 'name': 'Vamorolone', 'descr...",...,[],{'inclusion': 'Inclusion Criteria: 1. Subject...,{'sponsors': [{'name': 'Santhera Pharmaceutica...,[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'Alberta's Children Hospital', '...","[{'facility': 'Alberta's Children Hospital', '...","{'nctId': 'NCT05185622', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Inclusion Criteria: 1. Subject's parent(s) or ...,
8322,NCT06128564,[Phase 2],"A Two-Part, Open-Label Systemic Gene Delivery ...",Male,,,,3 Years,,"[{'type': 'GENETIC', 'name': 'delandistrogene ...",...,[],{'inclusion': 'Inclusion Criteria:  - Cohort ...,"{'sponsors': [{'name': 'Hoffmann-La Roche', 't...",[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'Hospital Sant Joan De Deu', 'st...","[{'facility': 'Hospital Sant Joan De Deu', 'st...","{'nctId': 'NCT06128564', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Inclusion Criteria: - Cohort A: >=3 years of a...,
8950,NCT06079736,[Phase 2],"A Phase 2, Open-Label, Multiple Ascending Dose...",Male,,,,,8 Years,"[{'type': 'DRUG', 'name': 'PGN-EDO51', 'descri...",...,[],{'inclusion': 'Inclusion Criteria:  - Males b...,"{'sponsors': [{'name': 'PepGen Inc', 'type': '...",[{'url': 'https://clinicaltrials.gov/study/NCT...,[{'facility': 'Stan Cassidy Centre for Rehabil...,[{'facility': 'Stan Cassidy Centre for Rehabil...,"{'nctId': 'NCT06079736', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...","Inclusion Criteria: - Males by birth, age at l...",
12910,NCT06100887,[Phase 2],A Phase 2 Study to Evaluate the Effect of EDG-...,Male,,,,14 Years,6 Years,"[{'type': 'DRUG', 'name': 'EDG-5506 Dose 1', '...",...,[],{'inclusion': 'Key Inclusion Criteria:  - Age...,"{'sponsors': [{'name': 'Edgewise Therapeutics,...",[{'url': 'https://clinicaltrials.gov/study/NCT...,"[{'facility': 'UCLA Medical Center', 'status':...","[{'facility': 'UCLA Medical Center', 'status':...","{'nctId': 'NCT06100887', 'orgStudyIdInfo': {'i...","[{'id': 'D000009136', 'term': 'Muscular Dystro...",Key Inclusion Criteria: - Aged 6 to 14 with a ...,


In [None]:
# Prompt asking the LLM to summarise the common eligibility criteria.
# The combined inclusion text of all DMD trials (all_inclusion) is embedded verbatim,
# and the answer is requested as bullet points covering patterns shared across trials.
prompt = f"""
You are a clinical trial assistant.

Here are the combined inclusion criteria from Duchenne Muscular Dystrophy clinical trials:
{all_inclusion}

Please provide your answer as bullet points, each describing one common eligibility criterion.
Focus on the key patterns that appear across multiple trials.
"""

In [None]:
# Send the summarisation prompt to the OpenAI chat completions API as a single user message
client = OpenAI(api_key=api_key)
chat_messages = [{"role": "user", "content": prompt}]
response = client.chat.completions.create(
  model="gpt-4.1-mini",
  messages=chat_messages,
  temperature=0.3,
  # max_tokens is intentionally left unset (8000 was tried)
)

In [None]:
# Take the generated text from the first completion choice and print it
first_choice = response.choices[0]
summary = first_choice.message.content
print(summary)

Common Eligibility Criteria Across Duchenne Muscular Dystrophy (DMD) Clinical Trials:

**Inclusion Criteria:**
- Male sex at birth.
- Confirmed diagnosis of DMD by genetic testing, muscle biopsy, or clinical phenotype consistent with DMD.
- Age ranges commonly include pediatric to adolescent groups (often from as young as 2-4 years up to 18 years), with some trials including adults ≥16 years.
- Stable corticosteroid use for a defined period prior to screening (commonly 8-12 weeks to 6 months), or corticosteroid-naïve status with defined washout periods.
- Ability to perform or cooperate with motor function assessments (e.g., stand from supine, 4-stair climb, Performance of Upper Limb test [PUL], TTSTAND).
- Specific motor function thresholds, such as timed stand from supine ≤7-10 seconds, 4-stair climb <10 seconds, or PUL entry scores within defined ranges.
- Body weight minimum thresholds (commonly ≥20-25 kg) and sometimes maximum thresholds depending on the study.
- Stable cardiac fu

### Question 2: Match Patients to Trials

In [None]:
# Read in one participant file for a look
#file_path = "/content/drive/MyDrive/Projects/case_study_mT/patient_02.json"
#record = load_json(file_path)
#record
#patient_to_text(record)

In [None]:
# Patient JSON files to evaluate (stored as patient_<id>.json in the project folder)
patients = ["01", "02", "03"]
#patients = ["02",] # for unit test
file_path = "/content/drive/MyDrive/Projects/case_study_mT/"

# Estimate patient-trial eligibility per patient across all DMD trials.
# Results maps each patient id to {"patient_info": <loaded JSON>, "trial_matches": [<one entry per trial>]}.
Results = {}
for patient_id in patients:  # named patient_id so the builtin `id` is not shadowed
  # Load file
  patient_file = f"{file_path}patient_{patient_id}.json"
  patient_data = load_json(patient_file)

  # Create the structure for this patient up front, so matches collected so far
  # are kept even if the loop is interrupted part-way through
  Results[patient_id] = {
    "patient_info": patient_data,
    "trial_matches": []
  }

  for trial_inx in range(len(relevant_trials)):
  #for trial_inx in range(0,2): # as a tryout # for unit test
    trial = relevant_trials.iloc[trial_inx]
    nct_id = trial.get("utn")
    # Dedicated name: reusing `prompt` here would overwrite the Question 1
    # summarisation prompt that the earlier LLM cell reads when it is re-run.
    match_prompt = build_prompt(patient_data, trial)

    try:
      match_result = call_llm(match_prompt)
      Results[patient_id]["trial_matches"].append({
        "nct_id": nct_id,
        "eligibility": match_result.get("eligibility"),
        "reasoning": match_result.get("reasoning")
      })
    except Exception as e: # best effort: record the failure (e.g. no eligibility info available) and continue
      Results[patient_id]["trial_matches"].append({
        "nct_id": nct_id,
        "eligibility": "error",
        "reasoning": str(e)
      })

In [None]:
# Showcase results of one patient: the stored patient info plus the
# per-trial match entries (nct_id, eligibility, reasoning) for patient "03"
Results["03"]

{'patient_info': {'profile': {'condition': 'Muscular Dystrophy, Duchenne',
   'country': 'United Kingdom of Great Britain and Northern Ireland',
   'age': 6,
   'gender': 'Male',
   'profile': 'Histology: None\nPrevious treatment: Deflazacort\nBiomarkers: DMD exon 27 deletion\nTests: LVEF 62%, eGFR 92 ml/min/1.73m2\nComorbidities: None\nOther: Ambulatory'}},
 'trial_matches': [{'nct_id': 'NCT06114056',
   'eligibility': 'eligible',
   'reasoning': 'Patient is a 6-year-old male with DMD confirmed by genetic testing (DMD exon 27 deletion within exons 18-58), on stable corticosteroid treatment (Deflazacort), ambulatory, with normal LVEF (62%) and eGFR, no comorbidities or contraindications listed. No evidence of exclusion criteria present.'},
  {'nct_id': 'NCT06224660',
   'eligibility': 'ineligible',
   'reasoning': 'Patient is 6 years old but trial requires age from 18 years; no evidence of cardiomyopathy or left ventricular ejection fraction below 40%.'},
  {'nct_id': 'NCT03340675',
  

### Others

In [None]:
# Save the Results for API building.
# The path uses a forward slash: a backslash is not a path separator on Colab/Linux
# (the file would be created in the project root under a literal "data\..." name
# instead of inside data/), and "\R" is also an invalid string escape sequence.
# NOTE(review): the file name spelling "eligiblity" is kept as-is in case other code already reads it.
with open(file_path + 'data/Results_eligiblity.json', 'w') as fp:
  json.dump(Results, fp)

In [25]:
# Snapshot the installed package versions; the relative path means the file
# is written to the notebook's current working directory
!pip freeze > requirements_search_prototype.txt