In [1]:
!pip install groq

Collecting groq
  Downloading groq-0.36.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.36.0-py3-none-any.whl (137 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.3/137.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq
Successfully installed groq-0.36.0


In [2]:
import pandas as pd
from tqdm import tqdm

import re
import requests
import json
import time
import copy
from typing import Dict, Any

In [None]:

# ============= LUNG CANCER-ONLY FETCHER =============
BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

def fetch_lung_cancer_trials(
    page_size=10,
    max_total=50,             
    status_filter=None         # e.g., "RECRUITING", "ACTIVE_NOT_RECRUITING"
):
    params = {
        "format": "json",
        "pageSize": page_size,
        # MeSH filter for lung neoplasms
        "filter.advanced": "AREA[ConditionMeshTerm]Neoplasms/Lung",
    }
    if status_filter:
        params["filter.status"] = status_filter

    all_studies = []
    next_token = None
    pbar = tqdm(desc="Fetching lung cancer trials", unit="trial")

    while True:
        if next_token:
            params["pageToken"] = next_token

        resp = requests.get(BASE_URL, params=params, timeout=60)
        resp.raise_for_status()
        data = resp.json()

        batch = data.get("studies", [])
        all_studies.extend(batch)
        pbar.update(len(batch))

        next_token = data.get("nextPageToken")
        if not next_token or (max_total and len(all_studies) >= max_total):
            break

        time.sleep(0.1)  # being polite to the API

    pbar.close()
    studies = all_studies[:max_total] if max_total else all_studies
    print(f"\nFetched {len(studies)} lung cancer trials")
    return studies

# Usage
studies = fetch_lung_cancer_trials(
    page_size=100,
    max_total=100,
    # status_filter="RECRUITING|ACTIVE_NOT_RECRUITING"  # optional
)

# Save to file if needed
# with open("lung_cancer_trials_2025.json", "w", encoding="utf-8") as f:
#     json.dump(studies, f, indent=2)

print("Done!")


Fetching lung cancer trials: 100trial [00:00, 201.95trial/s]


Fetched 100 lung cancer trials
Done!





In [4]:
# ============= Flatten helper =============
def safe_get(d, path, default=""):
    for p in path:
        if not isinstance(d, dict):
            return default
        d = d.get(p, default)
    return d if d is not None else default

records = []
for s in studies:
    ps = s.get("protocolSection", {})
    row = {
        "NCT_ID": safe_get(ps, ["identificationModule", "nctId"]),
        "Title": safe_get(ps, ["identificationModule", "briefTitle"]),
        "Official_Title": safe_get(ps, ["identificationModule", "officialTitle"]),
        "Status": safe_get(ps, ["statusModule", "overallStatus"]),
        "Start_Date": safe_get(ps, ["statusModule", "startDateStruct", "date"]),
        "Primary_Completion": safe_get(ps, ["statusModule", "primaryCompletionDateStruct", "date"]),
        "Study_Type": safe_get(ps, ["designModule", "studyType"]),
        "Phases": ", ".join(safe_get(ps, ["designModule", "phases"], [])) if isinstance(safe_get(ps, ["designModule", "phases"], []), list) else safe_get(ps, ["designModule", "phases"], ""),
        "Enrollment": safe_get(ps, ["designModule", "enrollmentInfo", "count"]),
        "Conditions": ", ".join(safe_get(ps, ["conditionsModule", "conditions"], [])) if isinstance(safe_get(ps, ["conditionsModule", "conditions"], []), list) else safe_get(ps, ["conditionsModule", "conditions"], ""),
        "Sex": safe_get(ps, ["eligibilityModule", "sex"]),
        "Min_Age": safe_get(ps, ["eligibilityModule", "minimumAge"]),
        "Max_Age": safe_get(ps, ["eligibilityModule", "maximumAge"]),
        "Lead_Sponsor": safe_get(ps, ["sponsorCollaboratorsModule", "leadSponsor", "name"]),
        "Brief_Summary": safe_get(ps, ["descriptionModule", "briefSummary"]),
        "Eligibility": safe_get(ps, ["eligibilityModule", "eligibilityCriteria"]),
    }
    # outcomes & interventions
    prim = safe_get(ps, ["outcomesModule", "primaryOutcomes"], [])
    row["Primary_Measures"] = " | ".join([p.get("measure", "") for p in prim]) if prim else ""
    sec = safe_get(ps, ["outcomesModule", "secondaryOutcomes"], [])
    row["Secondary_Measures"] = " | ".join([p.get("measure", "") for p in sec]) if sec else ""
    ints = safe_get(ps, ["armsInterventionsModule", "interventions"], [])
    row["Interventions"] = " | ".join([f"{i.get('type','')}:{i.get('name','')}" for i in ints]) if ints else ""
    records.append(row)

df = pd.DataFrame(records)
print("Raw df shape:", df.shape)

Raw df shape: (100, 19)


In [5]:
# ============= Clean text ============
def clean_text(t):
    if not isinstance(t, str): return ""
    t = re.sub(r"\s+", " ", t)
    t = re.sub(r"[•\n\t\r]", " ", t)
    t = re.sub(r"\(Version [^\)]*\)", "", t)
    return t.strip()

for col in ["Title", "Brief_Summary", "Official_Title", "Eligibility"]:
    if col in df.columns:
        df[col] = df[col].apply(clean_text)

# optional: drop trials that have no eligibility text or extremely short eligibility
df["elig_len"] = df["Eligibility"].apply(lambda x: len(x or ""))
# df = df[df["elig_len"] > 50].copy()
print("After filtering short elig texts:", df.shape)

After filtering short elig texts: (100, 20)


In [None]:
# pd.set_option('display.max_colwidth', None)

In [6]:
pd.reset_option('display.max_colwidth')

In [7]:
df

Unnamed: 0,NCT_ID,Title,Official_Title,Status,Start_Date,Primary_Completion,Study_Type,Phases,Enrollment,Conditions,Sex,Min_Age,Max_Age,Lead_Sponsor,Brief_Summary,Eligibility,Primary_Measures,Secondary_Measures,Interventions,elig_len
0,NCT03556228,Selective TrkA Inhibitor VMD-928 to Treat TrkA...,"An Open-Label, Multiple-Dose, Dose-Escalation ...",RECRUITING,2018-06-08,2026-12,INTERVENTIONAL,PHASE1,82,"Head and Neck Carcinoma, Adenoid Cystic Carcin...",ALL,18 Years,80 Years,"VM Oncology, LLC","This is a multicenter, open-label, Phase 1 stu...",Key Inclusion Criteria: * Histologically or cy...,Number and severity of treatment-emergent Adve...,Area under the plasma concentration versus tim...,DRUG:VMD-928 300 mg Tablet (ongoing); 100 mg C...,4124
1,NCT00009971,Fenretinide in Treating Patients With Recurren...,Phase II Trial Of Fenretinide (NSC-374551; IND...,COMPLETED,2000-11,2005-09,INTERVENTIONAL,PHASE2,32,Lung Cancer,ALL,18 Years,,National Cancer Institute (NCI),Phase II trial to study the effectiveness of f...,DISEASE CHARACTERISTICS: * Histologically or c...,,,DRUG:fenretinide,1815
2,NCT00260871,Genetic Epidemiological Study of Lung Cancer i...,Genetic Epidemiological Study of Lung Cancer i...,UNKNOWN,2002-05,,OBSERVATIONAL,,1500,Lung Cancer,ALL,,,"National Health Research Institutes, Taiwan",Lung adenocarcinoma in Chinese females is hypo...,Inclusion Criteria: * lung adenocarcinoma Excl...,,,,64
3,NCT00721981,An Evaluation of the Clinical Treatment and Pa...,A Non-interventional Study for Evaluation of t...,TERMINATED,2009-04,,OBSERVATIONAL,,120,"Lung Cancer, Non-small Cell Lung Cancer",ALL,18 Years,,AstraZeneca,The main purpose of this study is the identifi...,Inclusion Criteria: * Patients suffering from ...,To obtain patient preferences in direct correl...,current medical practice treatment objectives ...,,646
4,NCT03167281,Early Intrapleural TPA Instillation Versus Late,Early Intrapleural TPA Instillation Versus Late,WITHDRAWN,2017-02-17,2021-03-10,INTERVENTIONAL,PHASE4,0,Tissue Plasminogen Activator,ALL,18 Years,100 Years,Prisma Health-Midlands,Chest tubes are used for air or fluid removal ...,"Inclusion Criteria: * Adult patients, age 18 y...",Chest tube removal,Fluid drainage | Need for other interventions ...,COMBINATION_PRODUCT:early tPA and DNase,387
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,NCT03906331,Expanded Access for the Treatment of Cancers W...,Expanded Access for the Treatment of Cancers W...,APPROVED_FOR_MARKETING,,,EXPANDED_ACCESS,,,"Non Small Cell Lung Cancer, Medullary Thyroid ...",ALL,18 Years,,Eli Lilly and Company,Expanded access for participants with cancer w...,Inclusion Criteria: * Diagnosis of cancer with...,,,DRUG:Selpercatinib,836
96,NCT04731467,A Study of CM24 in Combination with Nivolumab ...,"A Phase 1/2 Study to Assess the Safety, Tolera...",COMPLETED,2021-03-19,2024-09-30,INTERVENTIONAL,"PHASE1, PHASE2",79,"Solid Tumor, Non Small Cell Lung Cancer, Pancr...",ALL,18 Years,,Famewave Ltd.,"This is an open-label, multicenter, multi-dose...",Inclusion Criteria: 1. Part A: Previously trea...,Part A: Incidence of treatment emergent advers...,Maximum serum concentration [Cmax] | Time of m...,DRUG:CM-24 and Nivolumab - Dose Escalation | D...,4302
97,NCT00241267,Bronchial Ultrasonography Via a Fibrescope: Pi...,Bronchial Ultrasonography Via a Fibrescope: Pi...,TERMINATED,2005-03,,OBSERVATIONAL,,20,Pulmonary Neoplasms,ALL,18 Years,,"University Hospital, Tours",The exploration of neoplastic pulmonary pathol...,Inclusion Criteria: * Assumed proximal neoplas...,,,DEVICE:Dedicated ultrasonographic probe during...,151
98,NCT03231111,The Effectiveness of Using Preventive Multimed...,The Effectiveness of Using Preventive Multimed...,UNKNOWN,2017-07-25,2018-07-01,INTERVENTIONAL,,60,Lung Cancer,FEMALE,20 Years,80 Years,National Taiwan University Hospital,The Effectiveness of Using Preventive Multimed...,Inclusion Criteria: 1. Patients older than 20 ...,skindex-29,,OTHER:Preventive Multimedia Teaching Program,469


In [8]:
def is_systemic_anticancer_trial(row):
    text = " ".join([
        str(row.get('Title', '')),
        str(row.get('Official_Title', '')),
        str(row.get('Brief_Summary', '')),
        str(row.get('Eligibility', '')),
        str(row.get('Conditions', '')),
        str(row.get('Interventions', ''))
    ]).lower()

    # 1. HARD EXCLUDE — non-drug / procedural / local therapy (kill 99% of false positives instantly)
    junk_keywords = [
        # Locoregional / ablation / device
        "hifu", "radiofrequency ablation", "rfa", "cryoablation", "microwave ablation",
        "tace", "deb-tace", "y90", "radioembolization", "sir-spheres", "therasphere",
        "hepatic arterial", "haic", "chemoembolization", "embolization",
        "sbrt", "cyberknife", "gamma knife", "stereotactic body", "stereotactic radiosurgery",
        "brachytherapy", "hdr ", "intrathecal", "lumbar puncture", "pleurodesis",
        "thoracentesis", "paracentesis", "pleural catheter", "chest tube", "pleurx",

        # Surgical / diagnostic procedures
        "surgery", "resection", "lobectomy", "wedge resection", "thoracoscopic", "vats", "ivats",
        "biopsy only", "diagnostic biopsy", "tissue collection",

        # Prevention / screening / supportive care
        "prevention", "chemoprevention", "smoking cessation", "aspirin", "statin",
        "screening", "early detection", "surveillance",

        # Non-cancer or minimal anticancer intent
        "supportive care", "palliative care only", "best supportive care", "placebo",
        "radiation only", "radiotherapy alone", "radiation therapy as single modality",

        # Others
        "stage iiia", "stage iii", "inoperable locally advanced", "concurrent chemoradiation", "definitive chemoradiation",
        "induction chemotherapy.*allowed", "no study drug", "radiation therapy.*primary", "thoracic radiation"
    ]
    if any(kw in text for kw in junk_keywords):
        return False

    # 2. Must have at least ONE real anticancer drug keyword
    drug_keywords = [
        "chemotherapy", "targeted therapy", "immunotherapy", "pd-1", "pd-l1", "ctla-4",
        "parp inhibitor", "tkis", "egfr", "alk ", "braf", "mek", "mtor", "pi3k",
        "cdk4/6", "bcl-2", "antibody-drug conjugate", "adc", "bispecific",
        "car-t", "tcr-t", "til therapy", "vaccine", # (vaccine often therapeutic in oncology)
        "olaparib", "osimertinib", "pembrolizumab", "nivolumab", "atezolizumab",
        "bevacizumab", "trastuzumab", "cetuximab", "ramucirumab", "everolimus",
        "lenvatinib", "sunitinib", "pazopanib", "cabozantinib", "regorafenib"
    ]
    if not any(kw in text for kw in drug_keywords):
        return False

    # 3. Final safety net: must contain at least one of these strong signals
    strong_signals = [
        "phase 1", "phase i", "phase 2", "phase ii", "phase 3", "phase iii",
        "dose escalation", "maximum tolerated dose", "recommended phase 2 dose",
        "progression-free survival", "overall survival", "objective response rate",
        "recist", "irrc", "pfs", "os", "orr", "dcr"
    ]
    if not any(signal in text for signal in strong_signals):
        return False

    return True


# Apply it
df = df[
    (df['Study_Type'] == 'INTERVENTIONAL') &
    df.apply(is_systemic_anticancer_trial, axis=1)
].copy()

In [9]:
df

Unnamed: 0,NCT_ID,Title,Official_Title,Status,Start_Date,Primary_Completion,Study_Type,Phases,Enrollment,Conditions,Sex,Min_Age,Max_Age,Lead_Sponsor,Brief_Summary,Eligibility,Primary_Measures,Secondary_Measures,Interventions,elig_len
1,NCT00009971,Fenretinide in Treating Patients With Recurren...,Phase II Trial Of Fenretinide (NSC-374551; IND...,COMPLETED,2000-11,2005-09,INTERVENTIONAL,PHASE2,32,Lung Cancer,ALL,18 Years,,National Cancer Institute (NCI),Phase II trial to study the effectiveness of f...,DISEASE CHARACTERISTICS: * Histologically or c...,,,DRUG:fenretinide,1815
18,NCT03836352,"Study of an Immunotherapeutic, DPX-Survivac, i...","A Phase 2, Open-label, Multicenter, Study of a...",ACTIVE_NOT_RECRUITING,2018-12-21,2023-12-31,INTERVENTIONAL,PHASE2,184,"Ovarian Cancer, Hepatocellular Carcinoma, Non-...",ALL,18 Years,,"ImmunoVaccine Technologies, Inc. (IMV Inc.)",This study will assess the safety and efficacy...,Key Inclusion Criteria: * Subjects with advanc...,Efficacy as measured by objective response rat...,Objective response rate | Duration of response...,OTHER:DPX-Survivac | DRUG:Cyclophosphamide | D...,2335
34,NCT01249443,Paclitaxel and Carboplatin in Treating Patient...,A Phase 1 Study of Paclitaxel and Carboplatin ...,TERMINATED,2013-11,2016-05,INTERVENTIONAL,PHASE1,17,"HIV Infection, Recurrent Anal Cancer, Recurren...",ALL,18 Years,,AIDS Malignancy Consortium,This phase I clinical trial is studying the si...,Inclusion Criteria: * Patients must have known...,Incidence of adverse events during paclitaxel ...,"Response rates in patients with lung, head and...",DRUG:vorinostat | OTHER:diagnostic laboratory ...,8612
50,NCT04421352,Safety and Tolerability Evaluation of Low-dose...,"A Phase Ia/Ib, Open-Label, Multiple-Dose, Dose...",COMPLETED,2021-01-12,2023-12-22,INTERVENTIONAL,PHASE1,11,Relapsed Small Cell Lung Cancer,ALL,18 Years,75 Years,CStone Pharmaceuticals,A Phase Ia/Ib Safety and Tolerability Evaluati...,Inclusion Criteria: 1. Patients with histologi...,To determine the recommended radiation dose an...,Incidence of Adverse Events (AE) and Dose-Limi...,DRUG:CS1001,1904
62,NCT01774526,Molecular Epidemiology of Lung Adenocarcinoma ...,Molecular Epidemiology of Lung Adenocarcinoma ...,UNKNOWN,2010-12,2016-12,INTERVENTIONAL,,40,Lung Cancer,ALL,21 Years,,"National University Hospital, Singapore",Lung Cancer continues to be the major cause of...,Inclusion Criteria: * Asian ethnicity * Age \>...,Descriptive study of our patients with lung ad...,,OTHER:Characterise the molecular epidemiology ...,535
72,NCT01780675,Hippocampus Avoidance PCI vs PCI,Prophylactic Cranial Irradiation With or Witho...,COMPLETED,2013-04,2019-04-01,INTERVENTIONAL,PHASE3,168,"Small Cell Lung Cancer, Lung Cancer",ALL,18 Years,,The Netherlands Cancer Institute,Using Intensity Modulated radiotherapy it is p...,Inclusion Criteria: * \- Small Cell Lung Cance...,neurocognitive decline,safety,RADIATION:Radiation Prophylactic Cranial Irrad...,741
98,NCT03231111,The Effectiveness of Using Preventive Multimed...,The Effectiveness of Using Preventive Multimed...,UNKNOWN,2017-07-25,2018-07-01,INTERVENTIONAL,,60,Lung Cancer,FEMALE,20 Years,80 Years,National Taiwan University Hospital,The Effectiveness of Using Preventive Multimed...,Inclusion Criteria: 1. Patients older than 20 ...,skindex-29,,OTHER:Preventive Multimedia Teaching Program,469


In [None]:

# Step 1: Keep only rows where elig_len <= 3500
df_filtered = df[(df['elig_len'] <= 3500) & (df['elig_len'] >= 300)].copy()

# Step 2: If still more than 100 rows → randomly sample 100 (reproducible if you set seed)
if len(df_filtered) > 100:
    df_filtered = df_filtered.sample(n=100, random_state=2)   # remove random_state if you want true random each time
else:
    print(f"After filtering, only {len(df_filtered)} rows remain (≤100), keeping all.")

# Reset index if you want clean numbering
df = df_filtered.reset_index(drop=True)

After filtering, only 6 rows remain (≤100), keeping all.


In [11]:
df

Unnamed: 0,NCT_ID,Title,Official_Title,Status,Start_Date,Primary_Completion,Study_Type,Phases,Enrollment,Conditions,Sex,Min_Age,Max_Age,Lead_Sponsor,Brief_Summary,Eligibility,Primary_Measures,Secondary_Measures,Interventions,elig_len
0,NCT00009971,Fenretinide in Treating Patients With Recurren...,Phase II Trial Of Fenretinide (NSC-374551; IND...,COMPLETED,2000-11,2005-09,INTERVENTIONAL,PHASE2,32,Lung Cancer,ALL,18 Years,,National Cancer Institute (NCI),Phase II trial to study the effectiveness of f...,DISEASE CHARACTERISTICS: * Histologically or c...,,,DRUG:fenretinide,1815
1,NCT03836352,"Study of an Immunotherapeutic, DPX-Survivac, i...","A Phase 2, Open-label, Multicenter, Study of a...",ACTIVE_NOT_RECRUITING,2018-12-21,2023-12-31,INTERVENTIONAL,PHASE2,184,"Ovarian Cancer, Hepatocellular Carcinoma, Non-...",ALL,18 Years,,"ImmunoVaccine Technologies, Inc. (IMV Inc.)",This study will assess the safety and efficacy...,Key Inclusion Criteria: * Subjects with advanc...,Efficacy as measured by objective response rat...,Objective response rate | Duration of response...,OTHER:DPX-Survivac | DRUG:Cyclophosphamide | D...,2335
2,NCT04421352,Safety and Tolerability Evaluation of Low-dose...,"A Phase Ia/Ib, Open-Label, Multiple-Dose, Dose...",COMPLETED,2021-01-12,2023-12-22,INTERVENTIONAL,PHASE1,11,Relapsed Small Cell Lung Cancer,ALL,18 Years,75 Years,CStone Pharmaceuticals,A Phase Ia/Ib Safety and Tolerability Evaluati...,Inclusion Criteria: 1. Patients with histologi...,To determine the recommended radiation dose an...,Incidence of Adverse Events (AE) and Dose-Limi...,DRUG:CS1001,1904
3,NCT01774526,Molecular Epidemiology of Lung Adenocarcinoma ...,Molecular Epidemiology of Lung Adenocarcinoma ...,UNKNOWN,2010-12,2016-12,INTERVENTIONAL,,40,Lung Cancer,ALL,21 Years,,"National University Hospital, Singapore",Lung Cancer continues to be the major cause of...,Inclusion Criteria: * Asian ethnicity * Age \>...,Descriptive study of our patients with lung ad...,,OTHER:Characterise the molecular epidemiology ...,535
4,NCT01780675,Hippocampus Avoidance PCI vs PCI,Prophylactic Cranial Irradiation With or Witho...,COMPLETED,2013-04,2019-04-01,INTERVENTIONAL,PHASE3,168,"Small Cell Lung Cancer, Lung Cancer",ALL,18 Years,,The Netherlands Cancer Institute,Using Intensity Modulated radiotherapy it is p...,Inclusion Criteria: * \- Small Cell Lung Cance...,neurocognitive decline,safety,RADIATION:Radiation Prophylactic Cranial Irrad...,741
5,NCT03231111,The Effectiveness of Using Preventive Multimed...,The Effectiveness of Using Preventive Multimed...,UNKNOWN,2017-07-25,2018-07-01,INTERVENTIONAL,,60,Lung Cancer,FEMALE,20 Years,80 Years,National Taiwan University Hospital,The Effectiveness of Using Preventive Multimed...,Inclusion Criteria: 1. Patients older than 20 ...,skindex-29,,OTHER:Preventive Multimedia Teaching Program,469


In [None]:
# ============= Save initial cleaned CSV ============
out_csv = f"trials_clean_N{len(df)}.csv"
df.to_csv(out_csv, index=False)
print("Saved:", out_csv)

In [None]:
df = pd.read_csv(out_csv)

In [None]:
from groq import Groq
from getpass import getpass


GROQ_API_KEY = getpass("Get your free key at https://console.groq.com/keys → ")

client = Groq(api_key=GROQ_API_KEY)

Get your free key at https://console.groq.com/keys → ··········


In [13]:
df_1 = df.copy()

In [14]:
pd.set_option('display.max_colwidth', None)

In [None]:
# ─────────────────────────────────────────────────────────────
# FINAL 2025 GOLD SYSTEM PROMPT
# ─────────────────────────────────────────────────────────────
SYSTEM_PROMPT = """
You are a world-class, high-precision clinical trial eligibility parser specialized in oncology interventional therapeutic trials. Your only job is to output perfect, strictly valid JSON using the exact schema below. Never add, remove, or rename any field. Never output explanations, markdown, or extra text.

### MANDATORY SCHEMA (use EXACTLY this structure)
{
  "trial_id": null,
  "trial_category": "therapeutic_interventional",
  "age": {"min": 18, "max": null},
  "region_specific_age": {"japan_min": null},
  "inclusion": {
    "disease": {
      "confirmed_by": null,
      "cancer_type": "",
      "histology_subtype": "",
      "stage": "",
      "stage_list": [],
      "metastatic": null,
      "measurable_disease_recist": null,
      "biomarker_required": []
    },
    "performance_status": {
      "scale": null,
      "min": null,
      "max": null
    },
    "life_expectancy_weeks": null,
    "prior_therapy": {
      "required": [],
      "allowed": [],
      "disallowed": [],
      "max_lines_systemic": null,
      "washout_weeks": {
        "chemotherapy": null,
        "targeted_therapy": null,
        "immunotherapy": null,
        "investigational": null,
        "radiation": null,
        "major_surgery": null
      }
    },
    "brain_metastases": null,
    "brain_mets_stable_duration_weeks": null,
    "organ_function": {
      "anc": null,
      "platelets": null,
      "hemoglobin_g_per_dl": null,
      "creatinine_clearance_ml_min": null,
      "bilirubin_x_uln": null,
      "ast_alt_x_uln": null,
      "albumin_g_per_dl": null
    },
    "cardiac": {
      "qtcf_ms_max": null,
      "recent_mi_months_exclusion": null,
      "nyha_class_max": null,
      "lvef_percent_min": null
    },
    "contraception_required": null,
    "other_inclusions": []
  },
  "exclusion": {
    "pregnant_or_breastfeeding": null,
    "active_cns_metastases": null,
    "uncontrolled_intercurrent_illness": null,
    "grade_2_or_higher_neuropathy": null,
    "history_of": [],
    "concurrent_medications_disallowed": [],
    "other_exclusions": []
  }
}

### STRICT RULES & CONVENTIONS (follow exactly)

1. Age
   - Default "min": 18 unless explicitly different
   - Japan-specific pediatric trials → fill japan_min only

2. Performance Status
   - Convert everything to ECOG 0–5 scale
   - Karnofsky 70–100 → ECOG 0–1 ("min": 0, "max": 1)
   - Karnofsky ≥70 → "max": 1
   - ECOG ≤1 → "max": 1
   - Always fill "scale": "ECOG"

3. Brain Metastases – ONLY use these 5 values:
   null | "excluded" | "allowed_if_asymptomatic" | "allowed_if_stable" | "allowed_if_treated_and_stable"
   - If stable duration specified → fill brain_mets_stable_duration_weeks
   - "Symptomatic", "uncontrolled", "requiring steroids" → "excluded"

4. Biomarkers
   - Use exact wording from trial: "ALK positive by FDA-approved test", "EGFR exon 19 del or L858R", "PD-L1 TPS ≥50%", "BRCA1/2 mutated", "MSI-H/dMMR"

5. Prior Therapy
   - "max_lines_systemic" = total systemic lines (not including adjuvant)
   - "required" = must have received
   - "disallowed" = must NOT have received

6. Lab Values – ALWAYS normalize
   - Bilirubin ≤1.5 mg/dL → "bilirubin_x_uln": 1.5
   - AST/ALT ≤3×ULN (≤5× if liver mets) → "ast_alt_x_uln": 3 + note exception in other_inclusions
   - Hemoglobin ≥9 g/dL → "hemoglobin_g_per_dl": 9

7. Life Expectancy
   - ≥3 months = 12, ≥6 months = 24, ≥12 months = 52

8. Contraception
   - Any mention of highly effective contraception → "contraception_required": true

9. cancer_type & histology_subtype
   - cancer_type: "non-small cell lung cancer", "hepatocellular carcinoma", "urothelial carcinoma"
   - histology_subtype: "adenocarcinoma", "squamous", "small cell", "clear cell RCC"

10. NEVER leave critical fields null if information exists
    - If ECOG mentioned → fill performance_status
    - If any labs mentioned → fill organ_function
    - If any prior therapy rules → fill prior_therapy

11. other_inclusions / other_exclusions
    - Short, precise bullets only
    - Examples:
      "Archival tumor tissue required"
      "Liver metastases: AST/ALT ≤5×ULN allowed"
      "Able to swallow tablets"
      "No active autoimmune disease requiring systemic treatment in past 2 years"

Only output raw JSON. No ```json wrapper, no extra characters, no thinking step.

Now parse the following trial eligibility text:
"""

# ─────────────────────────────────────────────────────────────
# FINAL PERFECT EXTRACTION FUNCTION
# ─────────────────────────────────────────────────────────────
def extract_eligibility_perfect(
    text: str,
    client,
    model: str = "meta-llama/llama-4-maverick-17b-128e-instruct",
    max_retries: int = 3
) -> Dict[str, Any]:
    """
    Final ultra-strict parser using your exact schema + rules.
    Assumes input is a real therapeutic interventional trial.
    Returns perfect JSON or raises exception.
    """
    if not text or not text.strip():
        raise ValueError("Empty eligibility text")

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": SYSTEM_PROMPT},
                    {"role": "user", "content": text}
                ],
                temperature=0.0,
                top_p=1.0,
                max_tokens=3000,
                timeout=90
            )

            raw = response.choices[0].message.content.strip()

            # Remove any accidental wrappers
            raw = re.sub(r"^```json\s*", "", raw, flags=re.IGNORECASE)
            raw = re.sub(r"^```\s*", "", raw)
            raw = re.sub(r"```$", "", raw)
            raw = raw.strip()

            if not raw.startswith("{"):
                raise ValueError("Output does not start with {")

            # Find first complete JSON object
            brace_level = 0
            end_idx = None
            for i, char in enumerate(raw):
                if char == '{': brace_level += 1
                if char == '}': brace_level -= 1
                if brace_level == 0:
                    end_idx = i + 1
                    break
            if end_idx is None:
                raise ValueError("Unbalanced braces")

            json_str = raw[:end_idx]
            result = json.loads(json_str)

            # Basic validation
            if not isinstance(result, dict) or "inclusion" not in result:
                raise ValueError("Invalid top-level structure")

            # Final safety: ensure performance_status scale is ECOG
            ps = result["inclusion"].get("performance_status", {})
            if ps.get("scale") in [None, ""]:
                ps["scale"] = "ECOG"

            return result

        except Exception as e:
            print(f"[Attempt {attempt + 1}] Failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
                continue
            else:
                raise RuntimeError(f"Eligibility parsing failed after {max_retries} attempts") from e

    raise RuntimeError("extract_eligibility_perfect exited unexpectedly")

In [16]:
tqdm.pandas()

df_1["eligibility_json"] = df_1["Eligibility"].progress_apply(
    lambda x: extract_eligibility_perfect(x, client=client)
)

100%|██████████| 6/6 [00:54<00:00,  9.06s/it]


In [17]:
df_1[["eligibility_json","Eligibility"] ].head(10)

Unnamed: 0,eligibility_json,Eligibility
0,"{'trial_id': None, 'trial_category': 'therapeutic_interventional', 'age': {'min': 18, 'max': None}, 'region_specific_age': {'japan_min': None}, 'inclusion': {'disease': {'confirmed_by': 'histology or cytology', 'cancer_type': 'small cell lung cancer', 'histology_subtype': '', 'stage': 'recurrent', 'stage_list': ['limited stage', 'extensive stage'], 'metastatic': None, 'measurable_disease_recist': True, 'biomarker_required': []}, 'performance_status': {'scale': 'ECOG', 'min': 0, 'max': 2}, 'life_expectancy_weeks': None, 'prior_therapy': {'required': ['platinum-containing chemotherapy regimen'], 'allowed': ['thoracic radiotherapy', 'steroids at stable dose'], 'disallowed': [], 'max_lines_systemic': 2, 'washout_weeks': {'chemotherapy': 3, 'targeted_therapy': None, 'immunotherapy': None, 'investigational': None, 'radiation': None, 'major_surgery': None}}, 'brain_metastases': 'allowed_if_treated_and_stable', 'brain_mets_stable_duration_weeks': None, 'organ_function': {'anc': None, 'platelets': 70000, 'hemoglobin_g_per_dl': None, 'creatinine_clearance_ml_min': 60, 'bilirubin_x_uln': 1.5, 'ast_alt_x_uln': 2, 'albumin_g_per_dl': None}, 'cardiac': {'qtcf_ms_max': None, 'recent_mi_months_exclusion': 6, 'nyha_class_max': None, 'lvef_percent_min': None}, 'contraception_required': True, 'other_inclusions': ['WBC at least 2,500/mm^3', 'Creatinine no greater than 1.5 mg/dL or creatinine clearance at least 60 mL/min']}, 'exclusion': {'pregnant_or_breastfeeding': True, 'active_cns_metastases': True, 'uncontrolled_intercurrent_illness': True, 'grade_2_or_higher_neuropathy': None, 'history_of': ['other malignancy within the past 5 years except localized nonmelanoma skin cancer or carcinoma in situ', 'pre-existing retinal degenerative disease'], 'concurrent_medications_disallowed': ['anticonvulsants', 'systemic retinoid or carotenoid therapy'], 'other_exclusions': ['symptomatic heart disease']}}","DISEASE CHARACTERISTICS: * Histologically or cytologically confirmed recurrent small cell lung cancer (SCLC) after platinum-containing chemotherapy regimen with or without thoracic radiotherapy * Limited stage or extensive stage SCLC * Measurable disease * At least 20 mm by conventional techniques OR * At least 10 mm with spiral CT scan * No pleural effusions, bone metastases, brain metastases, or abnormal radionucleotide scans as sole evidence of disease * No symptomatic or uncontrolled brain or leptomeningeal disease * Previously treated brain metastases allowed if neurologically stable PATIENT CHARACTERISTICS: Age: * 18 and over Performance status: * Zubrod 0-2 Hematopoietic: * WBC at least 2,500/mm\^3 * Platelet count at least 70,000/mm\^3 Hepatic: * Bilirubin no greater than 1.5 mg/dL * SGOT no greater than 2 times upper limit of normal Renal: * Creatinine no greater than 1.5 mg/dL OR * Creatinine clearance at least 60 mL/min Cardiovascular: * No symptomatic heart disease * No myocardial infarction within the past 6 months Other: * Not pregnant or nursing * Negative pregnancy test * Fertile patients must use effective contraception for 1 month before, during, and for 2 months after study * No pre-existing retinal degenerative disease (e.g., retinitis pigmentosa or associated disorders) * No other serious concurrent illness * No other malignancy within the past 5 years except localized nonmelanoma skin cancer or carcinoma in situ PRIOR CONCURRENT THERAPY: Chemotherapy: * At least 3 weeks since prior chemotherapy * No more than 2 prior chemotherapy regimens Endocrine therapy: * Concurrent steroids allowed at stable dose Radiotherapy: * No prior radiotherapy to study lesions Other: * At least 3 weeks since prior systemic retinoid or carotenoid therapy * No concurrent anticonvulsants"
1,"{'trial_id': None, 'trial_category': 'therapeutic_interventional', 'age': {'min': 18, 'max': None}, 'region_specific_age': {'japan_min': None}, 'inclusion': {'disease': {'confirmed_by': None, 'cancer_type': '', 'histology_subtype': '', 'stage': 'advanced or metastatic', 'stage_list': [], 'metastatic': True, 'measurable_disease_recist': True, 'biomarker_required': ['microsatellite instability high']}, 'performance_status': {'scale': 'ECOG', 'min': 0, 'max': 1}, 'life_expectancy_weeks': 24, 'prior_therapy': {'required': ['first line therapy'], 'allowed': [], 'disallowed': [], 'max_lines_systemic': 1, 'washout_weeks': {'chemotherapy': 4, 'targeted_therapy': None, 'immunotherapy': 4, 'investigational': None, 'radiation': 2, 'major_surgery': None}}, 'brain_metastases': 'excluded', 'brain_mets_stable_duration_weeks': None, 'organ_function': {'anc': None, 'platelets': None, 'hemoglobin_g_per_dl': None, 'creatinine_clearance_ml_min': None, 'bilirubin_x_uln': None, 'ast_alt_x_uln': None, 'albumin_g_per_dl': None}, 'cardiac': {'qtcf_ms_max': None, 'recent_mi_months_exclusion': 6, 'nyha_class_max': None, 'lvef_percent_min': None}, 'contraception_required': None, 'other_inclusions': ['Completion of pre-treatment tumour biopsy required', 'Radiologic and/or biochemical evidence of disease progression']}, 'exclusion': {'pregnant_or_breastfeeding': None, 'active_cns_metastases': True, 'uncontrolled_intercurrent_illness': True, 'grade_2_or_higher_neuropathy': None, 'history_of': ['prior therapy with anti-PD-1/PD-L1/PD-L2 agent with Grade 3 or higher immune-related toxicity', 'survivin-based vaccine(s) and/or immunotherapies', 'thyroiditis', 'non-infectious pneumonitis requiring steroid therapy', 'bowel obstruction', 'cerebrovascular event within 6 months', 'myocardial infarction within 6 months', 'allogenic tissue/solid organ transplant'], 'concurrent_medications_disallowed': ['steroid therapy or other immunosuppressive', 'live attenuated vaccines'], 'other_exclusions': ['Clinical ascites or pleural fluid that cannot be managed', 'Malignant bowel obstruction', 'For OvCa, any single lesion greater than 5 cm', 'Autoimmune disease requiring treatment within the last two years (except replacement therapy)', 'GI condition that might limit absorption of oral agents', 'Acute or chronic skin and/or microvascular disorders', 'Edema or lymphedema in the lower limbs > grade 2', 'Severe hypersensitivity (≥ Grade 3) to pembrolizumab']}}","Key Inclusion Criteria: * Subjects with advanced or metastatic solid tumours who have completed treatment with first line therapy: 1. Epithelial ovarian, fallopian tube, or peritoneal cancer 2. Hepatocellular carcinoma 3. Non-small cell lung cancer 4. Urothelial cancer 5. Microsatellite instability high solid tumours, other than the above indications * Radiologic and/or biochemical evidence of disease progression * Completion of pre-treatment tumour biopsy * Must have measurable disease by RECIST v1.1 * Ambulatory with an ECOG 0-1 * Life expectancy ≥ 6 months * Meet protocol-specified laboratory requirements Key Exclusion Criteria: * Chemotherapy or immunotherapy within treatment within 28 days of start of study treatment * Radiotherapy within treatment within 2 weeks of start of study treatment * Prior therapy with an anti-PD-1, anti-PD-L1, or anti PD L2 agent or with an agent directed to another stimulatory or co-inhibitory T cell receptor where subject was discontinued from that treatment due to a Grade 3 or higher immune-related toxicity * For NSCLC subjects: Known EGFR mutations or ALK rearrangements * Prior receipt of survivin-based vaccine(s) and/or immunotherapies * Concurrent second malignancy other than non-melanoma skin cancer, cervical carcinoma in situ, or controlled bladder cancer * Clinical ascites or pleural fluid that cannot be managed * Malignant bowel obstruction or recent history of bowel obstruction * For OvCa, subjects with any single lesion greater than 5 cm * Autoimmune disease requiring treatment within the last two years (except replacement therapy) * Recent history of thyroiditis * Any history of (non-infectious) pneumonitis that required steroid therapy or current pneumonitis * Presence of a serious acute or chronic infection * Active CNS metastases and/or carcinomatous meningitis * GI condition that might limit absorption of oral agents * Allogenic tissue/solid organ transplant * Other serious intercurrent chronic or acute illness, including myocardial infarction or cerebrovascular event within 6 months * Ongoing treatment with steroid therapy or other immunosuppressive * Receipt of live attenuated vaccines * Acute or chronic skin and/or microvascular disorders * Edema or lymphedema in the lower limbs \> grade 2 * Severe hypersensitivity (≥ Grade 3) to pembrolizumab"
2,"{'trial_id': None, 'trial_category': 'therapeutic_interventional', 'age': {'min': 18, 'max': None}, 'region_specific_age': {'japan_min': None}, 'inclusion': {'disease': {'confirmed_by': 'histology or cytology', 'cancer_type': 'small cell lung cancer', 'histology_subtype': '', 'stage': 'limited or extensive', 'stage_list': ['LS-SCLC', 'ES-SCLC'], 'metastatic': None, 'measurable_disease_recist': True, 'biomarker_required': []}, 'performance_status': {'scale': 'ECOG', 'min': 0, 'max': 1}, 'life_expectancy_weeks': 12, 'prior_therapy': {'required': ['platinum-containing dual-drug chemotherapy'], 'allowed': [], 'disallowed': ['immune checkpoint proteins/antibody/medicine'], 'max_lines_systemic': 1, 'washout_weeks': {'chemotherapy': None, 'targeted_therapy': None, 'immunotherapy': None, 'investigational': None, 'radiation': None, 'major_surgery': None}}, 'brain_metastases': 'allowed_if_treated_and_stable', 'brain_mets_stable_duration_weeks': 12, 'organ_function': {'anc': None, 'platelets': None, 'hemoglobin_g_per_dl': None, 'creatinine_clearance_ml_min': None, 'bilirubin_x_uln': None, 'ast_alt_x_uln': None, 'albumin_g_per_dl': None}, 'cardiac': {'qtcf_ms_max': None, 'recent_mi_months_exclusion': None, 'nyha_class_max': None, 'lvef_percent_min': None}, 'contraception_required': True, 'other_inclusions': ['No radiotherapy contraindications', 'At least one extracranial measurable lesion (RECIST v1.1)', 'For lesions that have received radiotherapy, progression after radiotherapy must be confirmed']}, 'exclusion': {'pregnant_or_breastfeeding': None, 'active_cns_metastases': True, 'uncontrolled_intercurrent_illness': None, 'grade_2_or_higher_neuropathy': None, 'history_of': ['autoimmune diseases', 'HIV infection', 'alcoholism or drug abuse', 'grade 3 or above radiation pneumonitis'], 'concurrent_medications_disallowed': [], 'other_exclusions': ['Active chronic hepatitis B or active hepatitis C', 'Serious hypersensitive reaction to monoclonal antibodies', 'History of uncontrolled allergic asthma', 'Primary CNS tumors or meningeal metastases']}}","Inclusion Criteria: 1. Patients with histologically or cytologically confirmed LS-SCLC or ES-SCLC and experienced progression since first-line standard platinum containing dual-drug chemotherapy. 2. Patients whose initial diagnosis was limited must undergo radical chest radiotherapy and the time of tumor progression is not less than 3 months from the end of radiotherapy, or cannot receive radical chest radiotherapy due to specific reasons 3. At least one extracranial measurable lesion (RECIST v1.1), and for a lesion that has received radiotherapy, progression of the lesion after radiotherapy must be confirmed. 4. Patients with brain metastases are allowed to receive previous radiotherapy and their condition is stable, but the time to the end of radiotherapy must not be less than 3 months. 5. No radiotherapy contraindications were judged by the radiologist 6. ECOG performance status of 0 or 1. 7. Patients with life expectancy ≥ 3 months. 8. Patients must have adequate organ function. 9. Fertile men and women of childbearing potential must agree to use an effective method of birth control from providing signed consent and for 6 months after last study drug administration. Exclusion Criteria: 1. Subjects known to have primary CNS tumors or meningeal metastases or unstable CNS metastases. 2. Patients with active autoimmune diseases or history of autoimmune diseases should be excluded. 3. Patients who have received immune checkpoint proteins/antibody/medicine (including PD-1, PD-L1, etc) for treatment. 4. Known history of HIV infection. 5. Subjects with active chronic hepatitis B or active hepatitis C . 6. Patients who have serious hypersensitive reaction to monoclonal antibodies, and have history of uncontrolled allergic asthma. 7. Known history of alcoholism or drugs abuse. 8. Subjects with history of radiation pneumonitis of grade 3 or above, regardless of recovered or not."
3,"{'trial_id': None, 'trial_category': 'therapeutic_interventional', 'age': {'min': 21, 'max': None}, 'region_specific_age': {'japan_min': None}, 'inclusion': {'disease': {'confirmed_by': None, 'cancer_type': 'non-small cell lung cancer', 'histology_subtype': 'adenocarcinoma', 'stage': 'metastatic', 'stage_list': [], 'metastatic': True, 'measurable_disease_recist': None, 'biomarker_required': []}, 'performance_status': {'scale': 'ECOG', 'min': 1, 'max': 2}, 'life_expectancy_weeks': None, 'prior_therapy': {'required': [], 'allowed': [], 'disallowed': [], 'max_lines_systemic': None, 'washout_weeks': {'chemotherapy': None, 'targeted_therapy': None, 'immunotherapy': None, 'investigational': None, 'radiation': None, 'major_surgery': None}}, 'brain_metastases': None, 'brain_mets_stable_duration_weeks': None, 'organ_function': {'anc': None, 'platelets': None, 'hemoglobin_g_per_dl': None, 'creatinine_clearance_ml_min': None, 'bilirubin_x_uln': None, 'ast_alt_x_uln': None, 'albumin_g_per_dl': None}, 'cardiac': {'qtcf_ms_max': None, 'recent_mi_months_exclusion': None, 'nyha_class_max': None, 'lvef_percent_min': None}, 'contraception_required': None, 'other_inclusions': ['Asian ethnicity', 'Non-smoker', 'Pleural effusion', 'Fit for pleuroscopy and biopsy', 'Suitable for palliative chemotherapy or EGFR-TKI']}, 'exclusion': {'pregnant_or_breastfeeding': None, 'active_cns_metastases': None, 'uncontrolled_intercurrent_illness': None, 'grade_2_or_higher_neuropathy': None, 'history_of': [], 'concurrent_medications_disallowed': [], 'other_exclusions': ['Non-Asian ethnicity', 'Smoker', 'Poor ECOG status', 'Unwilling to undergo pleuroscopy and biopsy', 'Not suitable for palliative chemotherapy or EGFR-TKI', 'Other histological subtypes and stages of disease']}}",Inclusion Criteria: * Asian ethnicity * Age \> 21 years old * Non-smoker * Metastatic pleural effusion due to lung adenocarcinoma * Good ECOG status (ECOG 1-2) fit to undergo pleuroscopy and biopsy and agreeable for palliative chemotherapy and or EGFR-TKI Exclusion Criteria: * Non-Asian ethnicity * Age \< 21 years old * Smoker * Subjects with poor ECOG status or unwilling to undergo pleuroscopy and biopsy * Subjects not suitable to receive palliative chemotherapy or EGFR-TKI * All other histological subtypes and stages of disease
4,"{'trial_id': None, 'trial_category': 'therapeutic_interventional', 'age': {'min': 18, 'max': None}, 'region_specific_age': {'japan_min': None}, 'inclusion': {'disease': {'confirmed_by': None, 'cancer_type': 'small cell lung cancer', 'histology_subtype': '', 'stage': 'I-III or IV without brain metastases', 'stage_list': ['I', 'II', 'III', 'IV'], 'metastatic': False, 'measurable_disease_recist': None, 'biomarker_required': []}, 'performance_status': {'scale': 'ECOG', 'min': None, 'max': None}, 'life_expectancy_weeks': None, 'prior_therapy': {'required': ['chemo-radiotherapy for stage I-III or chemotherapy for stage IV'], 'allowed': [], 'disallowed': [], 'max_lines_systemic': None, 'washout_weeks': {'chemotherapy': None, 'targeted_therapy': None, 'immunotherapy': None, 'investigational': None, 'radiation': None, 'major_surgery': None}}, 'brain_metastases': 'excluded', 'brain_mets_stable_duration_weeks': None, 'organ_function': {'anc': None, 'platelets': None, 'hemoglobin_g_per_dl': None, 'creatinine_clearance_ml_min': None, 'bilirubin_x_uln': None, 'ast_alt_x_uln': None, 'albumin_g_per_dl': None}, 'cardiac': {'qtcf_ms_max': None, 'recent_mi_months_exclusion': None, 'nyha_class_max': None, 'lvef_percent_min': None}, 'contraception_required': None, 'other_inclusions': ['Sufficient proficiency in Dutch']}, 'exclusion': {'pregnant_or_breastfeeding': True, 'active_cns_metastases': True, 'uncontrolled_intercurrent_illness': None, 'grade_2_or_higher_neuropathy': None, 'history_of': ['prior radiotherapy to the brain', 'primary brain tumors', 'previous malignancy within 2 years ago except basal cell carcinoma or carcinoma in situ of the cervix'], 'concurrent_medications_disallowed': [], 'other_exclusions': ['Any systemic anticancer treatment during PCI or within 3 weeks before start PCI']}}","Inclusion Criteria: * \- Small Cell Lung Cancer patients (stage I-III or stage IV without clinical or radiological evidence of brain metastases) candidate for PCI, i.e. without progressive disease after chemo-radiotherapy in stage I-III or after a remission after chemotherapy in stage IV * Sufficient proficiency in Dutch Exclusion Criteria: * Prior radiotherapy to the brain * Clinical evidence for brain metastases or primary brain tumors- Evidence of progressive extracranial metastatic disease * Previous malignancy \< 2 years ago except for adequately treated basal cell carcinoma of the skin and carcinoma in situ of the cervix * Any systemic anticancer treatment during PCI or within 3 weeks before start PCI * Pregnancy or lactation"
5,"{'trial_id': None, 'trial_category': 'therapeutic_interventional', 'age': {'min': 20, 'max': None}, 'region_specific_age': {'japan_min': None}, 'inclusion': {'disease': {'confirmed_by': None, 'cancer_type': 'non-small cell lung cancer', 'histology_subtype': '', 'stage': 'stage III or IV', 'stage_list': ['III', 'IV'], 'metastatic': None, 'measurable_disease_recist': None, 'biomarker_required': []}, 'performance_status': {'scale': 'ECOG', 'min': None, 'max': None}, 'life_expectancy_weeks': None, 'prior_therapy': {'required': [], 'allowed': [], 'disallowed': ['prior EGFR TKI'], 'max_lines_systemic': 0, 'washout_weeks': {'chemotherapy': None, 'targeted_therapy': None, 'immunotherapy': None, 'investigational': None, 'radiation': None, 'major_surgery': None}}, 'brain_metastases': None, 'brain_mets_stable_duration_weeks': None, 'organ_function': {'anc': None, 'platelets': None, 'hemoglobin_g_per_dl': None, 'creatinine_clearance_ml_min': None, 'bilirubin_x_uln': None, 'ast_alt_x_uln': None, 'albumin_g_per_dl': None}, 'cardiac': {'qtcf_ms_max': None, 'recent_mi_months_exclusion': None, 'nyha_class_max': None, 'lvef_percent_min': None}, 'contraception_required': None, 'other_inclusions': ['Able to speak, write, read Chinese', 'Clear consciousness, no cognitive impairment', 'Receiving treatment at Taipei medical center']}, 'exclusion': {'pregnant_or_breastfeeding': None, 'active_cns_metastases': None, 'uncontrolled_intercurrent_illness': None, 'grade_2_or_higher_neuropathy': None, 'history_of': [], 'concurrent_medications_disallowed': [], 'other_exclusions': []}}","Inclusion Criteria: 1. Patients older than 20 years of age. 2. The first time to accept the target drug treatment (Iressa, Tarceva) treatment, diagnosis of lung cancer third or fourth stage of the women. 3. There are computer equipment at home to play the disc. 4. can speak, write, read Chinese, clear consciousness, no cognitive impairment. 5. In the medical center of Taipei, a medical department, medical or surgical clinic for treatment. Exclusion Criteria: * none"


In [None]:
# df_1[["eligibility_json","Eligibility"]].iloc[11:20]

In [18]:
df_1_nice = df_1.copy()

In [19]:
# ============= Save CSV ============
csv_name = "mavrik_trials_parse+preview.csv"
df_1_nice.to_csv(csv_name, index=False)
print("Saved:", csv_name)

Saved: mavrik_trials_parse+preview.csv


In [20]:
df_1_nice = pd.read_csv(csv_name)

In [22]:
pd.reset_option('display.max_colwidth')
df_1_nice

Unnamed: 0,NCT_ID,Title,Official_Title,Status,Start_Date,Primary_Completion,Study_Type,Phases,Enrollment,Conditions,...,Min_Age,Max_Age,Lead_Sponsor,Brief_Summary,Eligibility,Primary_Measures,Secondary_Measures,Interventions,elig_len,eligibility_json
0,NCT00009971,Fenretinide in Treating Patients With Recurren...,Phase II Trial Of Fenretinide (NSC-374551; IND...,COMPLETED,2000-11,2005-09,INTERVENTIONAL,PHASE2,32,Lung Cancer,...,18 Years,,National Cancer Institute (NCI),Phase II trial to study the effectiveness of f...,DISEASE CHARACTERISTICS: * Histologically or c...,,,DRUG:fenretinide,1815,"{'trial_id': None, 'trial_category': 'therapeu..."
1,NCT03836352,"Study of an Immunotherapeutic, DPX-Survivac, i...","A Phase 2, Open-label, Multicenter, Study of a...",ACTIVE_NOT_RECRUITING,2018-12-21,2023-12-31,INTERVENTIONAL,PHASE2,184,"Ovarian Cancer, Hepatocellular Carcinoma, Non-...",...,18 Years,,"ImmunoVaccine Technologies, Inc. (IMV Inc.)",This study will assess the safety and efficacy...,Key Inclusion Criteria: * Subjects with advanc...,Efficacy as measured by objective response rat...,Objective response rate | Duration of response...,OTHER:DPX-Survivac | DRUG:Cyclophosphamide | D...,2335,"{'trial_id': None, 'trial_category': 'therapeu..."
2,NCT04421352,Safety and Tolerability Evaluation of Low-dose...,"A Phase Ia/Ib, Open-Label, Multiple-Dose, Dose...",COMPLETED,2021-01-12,2023-12-22,INTERVENTIONAL,PHASE1,11,Relapsed Small Cell Lung Cancer,...,18 Years,75 Years,CStone Pharmaceuticals,A Phase Ia/Ib Safety and Tolerability Evaluati...,Inclusion Criteria: 1. Patients with histologi...,To determine the recommended radiation dose an...,Incidence of Adverse Events (AE) and Dose-Limi...,DRUG:CS1001,1904,"{'trial_id': None, 'trial_category': 'therapeu..."
3,NCT01774526,Molecular Epidemiology of Lung Adenocarcinoma ...,Molecular Epidemiology of Lung Adenocarcinoma ...,UNKNOWN,2010-12,2016-12,INTERVENTIONAL,,40,Lung Cancer,...,21 Years,,"National University Hospital, Singapore",Lung Cancer continues to be the major cause of...,Inclusion Criteria: * Asian ethnicity * Age \>...,Descriptive study of our patients with lung ad...,,OTHER:Characterise the molecular epidemiology ...,535,"{'trial_id': None, 'trial_category': 'therapeu..."
4,NCT01780675,Hippocampus Avoidance PCI vs PCI,Prophylactic Cranial Irradiation With or Witho...,COMPLETED,2013-04,2019-04-01,INTERVENTIONAL,PHASE3,168,"Small Cell Lung Cancer, Lung Cancer",...,18 Years,,The Netherlands Cancer Institute,Using Intensity Modulated radiotherapy it is p...,Inclusion Criteria: * \- Small Cell Lung Cance...,neurocognitive decline,safety,RADIATION:Radiation Prophylactic Cranial Irrad...,741,"{'trial_id': None, 'trial_category': 'therapeu..."
5,NCT03231111,The Effectiveness of Using Preventive Multimed...,The Effectiveness of Using Preventive Multimed...,UNKNOWN,2017-07-25,2018-07-01,INTERVENTIONAL,,60,Lung Cancer,...,20 Years,80 Years,National Taiwan University Hospital,The Effectiveness of Using Preventive Multimed...,Inclusion Criteria: 1. Patients older than 20 ...,skindex-29,,OTHER:Preventive Multimedia Teaching Program,469,"{'trial_id': None, 'trial_category': 'therapeu..."
