In [1]:
# Load the API keys for E2B, OpenAI and PubMed and export them as environment variables
import os


def _read_key(path):
    """Return the content of the key file at `path` with surrounding whitespace removed.

    Args:
        path: Path of a text file that holds a single API key.

    Returns:
        The key as a string, without leading/trailing whitespace or newlines.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, "r") as f:
        # Strip the value: a trailing newline left in the file (editors and
        # `echo` add one) would otherwise end up inside the environment variable.
        return f.read().strip()


# E2B sandbox API key
E2B_API_KEY = _read_key("e2b.key")
os.environ["E2B_API_KEY"] = E2B_API_KEY

# OpenAI API key
OPENAI_API_KEY_GLOBAL = _read_key("openai.key")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY_GLOBAL

# PubMed API key
DEFAULT_PUBMED_API_KEY = _read_key("pubmed.key")
os.environ["PUBMED_API_KEY"] = DEFAULT_PUBMED_API_KEY

# Get example paper's content

In [2]:
from trialmind.pubmed import pmid2biocxml, parse_bioc_xml

# PMIDs of the example papers
pmid_list = ['26451310']

# Retrieve the BioC XML for the listed PMIDs, then parse each returned document
res = pmid2biocxml(pmid_list)
res = [parse_bioc_xml(r) for r in res]

# Build one text per paper by joining the content of its passages with newlines
papers = []
for parsed in res:
    paper_content = "\n".join(passage['content'] for passage in parsed["passage"])
    papers.append(paper_content)
papers

["Tolerance and efficacy of autologous or donor-derived T cells expressing CD19 chimeric antigen receptors in adult B-ALL with extramedullary leukemia\nThe engineering of T lymphocytes to express chimeric antigen receptors (CARs) aims to establish T cell-mediated tumor immunity rapidly. In this study, we conducted a pilot clinical trial of autologous or donor- derived T cells genetically modified to express a CAR targeting the B-cell antigen CD19 harboring 4-1BB and the CD3Î¶ moiety. All enrolled patients had relapsed or chemotherapy-refractory B-cell lineage acute lymphocytic leukemia (B-ALL). Of the nine patients, six had definite extramedullary involvement, and the rate of overall survival at 18Â\xa0weeks was 56%. One of the two patients who received conditioning chemotherapy achieved a three-month durable complete response with partial regression of extramedullary lesions. Four of seven patients who did not receive conditioning chemotherapy achieved dramatic regression or a mixed r

# Study result extraction

In [3]:
from trialmind.api import StudyResultExtraction

# Outcome and cohort descriptions passed to the extraction call
outcome_description = "Complete Response Rate: defined as the absence of detectable cancer"
cohort_description = "Patients with refractory or relapsed hematologic or solid malignancies. Treated by CAR-T therapy."

# Run the extraction over the paper texts collected in `papers`
api = StudyResultExtraction()
results = api.run(
    papers=papers,
    outcome=outcome_description,
    cohort=cohort_description,
    llm="openai-gpt-4o",
)

In [4]:
# Display the value returned by the extraction call
results

[[{'Group Name': 'Patients with refractory or relapsed hematologic or solid malignancies treated by CAR-T therapy',
   'N': 9,
   'Results': '6/9 patients showed objective clinical response; 2/9 achieved complete response'}]]

# Study result standardization

In [5]:
from trialmind.api import StudyResultStandardization
from trialmind.sandbox import E2BSandbox
import json

# Sandbox used to execute the code; its id is handed to the standardization call
sandbox = E2BSandbox()

api = StudyResultStandardization()

# Serialize the extracted results as pretty-printed JSON.
# NOTE(review): only results[0] (the first paper) is forwarded - confirm whether
# the results of every paper should be passed when more than one PMID is used.
first_paper_results = json.dumps(results[0], indent=4)

standardized = api.run(
    population="Patients with refractory or relapsed hematologic or solid malignancies",
    intervention="CAR-T therapy",
    comparator="",
    outcome="Complete Response Rate: defined as the absence of detectable cancer",
    data_type="binary",
    llm="openai-gpt-4o",
    results=[first_paper_results],
    sandbox_id=sandbox.sandbox_id,
)

In [9]:
# Print the 'raw_data' entry of the first standardized result
print(standardized[0]['raw_data'])

[{'Group': 'Patients with refractory or relapsed hematologic or solid malignancies treated by CAR-T therapy', 'N': 9, 'Complete Response Rate': 2}]


In [10]:
# Print the 'code' entry of the first standardized result
print(standardized[0]['code'])


import pandas as pd
import numpy as np

# df = pd.DataFrame([{'Group': 'Patients with refractory or relapsed hematologic or solid malignancies treated by CAR-T therapy', 'N': 9, 'Complete Response Rate': 2}])

def classify_arms(df: pd.DataFrame):
    # Classify arms based on the research question
    df['Group'] = df['Group'].apply(lambda x: 'Experimental' if 'CAR-T therapy' in x else 'Control')
    return df

def consolidate_data(df: pd.DataFrame):
    # Consolidate data for each group
    experimental = df[df['Group'] == 'Experimental']
    control = df[df['Group'] == 'Control']
    
    exp_event = experimental['Complete Response Rate'].sum()
    exp_total = experimental['N'].sum()
    exp_no_event = exp_total - exp_event
    
    if not control.empty:
        ctrl_event = control['Complete Response Rate'].sum()
        ctrl_total = control['N'].sum()
        ctrl_no_event = ctrl_total - ctrl_event
    else:
        ctrl_event = 0
        ctrl_total = 0
        ctrl_no_event = 0
  