In [None]:
from model import *

In [None]:
from transformers import StoppingCriteria, StoppingCriteriaList
import torch

# Text sequences that should end generation once the model emits them.
# Alternative set tried earlier: ['\nHuman:', '\n```\n']
stop_list = ["\n\n", "\n\n\n", "Task:\nBelow"]

# Encode each stop sequence (no special tokens added) straight into a
# LongTensor placed on `device`, so it can be compared against generated ids.
stop_token_ids = [
    torch.LongTensor(tokenizer(stop_text, add_special_tokens=False)['input_ids']).to(device)
    for stop_text in stop_list
]
print(stop_token_ids)

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    """Stopping criterion that ends generation once the output ends with a stop sequence.

    The stop sequences are read from the module-level ``stop_token_ids`` list
    (one 1-D tensor of token ids per sequence) every time the criterion is called.
    """

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the first row of ``input_ids`` ends with any stop sequence.

        Only row 0 of the batch is inspected; ``scores`` and ``kwargs`` are unused.
        """
        generated = input_ids[0]
        for stop_ids in stop_token_ids:
            stop_len = len(stop_ids)
            # Skip sequences that cannot match: an empty stop sequence, or one
            # longer than the ids seen so far. Without this guard the slice is
            # shorter than ``stop_ids`` and ``torch.eq`` either raises a
            # shape-mismatch error or broadcasts a single token into a false match.
            if stop_len == 0 or generated.shape[0] < stop_len:
                continue
            # Compare on the device of the generated ids so a stop tensor that
            # was created on a different device does not raise.
            tail = generated[-stop_len:]
            if bool(torch.eq(tail, stop_ids.to(tail.device)).all()):
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [28]:
from langchain.llms import HuggingFacePipeline

# `transformers` is referenced as a module below; import it explicitly instead
# of depending on the star-import from `model` to have put it in scope.
import transformers

# Text-generation pipeline wrapping the already-loaded model and tokenizer.
generate_text = transformers.pipeline(
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    stopping_criteria=stopping_criteria,  # without this model rambles during chat
    # sampling 'randomness': must be strictly positive when do_sample=True;
    # values near 0 are close to greedy decoding, larger values are more random
    temperature=0.1,
    max_new_tokens=256,  # max number of tokens to generate in the output
    repetition_penalty=1.1,  # without this output begins repeating
    do_sample=True,
    # streamer = transformers.TextStreamer(tokenizer)
)

# Expose the pipeline through LangChain's LLM interface.
llm = HuggingFacePipeline(pipeline=generate_text)

In [73]:
from prompt_examples.summarization_examples import prompt, prompt_zero, prompt_zero_minimal, examples

In [None]:
# Preview the rendered `prompt_zero_minimal` template by filling its `criteria`
# placeholder with a sample block of inclusion/exclusion criteria text.
print(prompt_zero_minimal.format(criteria="""Inclusion Criteria
    •	COHORT A: At least one measurable CNS metastasis, defined as >= 10 mm in at least one dimension
    •	COHORT A: Unequivocal evidence of new and/or progressive brain metastases, and at least one of the following scenarios:
    •	Treated with stereotactic radiosurgery (SRS) or surgery with residual un-treated lesions remaining. Such participants are eligible for immediate enrollment on this study providing that at least one untreated lesion is measurable
    •	Participants who have had prior whole brain radiotherapy (WBRT) and/or SRS and then whose lesions have subsequently progressed or who have new lesions are also eligible. In this case, lesions which have been treated with SRS may be considered as target lesions if there is unequivocal evidence, in the opinion of the treating physician, of progression following SRS
    •	Participants who have not previously been treated with cranial radiation (e.g., WBRT or SRS) are eligible to enter the study, but such participants must be asymptomatic from their CNS metastases and not requiring corticosteroids for symptom control
    •	Participants who present with systemic stable/absent or progressive disease are eligible to this trial, as long as they fulfill one of the above criteria
    •	COHORT B: New and/or progressive brain metastasis(es) with clinical indication for resection
    •	Pathologically confirmed HER2-positive MBC by local laboratory with the following requirements: HER2 overexpressed or amplified (immunohistochemistry of 3+ or HER2 gene amplification by in situ hybridization with a ratio of HER2-gene signals to centromere 17 signals >= 2.0 or average HER2 copy number >= 6.0 signals/cells)
    •	Eastern Cooperative Oncology Group (ECOG) performance status of =< 2
    •	Left ventricular ejection fraction (LVEF) >= 50% by echocardiogram (ECHO) or multigated acquisition (MUGA) scan

    Exclusion Criteria
    •	Visceral crisis or impending visceral crisis at time of screening
    •	CNS complications for whom urgent neurosurgical intervention is indicated (e.g., resection, shunt placement)
    •	Known leptomeningeal metastases (defined as positive CSF cytology and/or unequivocal radiological evidence of clinically significant leptomeningeal involvement. CSF sampling is not required in the absence of suggestive symptoms to exclude leptomeningeal involvement)
    •	Patients with known contraindication to magnetic resonance imaging (MRI) (e.g., due to pacemaker, ferromagnetic implants, claustrophobia, extreme obesity, hypersensitivity, etc.). However, head computed tomography (CT) with contrast may be used in place of MRI at baseline and throughout the trial if MRI is contraindicated and a participant’s brain metastases are clearly measurable by head CT
    •	Chemotherapy or targeted therapy within 14 days prior to initiation of protocol therapy. No washout is required for trastuzumab
    •	Has received prior therapy with a PI3K or mTOR inhibitor
    •	No washout is required for endocrine therapy. If a patient has been on ovarian suppression for at least 28 days prior to initiation of study treatment, continuation of ovarian suppression is permitted on protocol. Starting a new endocrine therapy during protocol therapy is not permitted
    •	Current use or history of receiving a non-approved, investigational treatment within 14 days prior to initiation of protocol therapy
    •	Subjects with a history of hypersensitivity to compounds of similar biologic composition to paxalisib (GDC-0084) or any constituent of the product
    •	The subject has an uncontrolled intercurrent illness, including, but not limited to, ongoing or active infection, uncontrolled hypertension, unstable angina pectoris, uncontrolled cardiac arrhythmia, congestive heart failure-New York Heart Association class III or IV, active ischemic heart disease, myocardial infarction within the previous six months, uncontrolled diabetes mellitus (DM), gastric or duodenal ulceration diagnosed within the previous 6 months, chronic liver or renal disease, or severe malnutrition. If a participant has controlled DM but is unable to monitor blood sugars at home, they will be excluded from the trial
"""))

In [24]:
from token_counting import *
# Compute token statistics for `examples`; presumably this is what defines the
# *_LEN names reported below (confirm in token_counting).
globalize_token_metrics(examples)

# One "label value" line per metric, in the same order and spacing as before.
print(
    *(
        ' '.join((label, str(value)))
        for label, value in (
            ('  avg prompt:', AVG_PROMPT_LEN),
            ('  min prompt:', MIN_PROMPT_LEN),
            ('  max prompt:', MAX_PROMPT_LEN),
            ('avg response:', AVG_RES_LEN),
        )
    ),
    sep='\n',
)

  avg prompt: 698
  min prompt: 698
  max prompt: 698
avg response: 214


In [76]:
from langchain.chains import LLMChain

# Build one chain per prompt template, all sharing the same `llm`.
llm_chain, llm_chain_zero, llm_chain_zero_minimal = (
    LLMChain(llm=llm, prompt=template)
    for template in (prompt, prompt_zero, prompt_zero_minimal)
)

In [None]:
import langchain
# Enable LangChain's global debug/verbose flags for the chain runs below.
langchain.debug = True
langchain.verbose = True

# Trial to process; input and outputs all live in this trial's folder.
n = '02'
folder = f'test_results/trial{n}'

with open(f'{folder}/unstructured_ec.txt', encoding='utf-8') as filein:
    doc = filein.read()

# Run every chain on the same criteria text before writing anything to disk.
results = llm_chain.invoke(input={'criteria': doc})
results_zero = llm_chain_zero.invoke(input={'criteria': doc})
results_zero_minimal = llm_chain_zero_minimal.invoke(input={'criteria': doc})

# Write each chain's generated text to its own file inside the trial folder.
for out_name, chain_result in (
    ('summarization_task_output.txt', results),
    ('summarization_task_zeroshot_output.txt', results_zero),
    ('summarization_task_minimal_output.txt', results_zero_minimal),
):
    with open(f'{folder}/{out_name}', 'w', encoding='utf-8') as fileout:
        fileout.write(chain_result['text'])

In [79]:
# Prompts copied from LangChain's log output (emitted when langchain.debug / langchain.verbose are enabled)
prompt_used = "Task:\nYou are in the role of an abstractor who will analyze eligibility criteria for a clinical trial and represent the information as a list of individual criteria in a tabular format that will contain the following columns: \nType: listing whether criterion is an Exclusion or Inclusion criterion\nOriginal Text: the original text of the criterion\nDisease/Condition: If the criterion contains a disease or condition name it by its canonical name\nProcedure: If the criterion contains a therapeutic procedure name it by its canonical name\nDrug:  If the criterion contains a therapeutic drug name it by its canonical name\nBiomarker:  If the criterion contains a biomarker name it by its canonical name\nComputable Rule: Translate the criteria into a logical expression that could be interpreted programmatically\nHere is the criteria to analyze:\n    Inclusion Criteria\n    •\tAge 18 or older\n    •\tWilling and able to provide informed consent\n    •\tMetastatic breast cancer, biopsy proven\n    o\tEstrogen receptor (ER)+/HER2-, defined as > 5% ER+ staining\n    o\tHER2+ (regardless of ER status), including HER2-low and high expressors\n    •\tHistory of at least 6 months, sustained response to systemic therapy (clinically or radiographically defined as complete or stable response without progression)\n    •\tIsolated site of disease progression on fludeoxyglucose F-18 (FDG) positron emission tomography (PET) scan\n    •\tConsented to 12-245\n    •\tEastern Cooperative Oncology Group (ECOG) performance status 0-1\n\n    Exclusion Criteria\n    •\tPregnancy\n    •\tSerious medical comorbidity precluding radiation, including connective tissue disorders\n    •\tIntracranial disease (including previous intracranial involvement)\n    •\tPrevious radiotherapy to the intended treatment site that precludes developing a treatment plan that respects normal tissue tolerances \n\n| Type | Original Text | Disease/Condition | Procedure | Drug | Biomarker | Computable 
Rule |\n| --- | --- | --- | --- | --- | --- | --- |\n| Inclusion | Metastatic breast cancer, biopsy proven | Metastatic breast cancer | | | | diagnosis == \"Metastatic breast cancer\" |\n| Inclusion | Estrogen receptor (ER)+/HER2-, defined as > 5% ER+ staining | | | | HER2- | HER2 > 5% ER+ staining |\n| Inclusion | HER2+ (regardless of ER status), including HER2-low and high expressors | | | | HER2+ | |\n| Exclusion | Previous radiotherapy to the intended treatment site that precludes developing a treatment plan that respects normal tissue tolerances | | Prior radiation therapy | | | Prior radiation therapy is True |\n\n\nTask:\nYou are in the role of an abstractor who will analyze eligibility criteria for a clinical trial and represent the information as a list of individual criteria in a tabular format that will contain the following columns:\nType: listing whether criterion is an Exclusion or Inclusion criterion\nOriginal Text: the original text of the criterion\nDisease/Condition: If the criterion contains a disease or condition name it by its canonical name\nProcedure: If the criterion contains a therapeutic procedure name it by its canonical name\nDrug:  If the criterion contains a therapeutic drug name it by its canonical name\nBiomarker:  If the criterion contains a biomarker name it by its canonical name\nComputable Rule: Translate the criteria into a logical expression that could be interpreted programmatically\nHere is the criteria to analyze:\n    Inclusion Criteria\no\tCompletion of informed consent prior to any study specific procedures \no\tPatients must agree to tissue collection for correlative studies at the specified timepoints \no\tPatients must consent to the MD Anderson Immunotherapy Platform laboratory protocol PA13-0291 \no\tMale aged 18 years and above \no\tHistologically or cytologically confirmed prostate carcinoma \no\tPresence of metastatic disease documented on imaging studies (bone scan, computed tomography [CT] and/or magnetic 
resonance imaging [MRI] scans)\no\tPatients must meet at least one of the following AVPC criteria:\n•\tHistologically proven small cell (neuroendocrine) prostate carcinoma\n•\tExclusive visceral metastases\n•\tPredominantly lytic bone metastases identified by plain x-ray or CT scan\n•\tBulky (>= 5 cm in longest dimension) lymphadenopathy or high-grade tumor mass in prostate/pelvis\n•\tLow PSA (=< 10 ng/mL) at initial presentation (prior to androgen ablation or at symptomatic progression in the castrate-setting) plus high volume (>= 20) bone metastases\n•\tElevated serum lactate dehydrogenase (LDH) (>= 2 x ULN) or elevated serum carcinoembryonic antigen (CEA) (>= 2 x ULN) in the absence of other etiologies\n•\tShort interval (=< 180 days) to castrate-resistant progression following initiation of hormonal therapy\n•\tKnown loss or mutation (by Clinical Laboratory Improvement Act [CLIA] certified molecular testing, immunohistochemistry [IHC] and/or deoxyribonucleic acid [DNA] sequencing) in at least 2 of Tp53, RB1 and PTEN defined as:\n\tAVPC determination by immunohistochemistry. As previously described, tumor samples are considered negative (and thus abnormal) for RB1 and PTEN if their labeling index is =< 10% and positive (and thus aberrant) for Tp53 if their labeling index is >= 10%, where the labeling index is defined as the percentage of positive cells, and calculated as the number of positively stained epithelial cells divided by the total number of epithelial cells, at X200 magnification\n\tAVPC determination by DNA sequencing. 
As previously described, the TP53, RB1 and PTEN genes will be considered aberrant if they contain exonic nonsynonymous missense or stop-gain mutations, frameshift or non frameshift indels (insertions or deletions), and/or copy number losses\n\tPatients who have castration-resistant disease progression per RECIST in the absence of PSA values rising to >= 1.0ng/ml as per Prostate Cancer Working Group 3 (PCWG3) PSA progression criteria (inclusion--- t) | \no\tPatients must have documented evidence of progressive disease as defined by any of the following: a) PSA progression: minimum of 2 rising values (3 measurements) obtained a minimum of 7 days apart with the last result being at least >= 1.0 ng/mL; b) New or increasing non-bone disease (RECIST); c) Positive bone scan with 2 or more new lesions (Prostate Cancer Working Group 3 [PCWG3]); d) Increasing symptoms unequivocally attributed to disease progression as judged by the treating physician and the principal investigator (PI) \no\tSurgically or ongoing medically castrated, with baseline testosterone levels of =< 50 ng/dL (=< 2.0 nM). Exception: Patients with de novo primary small cell carcinoma of the prostate may begin chemotherapy on study once treatment with an LHRH agonist or antagonist has been initiated, even if testosterone levels have not reached =< 50ng/dL\no\tEastern Cooperative Oncology Group (ECOG) performance status of =< 2\no\tHemoglobin >= 10.0 g/dL (unless due to bone marrow infiltration by tumor, in which case hemoglobin > 8 g/dL is allowed) (within 7 days prior to treatment registration). 
Patient may have blood transfusions prior to study enrollment\no\tAbsolute neutrophil count (ANC) >= 1.5 x 10^9/L (unless due to bone marrow infiltration by tumor, in which case ANC > 1,000/mm^3 is allowed) (within 7 days prior to treatment registration)\no\tWhite blood cells (WBC) > 3 x 10^9/L (unless due to bone marrow infiltration by tumor, in which case WBC > 2 x 10^9/L is allowed) (within 7 days prior to treatment registration) \no\tNo features suggestive of myelodysplastic syndrome (MDS)/acute myeloid leukemia (AML) on peripheral blood smear (within 7 days prior to treatment registration)\no\tPlatelet count >= 100 x 10^9/L (unless due to bone marrow infiltration by tumor, in which case platelet > 50,000/mm^3 is allowed) (within 7 days prior to treatment registration) \no\tTotal bilirubin =< 1.5 x institutional upper limit of normal (ULN) (except for patients with known Gilbert’s disease) (within 7 days prior to treatment registration) \no\tAspartate aminotransferase (AST) (serum glutamic oxaloacetic transaminase [SGOT]) and alanine aminotransferase (ALT) (serum glutamic pyruvic transaminase [SGPT]) =< 2.5 x institutional upper limit of normal (unless liver metastases are present in which case it must be =< 5 x ULN) (within 7 days prior to treatment registration)\no\tCalculated creatinine clearance (Cockcroft-Gault equation) >= 30 mL/min (within 7 days prior to treatment registration) \no\tAble to swallow study drugs whole as a tablet/capsule\no\tPatients who have partners of childbearing potential (e.g. female that has not been surgically sterilized or who are not amenorrheic for >= 12 months) must be willing to use a method of birth control in addition to adequate barrier protection as determined to be acceptable by the investigator during the study and for 3 months after last dose of niraparib administration and 5 months after the last dose of cetrelimab. In addition men should not donate sperm during this period. 
Please note that the efficacy of hormonal contraception may be decreased if administered with niraparib\no\tPatient is willing and able to comply with the protocol for the duration of the study including undergoing treatment and scheduled visits and examinations including follow up \n\nExclusion Criteria\n\tAny prior treatment for castration-resistant prostate cancer (CRPC) with carboplatin, cisplatin, cabazitaxel, PARP-inhibitor or an anti-PD1 or anti-PDL1 inhibitor \n\tPatients who have received more than one line of chemotherapy. Any number of prior hormonal or targeted therapies are allowed \n\tPatients who have not recovered from adverse events secondary to systemic therapy (except for luteinizing hormone-releasing hormone [LHRH] agonist or antagonist treatment for prostate cancer, and bisphosphonates or RANK ligand inhibitors for bone strengthening), major surgery or radiotherapy for the treatment of prostate cancer to a grade =< 2 \n\tAny unresolved toxicity (Common Terminology Criteria for Adverse Events [CTCAE] grade >= 2) from previous anti-cancer therapy. 
Subjects with irreversible toxicity that is not reasonably expected to be exacerbated by the investigational product may be included (e.g., hearing loss, peripherally neuropathy) \n\tHistory or current diagnosis of MDS/AML \n\tActive uncontrolled infection (patients completing a course of antibiotic or antiviral therapy whose infection is deemed to be controlled may be allowed on study after discussion with the PI; the PI will serve as the final arbiter regarding eligibility) \n\tActive or symptomatic viral hepatitis or chronic liver disease (inclusion--- f) | \n\tA history of pneumonitis or extensive bilateral lung disease of non-malignant etiology \n\tA malignancy (other than the one treated in this study) which has a >= 30% probability of recurrence within 24 months (except for adequately treated non-melanoma skin cancer, curatively treated in-situ cancer of the cervix or Ta urothelial carcinomas) \n\tAny underlying medical or psychiatric condition, which in the opinion of the investigator, will make the administration of study drug hazardous or obscure the interpretation of adverse events. Examples include, but are not limited to, uncontrolled ventricular arrhythmia, recent (within 3 months) myocardial infarction, superior vena cava syndrome, extensive bilateral lung disease on high resolution computed tomography (HRCT) scan, uncontrolled seizures, history of allogeneic organ transplant, history of primary immunodeficiency or any psychiatric disorder that prohibits obtaining informed consent\n\tPatients unable to swallow orally administered medication and patients with gastrointestinal disorders likely to interfere with absorption of the study medication\n\tPatients with symptomatic uncontrolled brain metastases or spinal cord compressions. 
A scan to confirm the absence of brain metastases is not required\n\tPatients with a known hypersensitivity to niraparib, carboplatin, cabazitaxel or an anti-PD1 or anti-PDL1 inhibitor \n\tCurrent or prior use of immunosuppressive medication within 21 days before the first dose of cetrelimab, with the exceptions of intranasal and inhaled corticosteroids or systemic corticosteroids at physiological doses, which are not to exceed 10 mg/day of prednisone, or an equivalent corticosteroid or steroids as pre-medication for hypersensitivity reactions (e.g. CT scan premedication)\n\tActive or prior documented autoimmune or inflammatory disorders (including inflammatory bowel disease [e.g., colitis or Crohn's disease], diverticulitis [with the exception of diverticulosis], systemic lupus erythematosus, Sarcoidosis syndrome, or Wegener syndrome [granulomatosis with polyangiitis, Graves' disease, rheumatoid arthritis, hypophysitis, uveitis, etc.]). The following are exceptions to this criterion:\n•\tPatients with vitiligo or alopecia\n•\tPatients with hypothyroidism (e.g., following Hashimoto syndrome) stable on hormone replacement\n•\tAny chronic skin condition that does not require systemic therapy\n•\tPatients without active disease in the last 5 years may be included but only after consultation with the study physician\n•\tPatients with celiac disease controlled by diet alone\no\tReceipt of live attenuated vaccination within 30 days of receiving cetrelimab (inclusion--- f) | \no\tPrisoners or subjects who are involuntarily incarcerated \no\tSubjects who are compulsorily detained for treatment of either a psychiatric or physical (e.g. infectious disease) illness"
prompt_zero_used = "Task:\nYou are in the role of an abstractor who will analyze eligibility criteria for a clinical trial and represent the information as a list of individual criteria in a tabular format that will contain the following columns:\nType: listing whether criterion is an Exclusion or Inclusion criterion\nOriginal Text: the original text of the criterion\nDisease/Condition: If the criterion contains a disease or condition name it by its canonical name\nProcedure: If the criterion contains a therapeutic procedure name it by its canonical name\nDrug:  If the criterion contains a therapeutic drug name it by its canonical name\nBiomarker:  If the criterion contains a biomarker name it by its canonical name\nComputable Rule: Translate the criteria into a logical expression that could be interpreted programmatically\nHere is the criteria to analyze:\n    Inclusion Criteria\no\tCompletion of informed consent prior to any study specific procedures \no\tPatients must agree to tissue collection for correlative studies at the specified timepoints \no\tPatients must consent to the MD Anderson Immunotherapy Platform laboratory protocol PA13-0291 \no\tMale aged 18 years and above \no\tHistologically or cytologically confirmed prostate carcinoma \no\tPresence of metastatic disease documented on imaging studies (bone scan, computed tomography [CT] and/or magnetic resonance imaging [MRI] scans)\no\tPatients must meet at least one of the following AVPC criteria:\n•\tHistologically proven small cell (neuroendocrine) prostate carcinoma\n•\tExclusive visceral metastases\n•\tPredominantly lytic bone metastases identified by plain x-ray or CT scan\n•\tBulky (>= 5 cm in longest dimension) lymphadenopathy or high-grade tumor mass in prostate/pelvis\n•\tLow PSA (=< 10 ng/mL) at initial presentation (prior to androgen ablation or at symptomatic progression in the castrate-setting) plus high volume (>= 20) bone metastases\n•\tElevated serum lactate dehydrogenase (LDH) (>= 2 x 
ULN) or elevated serum carcinoembryonic antigen (CEA) (>= 2 x ULN) in the absence of other etiologies\n•\tShort interval (=< 180 days) to castrate-resistant progression following initiation of hormonal therapy\n•\tKnown loss or mutation (by Clinical Laboratory Improvement Act [CLIA] certified molecular testing, immunohistochemistry [IHC] and/or deoxyribonucleic acid [DNA] sequencing) in at least 2 of Tp53, RB1 and PTEN defined as:\n\tAVPC determination by immunohistochemistry. As previously described, tumor samples are considered negative (and thus abnormal) for RB1 and PTEN if their labeling index is =< 10% and positive (and thus aberrant) for Tp53 if their labeling index is >= 10%, where the labeling index is defined as the percentage of positive cells, and calculated as the number of positively stained epithelial cells divided by the total number of epithelial cells, at X200 magnification\n\tAVPC determination by DNA sequencing. As previously described, the TP53, RB1 and PTEN genes will be considered aberrant if they contain exonic nonsynonymous missense or stop-gain mutations, frameshift or non frameshift indels (insertions or deletions), and/or copy number losses\n\tPatients who have castration-resistant disease progression per RECIST in the absence of PSA values rising to >= 1.0ng/ml as per Prostate Cancer Working Group 3 (PCWG3) PSA progression criteria (inclusion--- t) | \no\tPatients must have documented evidence of progressive disease as defined by any of the following: a) PSA progression: minimum of 2 rising values (3 measurements) obtained a minimum of 7 days apart with the last result being at least >= 1.0 ng/mL; b) New or increasing non-bone disease (RECIST); c) Positive bone scan with 2 or more new lesions (Prostate Cancer Working Group 3 [PCWG3]); d) Increasing symptoms unequivocally attributed to disease progression as judged by the treating physician and the principal investigator (PI) \no\tSurgically or ongoing medically castrated, with 
baseline testosterone levels of =< 50 ng/dL (=< 2.0 nM). Exception: Patients with de novo primary small cell carcinoma of the prostate may begin chemotherapy on study once treatment with an LHRH agonist or antagonist has been initiated, even if testosterone levels have not reached =< 50ng/dL\no\tEastern Cooperative Oncology Group (ECOG) performance status of =< 2\no\tHemoglobin >= 10.0 g/dL (unless due to bone marrow infiltration by tumor, in which case hemoglobin > 8 g/dL is allowed) (within 7 days prior to treatment registration). Patient may have blood transfusions prior to study enrollment\no\tAbsolute neutrophil count (ANC) >= 1.5 x 10^9/L (unless due to bone marrow infiltration by tumor, in which case ANC > 1,000/mm^3 is allowed) (within 7 days prior to treatment registration)\no\tWhite blood cells (WBC) > 3 x 10^9/L (unless due to bone marrow infiltration by tumor, in which case WBC > 2 x 10^9/L is allowed) (within 7 days prior to treatment registration) \no\tNo features suggestive of myelodysplastic syndrome (MDS)/acute myeloid leukemia (AML) on peripheral blood smear (within 7 days prior to treatment registration)\no\tPlatelet count >= 100 x 10^9/L (unless due to bone marrow infiltration by tumor, in which case platelet > 50,000/mm^3 is allowed) (within 7 days prior to treatment registration) \no\tTotal bilirubin =< 1.5 x institutional upper limit of normal (ULN) (except for patients with known Gilbert’s disease) (within 7 days prior to treatment registration) \no\tAspartate aminotransferase (AST) (serum glutamic oxaloacetic transaminase [SGOT]) and alanine aminotransferase (ALT) (serum glutamic pyruvic transaminase [SGPT]) =< 2.5 x institutional upper limit of normal (unless liver metastases are present in which case it must be =< 5 x ULN) (within 7 days prior to treatment registration)\no\tCalculated creatinine clearance (Cockcroft-Gault equation) >= 30 mL/min (within 7 days prior to treatment registration) \no\tAble to swallow study drugs whole as a 
tablet/capsule\no\tPatients who have partners of childbearing potential (e.g. female that has not been surgically sterilized or who are not amenorrheic for >= 12 months) must be willing to use a method of birth control in addition to adequate barrier protection as determined to be acceptable by the investigator during the study and for 3 months after last dose of niraparib administration and 5 months after the last dose of cetrelimab. In addition men should not donate sperm during this period. Please note that the efficacy of hormonal contraception may be decreased if administered with niraparib\no\tPatient is willing and able to comply with the protocol for the duration of the study including undergoing treatment and scheduled visits and examinations including follow up \n\nExclusion Criteria\n\tAny prior treatment for castration-resistant prostate cancer (CRPC) with carboplatin, cisplatin, cabazitaxel, PARP-inhibitor or an anti-PD1 or anti-PDL1 inhibitor \n\tPatients who have received more than one line of chemotherapy. Any number of prior hormonal or targeted therapies are allowed \n\tPatients who have not recovered from adverse events secondary to systemic therapy (except for luteinizing hormone-releasing hormone [LHRH] agonist or antagonist treatment for prostate cancer, and bisphosphonates or RANK ligand inhibitors for bone strengthening), major surgery or radiotherapy for the treatment of prostate cancer to a grade =< 2 \n\tAny unresolved toxicity (Common Terminology Criteria for Adverse Events [CTCAE] grade >= 2) from previous anti-cancer therapy. 
Subjects with irreversible toxicity that is not reasonably expected to be exacerbated by the investigational product may be included (e.g., hearing loss, peripherally neuropathy) \n\tHistory or current diagnosis of MDS/AML \n\tActive uncontrolled infection (patients completing a course of antibiotic or antiviral therapy whose infection is deemed to be controlled may be allowed on study after discussion with the PI; the PI will serve as the final arbiter regarding eligibility) \n\tActive or symptomatic viral hepatitis or chronic liver disease (inclusion--- f) | \n\tA history of pneumonitis or extensive bilateral lung disease of non-malignant etiology \n\tA malignancy (other than the one treated in this study) which has a >= 30% probability of recurrence within 24 months (except for adequately treated non-melanoma skin cancer, curatively treated in-situ cancer of the cervix or Ta urothelial carcinomas) \n\tAny underlying medical or psychiatric condition, which in the opinion of the investigator, will make the administration of study drug hazardous or obscure the interpretation of adverse events. Examples include, but are not limited to, uncontrolled ventricular arrhythmia, recent (within 3 months) myocardial infarction, superior vena cava syndrome, extensive bilateral lung disease on high resolution computed tomography (HRCT) scan, uncontrolled seizures, history of allogeneic organ transplant, history of primary immunodeficiency or any psychiatric disorder that prohibits obtaining informed consent\n\tPatients unable to swallow orally administered medication and patients with gastrointestinal disorders likely to interfere with absorption of the study medication\n\tPatients with symptomatic uncontrolled brain metastases or spinal cord compressions. 
A scan to confirm the absence of brain metastases is not required\n\tPatients with a known hypersensitivity to niraparib, carboplatin, cabazitaxel or an anti-PD1 or anti-PDL1 inhibitor \n\tCurrent or prior use of immunosuppressive medication within 21 days before the first dose of cetrelimab, with the exceptions of intranasal and inhaled corticosteroids or systemic corticosteroids at physiological doses, which are not to exceed 10 mg/day of prednisone, or an equivalent corticosteroid or steroids as pre-medication for hypersensitivity reactions (e.g. CT scan premedication)\n\tActive or prior documented autoimmune or inflammatory disorders (including inflammatory bowel disease [e.g., colitis or Crohn's disease], diverticulitis [with the exception of diverticulosis], systemic lupus erythematosus, Sarcoidosis syndrome, or Wegener syndrome [granulomatosis with polyangiitis, Graves' disease, rheumatoid arthritis, hypophysitis, uveitis, etc.]). The following are exceptions to this criterion:\n•\tPatients with vitiligo or alopecia\n•\tPatients with hypothyroidism (e.g., following Hashimoto syndrome) stable on hormone replacement\n•\tAny chronic skin condition that does not require systemic therapy\n•\tPatients without active disease in the last 5 years may be included but only after consultation with the study physician\n•\tPatients with celiac disease controlled by diet alone\no\tReceipt of live attenuated vaccination within 30 days of receiving cetrelimab (inclusion--- f) | \no\tPrisoners or subjects who are involuntarily incarcerated \no\tSubjects who are compulsorily detained for treatment of either a psychiatric or physical (e.g. infectious disease) illness"
# Hard-coded text of the minimal zero-shot summarization prompt: one instruction
# sentence followed by the inclusion and exclusion criteria of a single trial.
# The literal is measured with get_token_len() and saved to
# summarization_task_minimal_prompt.txt further down.
# NOTE(review): presumably a pasted copy of a rendered `prompt_zero_minimal`
# template rather than a fresh .format() call - confirm, and consider
# regenerating it so the saved prompt cannot drift from the template.
prompt_zero_minimal_used = "Based on the following list of criteria, extract the diseases, biomarkers, and prior therapies for each. If you aren't sure how to categorize a criterion, please skip that line and continue on to the next.\n    Inclusion Criteria\n•\tPatients must have undergone complete surgical resection of their stage IIA, IIB, IIIA or IIIB non-squamous or squamous b NSCLC per American Joint Committee on Cancer (AJCC) 8th edition and have had negative margins. N3 disease is not allowed.  \n•\tBaseline chest computed tomography (CT) with or without contrast must be performed within 6 months (180 days) prior to randomization to ensure no evidence of disease; if clinically indicated additional imaging studies must be performed to rule out metastatic disease\n•\tEastern Cooperative Oncology Group (ECOG) performance status 0 or 1 \n•\tPatients must be registered to the ALCHEMIST-SCREEN (ALLIANCE A151216) trial prior to randomization \n•\tPositive for translocation or inversion events involving the ALK gene locus (e.g. resulting in EML4-ALK fusion) as defined by a Clinical Laboratory Improvement Act (CLIA)-approved test including: (1) translocation or inversion events involving the ALK gene locus (e.g. 
resulting in EML4-ALK fusion) as determined by the Vysis Break Point fluorescence in situ hybridization (FISH) assay; (2) ALK protein expression by immunohistochemistry (IHC); or (3) ALK rearrangement identified by next generation (NextGen) sequencing; this must have been performed:\no\tBy a local CLIA certified laboratory: Report must indicate the results as well as the CLIA number of the laboratory which performed the assay; tissue must be available for submission for central, retrospective confirmation of the ALK fusion status via ALCHEMIST-SCREEN (ALLIANCE A151216); OR\no\tPatient registered to and the ALK fusion status performed centrally on the ALCHEMIST-SCREEN (ALLIANCE A151216)\n•\tAll females of childbearing potential must have a blood or urine pregnancy test within 72 hours prior to randomization to rule out pregnancy; a female of childbearing potential is any woman, regardless of sexual orientation or whether they have undergone tubal ligation, who meets the following criteria: 1) has not undergone a hysterectomy or bilateral oophorectomy; or 2) has not been naturally postmenopausal for at least 24 consecutive months (i.e., has had menses at any time in the preceding 24 consecutive months)\n•\tWomen of childbearing potential and sexually active males must be strongly advised to practice abstinence or use an accepted and effective method of contraception\n•\tPatients must be adequately recovered from surgery at the time of randomization\n•\tThe minimum time requirement between date of surgery and randomization must be at least 4 weeks (28 days)\n•\tThe maximum time requirement between surgery and randomization must be:\no\t4 months (120 days) if no adjuvant chemotherapy was administered\no\t9 months (270 days) if adjuvant chemotherapy was administered\no\t11 months (330 days) if adjuvant chemotherapy and radiation therapy were administered\n•\tPatients must have completed any prior adjuvant chemotherapy or radiation therapy 2 or more weeks (6 or more 
weeks for mitomycin and nitrosoureas) prior to randomization and be adequately recovered at the time of randomization\nNOTE: Patients taking low dose methotrexate for non-malignant conditions and other cytotoxic agents for non-malignant conditions are allowed to continue treatment while on study\nNOTE: Neo-adjuvant chemotherapy or radiation therapy for the resected lung cancer is not permitted (inclusion--- t) | \n•\tPatients must have adequate organ function as defined by the following criteria within 2 weeks prior to randomization:\nNOTE: it is strongly encouraged that these tests take place no more than one week prior to randomization to meet the 2 week requirement for randomization\no\tSerum aspartate aminotransferase (AST) and serum alanine aminotransferase (ALT) =< 2.5 x upper limit of normal (ULN) \no\tTotal serum bilirubin =< 1.5 x ULN\no\tAbsolute neutrophil count (ANC) >= 1500/mm^3\no\tPlatelets >= 30,000/mm^3\no\tHemoglobin >= 8.0 g/dL\no\tSerum creatinine =< 2 x ULN\n•\tPrior to randomization patients with any non-hematologic toxicity from surgery, chemotherapy, or radiation must have recovered to grade =< 1 with the exception of alopecia and the criteria outlined\n\nExclusion Criteria\n•\tNo known interstitial fibrosis or interstitial lung disease\n•\tNo prior treatment with crizotinib or another ALK inhibitor\n•\tNo ongoing cardiac dysrhythmias of grade >= 2 National Cancer Institute (NCI) Common Terminology Criteria for Adverse Events (CTCAE) version 4.0, uncontrolled atrial fibrillation (any grade), or corrected QT (QTc) interval > 470 msec\n•\tNo use of medications, herbals, or foods that are known potent cytochrome P450, subfamily 3A, polypeptide 4 (CYP3A4) inhibitors or inducers, included but not limited to those outlined\n•\tPatients may not be receiving any other investigational agents while on study\n•\tPatients must NOT have uncontrolled intercurrent illness including, but not limited to, serious ongoing or active infection, symptomatic 
congestive heart failure, unstable angina pectoris, uncontrolled cardiac arrhythmia, or psychiatric illness/social situations that would limit compliance with study requirements \n•\tWomen must not be pregnant or breast-feeding because, based on the mechanism of action, crizotinib may cause fetal harm when administered during pregnancy; in animal studies, teratogenicity was not evident, but embryotoxic and fetotoxic effects were noted in rats at crizotinib exposures similar to and above those observed in humans at the recommended clinical dose\n•\tPatients must not have any history of locally advanced or metastatic cancer requiring systemic therapy within 5 years from randomization, with the exception of in-situ carcinomas and non-melanoma skin cancer; patients must have no previous primary lung cancer diagnosed concurrently or within the past 2 years"
# Measure each hard-coded prompt with get_token_len (defined elsewhere in the
# notebook; presumably a tokenizer-based token count - confirm). The prompts
# are evaluated in order: few-shot, zero-shot, minimal zero-shot.
prompt_len, prompt_zero_len, prompt_zero_minimal_len = (
    get_token_len(prompt_text)
    for prompt_text in (prompt_used, prompt_zero_used, prompt_zero_minimal_used)
)

# Layout shared by every saved prompt report: a [metrics] section holding the
# token length, a blank line, then a [prompt] section with the prompt text.
_PROMPT_REPORT_TEMPLATE = "[metrics]\nlength={prompt_len}\n\n[prompt]\n{prompt}"


def _write_prompt_report(path, prompt_text, token_len):
    """Write one prompt report file.

    Args:
        path: Destination file path; an existing file is overwritten.
        prompt_text: Prompt text stored under the ``[prompt]`` header.
        token_len: Value stored as ``length=`` under the ``[metrics]`` header.

    Raises:
        OSError: If the file cannot be opened for writing (for example when
            the target directory does not exist).
    """
    with open(path, 'w', encoding='utf-8') as fileout:
        fileout.write(
            _PROMPT_REPORT_TEMPLATE.format(prompt=prompt_text, prompt_len=token_len)
        )


# One report per prompt variant. Previously this was three copy-pasted
# open/write stanzas; a single helper keeps the file layout in one place.
for _report_name, _report_text, _report_len in (
    ('summarization_task_prompt.txt', prompt_used, prompt_len),
    ('summarization_task_zeroshot_prompt.txt', prompt_zero_used, prompt_zero_len),
    ('summarization_task_minimal_prompt.txt', prompt_zero_minimal_used, prompt_zero_minimal_len),
):
    _write_prompt_report(f'{folder}/{_report_name}', _report_text, _report_len)