In [3]:
from src import Icsr

import datasets
from collections import Counter
import matplotlib.pyplot as plt
import numpy as np
from math import log10
import seaborn as sns
import pandas as pd
import re
import pylatex

sns.set_style('whitegrid')


### Comparison of predictions

In [249]:
# load validation split
dataset = datasets.load_dataset("FAERS-PubMed/BioDEX-ICSR")
val = dataset['validation']


Using custom data configuration FAERS-PubMed--BioDEX-ICSR-40aa49fec6af4868
Found cached dataset parquet (/Users/kldooste/.cache/huggingface/datasets/FAERS-PubMed___parquet/FAERS-PubMed--BioDEX-ICSR-40aa49fec6af4868/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/3 [00:00<?, ?it/s]

In [250]:
# post-process prediction
def postprocess(prediction):
    prediction = prediction.replace('\n', ' ')
    return prediction.lower().strip()

In [251]:
# load predictions
models_and_files = [
    ("flan-t5-large", "./predictions/generated-eval-predictions-flan-t5-large-s(2048)-t(258).txt"),
    ("gpt-4", "./predictions/generated-eval-predictions-gpt-4-0314-run01.txt")
]

models = [mf[0] for mf in models_and_files]

predictions = []
for _, file in models_and_files:
    with open(file, 'r') as fp:
        raw = fp.readlines()
    raw = [postprocess(p) for p in raw]
    predictions.append(raw)

# parse icsrs
predictions_icsrs = []
for output in predictions:
    icsrs = [Icsr.from_string(p) for p in output]
    predictions_icsrs.append(icsrs)

In [252]:
# load targets and inputs
inputs = val['fulltext_processed']
pmids = val['pmid']
targets = [postprocess(p) for p in val['target']]
targets_icsr = [Icsr.from_string(p) for p in targets]

In [253]:
# create dataframe
min_length = min([len(p) for p in predictions])

data = {
    "input": inputs[:min_length],
    "pmid": pmids[:min_length],
    "target_icsr": targets_icsr[:min_length],
    models[0] + "_output": predictions[0][:min_length],
    models[0] + "_icsr": predictions_icsrs[0][:min_length],
    models[1] + "_output": predictions[1][:min_length],
    models[1] + "_icsr": predictions_icsrs[1][:min_length]
}

df = pd.DataFrame(data=data)

for model in models + ['target']:
    df[f'{model}_serious'] = df[f'{model}_icsr'].apply(lambda x: x.serious)
    df[f'{model}_patientsex'] = df[f'{model}_icsr'].apply(lambda x: x.patientsex)
    df[f'{model}_drugs'] = df[f'{model}_icsr'].apply(lambda x: x.drugs)
    df[f'{model}_reactions'] = df[f'{model}_icsr'].apply(lambda x: x.reactions)


In [254]:
col_view = ['input', 
            'pmid',
            'target_serious',
            'flan-t5-large_serious',
            'gpt-4_serious',
            "target_patientsex",
            "flan-t5-large_patientsex",
            "gpt-4_patientsex",
            "target_drugs",
            "flan-t5-large_drugs",
            "gpt-4_drugs",
            "target_reactions",
            "flan-t5-large_reactions",
            "gpt-4_reactions"]

df_view = df.reindex(columns=col_view)
df_view['input'] = df_view['input'].apply(lambda x: x.replace('\n\n', ' ').replace('\n',' '))

# handle lists
def handle_list(element):
    if not isinstance(element, list):
        return element
    return ', '.join(element)
# df_view = df_view.applymap(lambda x: str(x).replace('[','').replace(']', ''))
df_view = df_view.applymap(handle_list)


pd.set_option('display.max_colwidth', 1000)
latex = df_view.to_latex(index=False, longtable=True, escape=False)
latex = latex.replace('_', '\\_')

with open('./predictions/prediction_comparison.tex', 'w') as fp:
    fp.write(latex)

  latex = df_view.to_latex(index=False, longtable=True, escape=False)


In [261]:
# Set the desired input cutoff and text format
input_cutoff = 2500
n_examples = 10

subtable = '''(PMID: {pmid}) {input} \\\\ \\\\ \\begin{{tabular}}{{lll}} \
\\hline \
              & serious                                       & patientsex                                      \\\\ \\hline \
target        & {target_serious}                                             & {target_patientsex}                                                         \\\\ \
flan-t5-large & {flan-t5-large_serious}                                      & {flan-t5-large_patientsex}                                                  \\\\ \
gpt-4         & {gpt-4_serious}                                              & {gpt-4_patientsex}                                                          \\\\ \\hline \
              & \\multicolumn{{2}}{{l}}{{drugs}}                                                                       \\\\ \\hline \
target        & \\multicolumn{{2}}{{p{{13.2cm}}}}{{{target_drugs}}} \\\\ \
flan-t5-large & \\multicolumn{{2}}{{p{{13.2cm}}}}{{{flan-t5-large_drugs}}} \\\\ \
gpt-4         & \\multicolumn{{2}}{{p{{13.2cm}}}}{{{gpt-4_drugs}}} \\\\ \\hline \
              & \\multicolumn{{2}}{{l}}{{reactions}}                                                                   \\\\ \\hline \
target        & \\multicolumn{{2}}{{p{{13.2cm}}}}{{{target_reactions}}}                                                 \\\\ \
flan-t5-large & \\multicolumn{{2}}{{p{{13.2cm}}}}{{{flan-t5-large_reactions}}} \\\\ \
gpt-4         & \\multicolumn{{2}}{{p{{13.2cm}}}}{{{gpt-4_reactions}}} \\\\ \\hline \
\\end{{tabular}} \\\\'''

# Define the function to convert a row to text
def row_to_text(row):
    dct = dict(row)
    dct = {k:str(pylatex.escape_latex(v)) for k,v in dct.items()}
    dct['input'] = dct['input'][:input_cutoff] + '... [Truncated]'

    return subtable.format(**dct)

# Apply the row_to_text function to the DataFrame to convert it to a single column
df_singlecol = df_view.apply(row_to_text, axis=1).iloc[:n_examples]

# Set the max_colwidth option to a large value to prevent truncation
pd.set_option('display.max_colwidth', 10000)

# Convert the DataFrame to LaTeX table
latex_string = df_singlecol.to_string(index=False)

# Modify the LaTeX code to create a longtable that wraps the text
latex = "\\begin{longtable}{@{}p{1.00\\textwidth}@{}}\n"
latex += "\\toprule\n"
latex += "\\textbf{input} \\tabularnewline\n"
latex += "\\midrule\n"
latex += "\\endhead\n"
latex += "\\bottomrule\n"
latex += "\\endfoot\n"
latex += "\\endlastfoot\n"
latex += latex_string.replace('\n', ' \\\\\n')
latex += "\\end{longtable}\n"


# Remove HTML tags
latex = re.sub(r'[^\S\n]+', ' ', latex)



# Deal with whitespace
# latex = re.sub(r'\s+',' ', latex)

# Write the LaTeX table to a file
with open('./predictions/prediction_comparison_singlecol.tex', 'w') as fp:
    fp.write(latex)


### Formatting the prompt for in the Table

In [9]:
# prompt = "Read a biomedical paper and extract information about the adverse drug event mentioned by the authors. Return a serious value ('1' for serious, '2' for not serious). Return a patientsex value ('1' for male, '2' for female). Return a list of drugs taken and reactions experienced.\n\n---\n\nFollow the following format.\n\nQuestion: ${What adverse drug event was described in the following context?}\nContext: ${biomedical paper that describes adverse drug events}\nAnswer: ${the adverse drug event described in the context}\n\n---\n\nQuestion: What adverse drug event was described in the following context?\nContext: we report the case of a patient with b-cell prolymphocytic leukemia who was successfully treated with the novel humanized monoclonal antibody obinutuzumab. this patient was previously treated with the combination of rituximab and bendamustine and had recurrent infusion reactions. her treatment with rituximab and bendamustine was discontinued when she developed disease progression after 3 cycles of therapy. she was then treated with obinutuzumab 1000 mg on day 1 of every cycle and chlorambucil 0.5 mg/kg on days 1 and 15 every 28 days to which she had greater tolerability. after 4 cycles of treatment, she had resolution of her clinical symptoms, massive splenomegaly, and normalization of her white blood cell count.\nAnswer: serious: 1 patientsex: 2 drugs: bendamustine hydrochloride, rituximab reactions: cytopenia, treatment failure\n\nQuestion: What adverse drug event was described in the following context?\nContext: sarcoid associated pulmonary hypertension (saph) is a common complication of sarcoidosis and is associated with poor prognosis. saph can be due to multiple synergistic mechanisms and current therapeutic strategies treat systemic sarcoidosis and pulmonary hypertension separately. several studies have been performed to develop an effective therapy for saph but have been met with mixed results. the ambition trial successfully treated incident patients with pulmonary arterial hypertension (pah) with the upfront combination of ambrisentan and tadalafil; however combination therapy has not yet been studied in patients with saph. here we report a cohort of patients with newly diagnosed saph who were treated with upfront combination therapy per the ambition study protocol. we report three subjects with newly diagnosed saph who were treated with combination ambrisentan and tadalafil. baseline hemodynamics were compared with those from surveillance right heart catheterization while on therapy. mean follow up period was 17 months. each subject demonstrated clinical and hemodynamic improvement with combination therapy. this series is the first to evaluate upfront combination ambrisentan and tadalafil therapy for treatment of newly diagnosed saph. despite the impressive clinical and hemodynamic improvement, the study is limited by its small size and retrospective nature. while these initial results are promising, further work is needed to fully evaluate this regimen for treatment of saph. (sarcoidosis vasc diffuse lung dis 2020; 37 (2): 234-238).\nAnswer: serious: 1 patientsex: 2 drugs: ambrisentan, infliximab, methotrexate, prednisolone, tadalafil reactions: off label use, urosepsis\n\nQuestion: What adverse drug event was described in the following context?\nContext: haloperidol is a typical antipsychotic drug. this drug is still widely used in emergency medicine, psychiatry, and general medicine departments. it is mostly used for acute confusional state, psychotic disorders, agitation, delirium, and aggressive behaviour. overdose of haloperidol can cause sudden deaths. cardiopulmonary arrest related to use of haloperidol had been reported in literature as case reports but are very few. no such cases have been reported in india till now. we report a case of cardiac arrest due to the use of haloperidol.\nAnswer: serious: 1 patientsex: 1 drugs: haloperidol lactate reactions: cardiac arrest, ventricular tachycardia\n\nQuestion: What adverse drug event was described in the following context?\nContext: neonatal nonoliguric hyperkalemia (nohk) is a metabolic abnormality that occurs in extremely premature neonates at approximately 24\u00a0h after birth and is mainly due to the immature functioning of the sodium (na+)/potassium (k+) pump. magnesium sulfate is frequently used in obstetrical practice to prevent preterm labor and to treat preeclampsia; this medication can also cause hypermagnesemia and hyperkalemia by a mechanism that is different from that of nohk. herein, we report the first case of very early-onset neonatal hyperkalemia induced by maternal hypermagnesemia.    a neonate born at 32\u00a0weeks of gestation developed hyperkalemia (k+ 6.4\u00a0mmol/l) 2\u00a0h after birth. the neonate's blood potassium concentration reached 7.0\u00a0mmol/l 4\u00a0h after birth, despite good urine output. the neonate and his mother had severe hypermagnesemia caused by intravenous infusion of magnesium sulfate given for tocolysis due to pre-term labor.    the early-onset hyperkalemia may have been caused by the accumulation of potassium ions transported through the placenta, the shift of potassium ions from the intracellular to the extracellular space in the infant due to the malfunctioning of the na+/k+ pump and the inhibition of renal distal tube potassium ion secretion, there is a possibility that these mechanisms were induced by maternal and fetal hypermagnesemia after maternal magnesium sulfate administration. because neonatal hyperkalemia poses a significant risk for the development of life-threatening cardiac arrhythmia, this case highlights the necessity of maternal blood magnesium monitoring during magnesium sulfate administration and neonatal blood potassium monitoring when there is severe maternal hypermagnesemia at delivery.\nAnswer: serious: 1 patientsex: 2 drugs: magnesium sulfate reactions: exposure during pregnancy, hypermagnesaemia, hypocalcaemia, hypotonia, product use in unapproved indication\n\nQuestion: What adverse drug event was described in the following context?\nContext: doxycycline and minocycline are tetracyclines with the potential to cause hepatoxicity. although autoimmune-like hepatitis from minocycline is well-described, doxycycline-induced autoimmune hepatitis (diah) has only been described once. we report a rare case of diah with elevated liver enzymes over 5 times the normal upper limit, elevated immunoglobulin g, and high titers of antismooth muscle antibody and antinuclear antibody. by stopping doxycycline, our patient's liver enzymes normalized and immunoglobulin g and autoantibody titers rapidly downtrended. as long-term doxycycline therapy becomes more prevalent to treat acne vulgaris and other skin conditions, diah may become more prevalent and recognized.\nAnswer: serious: 1 patientsex: 2 drugs: doxycycline hyclate reactions: autoimmune hepatitis\n\nQuestion: What adverse drug event was described in the following context?\nContext: oral mucositis, the most common adverse effect of radiotherapy (rt) and/or chemotherapy is observed in almost 97% of patients with head and neck cancer. although several agents like corticosteroids, lidocaine and vitamins are available for its prevention or management, results are often disappointing. here we report on the effects of a topically applied, highly purified natural deoxyribonucleic acid from sturgeon gonads on three cases of moderate to severe oral mucositis in patients with head and neck cancer. three patients who had undergone rt and/or chemotherapy received an oral spray containing sodium salt-based natural deoxyribonucleic acid (pdrn) for grade 3 oral mucositis. treatment continued for one month after the end of rt. no patient reported any allergic reactions. rt and chemotherapy were not interrupted and opioid therapy was not given to any patient. pain was relieved about 2-3\u202fdays after starting treatment and oral mucositis was reduced to g2 within one week. outcomes in all 3 cases showed topical use of the sodium salt-based pdrn derived from sturgeon gonads was acceptable and safe when used topically for therapeutic and regenerative purposes.present results are encouraging and suggest a more in-depth study is warranted on its use in a larger patient cohort with rt-induced oral mucositis.\nAnswer: serious: 1 patientsex: 2 drugs: cisplatin reactions: candida infection, dehydration, pain, stomatitis, weight decreased\n\nQuestion: What adverse drug event was described in the following context?\nContext: background piperacillin/tazobactam is a commonly used antibiotic for the empirical treatment of severe diabetic foot infections. one of the most feared complications of this drug is the development of pancytopenia. the aim of this study was to determine whether the use of piperacillin/tazobactam caused any hematological changes in patients admitted with severe diabetes-related foot infections from a specialist multidisciplinary foot clinic. specifically, looking at whether it caused anemia, leukopenia, neutropenia, or thrombocytopenia.   methods a 1-year retrospective analysis of patients admitted to a tertiary care center for treatment of diabetes-related foot infection using piperacillin/tazobactam. hematological indices, urea and electrolytes, and c-reactive protein (crp) were recorded pretreatment, during treatment, and posttreatment. hba1c, vitamin b12, folate, thyroid-stimulating hormone, and free thyroxin were also analyzed to exclude any potential confounders as a cause of pancytopenia.   results a total of 154 patients were admitted between 1 january 2016 and 31 december 2016 who received piperacillin/tazobactam for severe diabetes-related foot infection. on admission, white cell count and crp were raised and fell significantly within the first 48\u00a0h. other hematological factors did not change. five patients developed a mild pancytopenia, of which three were unexplained.   conclusions in this relatively small cohort, pancytopenia did not occur. as such, piperacillin/tazobactam appeared to have a low risk of adverse hematological outcomes and remains the treatment of choice for severe diabetes-related foot infections.\nAnswer: serious: 1 patientsex: 1 drugs: piperacillin sodium\\tazobactam sodium reactions: haemoglobin decreased, pancytopenia\n\nQuestion: What adverse drug event was described in the following context?\nContext: title: cardiac safety results from a phase ii, open-label, multicenter, pilot study of two docetaxel-based regimens plus bevacizumab for the adjuvant treatment of subjects with node-positive or high-risk node-negative breast cancer.  abstract: objective adding antiangiogenic therapy to standard chemotherapy has improved response rates and progression-free survival in metastatic breast cancer (bc) patients. this phase ii study evaluated cardiac safety of bevacizumab with/without trastuzumab with two docetaxel-based regimens in early bc.   methods 127 women with non-metastatic node-positive or high-risk node-negative bc were enrolled. women with human epidermal growth factor receptor 2 (her2)-negative bc (n\u2009=\u200993) received docetaxel/doxorubicin/cyclophosphamide (tac)\u2009+\u2009bevacizumab, while women with her2-positive disease (n\u2009=\u200934) received docetaxel/carboplatin/trastuzumab (tch)\u2009+\u2009bevacizumab, every 3\u00a0weeks for six cycles. maintenance therapy with bevacizumab alone or bevacizumab plus trastuzumab, respectively, was given every 3\u00a0weeks for 52\u00a0weeks. the primary objective was to evaluate cardiac safety, as measured by the incidence of\u2009\u2265\u2009grade 3 clinical congestive heart failure (chf); the secondary objective was assessment of safety and toxicity.   results at least one cardiac adverse event (ae; chf, cardiomyopathy, or left ventricular dysfunction) was reported in 26.1% of tac (n\u2009=\u200992) and 17.6% of tch subjects (n\u2009=\u200934); there were no cardiac deaths. \u2265 grade 3 clinical chf was observed in 4.3% in the tac plus bevacizumab stratum and 0% in the tch plus bevacizumab stratum. a\u2009\u2265\u2009grade 3 treatment-emergent ae (any kind) related to study treatment was observed in 59.8% in the tac with bevacizumab and 52.9% in the tch plus bevacizumab stratum.   conclusions adding bevacizumab to a docetaxel-based regimen with trastuzumab did not appear to increase cardiotoxicity.   background clinicaltrials.gov identifier: nct00446030, registered march 8, 2007.  text: introduction breast cancer mortality has declined over the past 2 decades; however, it still remains the most common type of cancer in women, accounting for an estimated 29% of all new cases (siegel et al. 2014). the 5-year survival rate for women with breast cancer is 99% for those with localized disease and 84% for regional disease, and only 24% in patients with distant disease (siegel et al. 2014). several studies in human epidermal growth factor receptor 2 (her2)-normal metastatic breast cancer have reported that the addition of bevacizumab to chemotherapy improves response rates and progression-free survival compared with chemotherapy alone (miller et al. 2007; robert et al. 2009; brufsky et al. 2011; pivot et al. 2011). preclinical evidence also suggests that the combination of monoclonal antibodies that target her2 and vascular endothelial growth factor (vegf) may act synergistically in her2 overexpressing cancers (sweeney et al. 2001; pegram et al. 2004).  the present study was primarily initiated to evaluate the cardiac safety of bevacizumab when given in combination with a standard-of-care anthracycline-based treatment\u2014docetaxel, doxorubicin, cyclophosphamide (tac) (mackey et al. 2013)\u2014in the adjuvant setting. at the time of study initiation, larger studies, such as eastern cooperative oncology group (ecog) e5103 (national cancer institute), were being planned to evaluate bevacizumab in adjuvant, her2-negative breast cancer patients. combining anti-vegf therapy with anthracycline-based chemotherapy raises concerns regarding cardiac safety, given the association of doxorubicin with an increased risk of congestive heart failure (chf), and the tendency of bevacizumab to increase blood pressure and, as a result, cardiac \u201cafterload\u201d. the theoretical concern is that a bevacizumab-associated increase in afterload could unmask clinically occult cardiac toxicity from anthracycline, and effectively increase the rate of clinical cardiac adverse events (aes). the present study was designed in part to provide initial safety data regarding the combination of an anthracycline with bevacizumab for the larger planned studies.  for her2-positive breast cancer, the use of adjuvant trastuzumab has been shown to improve disease-free survival (dfs) and overall survival (os) when added to standard chemotherapy (romond et al. 2005; slamon et al. 2011). preclinical data suggesting that her2-positive breast cancer is particularly reliant on neoangiogenesis (davidson et al. 1987; epstein et al. 2002; yen et al. 2000; konecny et al. 2004) led to the initiation of several clinical trials that evaluated the combination of trastuzumab and bevacizumab. one of the first of these was a phase ii study that enrolled 50 subjects with her2-positive metastatic breast cancer which reported an asymptomatic cardiac event rate of 36% with a grade 4 cardiac event in 2.0% of subjects (hurvitz et al. 2009). at the time of the present study\u2019s initiation, planning was underway for beth (bevacizumab and trastuzumab adjuvant therapy in her2-positive breast cancer) (slamon et al. 2013), a large phase iii adjuvant study in which patients with her2-positive breast cancer were randomly assigned to receive docetaxel, carboplatin, and trastuzumab (tch) with or without bevacizumab. given that (1) trastuzumab is associated with a low (0.4%) risk of heart failure when given in combination with docetaxel and carboplatin (slamon et al. 2011), and (2) it is not known if adding bevacizumab to trastuzumab increases the risk of heart failure, our study included a her2-positive cohort to gauge the cardiac safety of tch plus bevacizumab.  patients and methods this was a phase ii, parallel-group, open-label, noncomparative, multicenter, pilot study (clinicaltrials.gov: nct00365365). the primary objective was to evaluate the cardiac safety of bevacizumab with/without trastuzumab, as measured by the incidence of\u2009\u2265\u2009grade 3 clinical chf, when administered with two different docetaxel-based combination regimens for the adjuvant treatment of subjects with node-positive or high-risk node-negative breast cancer. the secondary objectives were to evaluate the safety and toxicity of these same treatments. the study was originally designed to also evaluate dfs and os; however, the study was terminated early and follow-up was shortened from 10\u00a0years to 2\u00a0years; therefore, dfs and os were not evaluated (second amendment to the protocol). this study was conducted in accordance with good clinical practice and in compliance with the helsinki declaration and all applicable local regulatory requirements.\nAnswer:"

prompt_placeholder = "Read a biomedical paper and extract information about the adverse drug event mentioned by the authors. Return a serious value ('1' for serious, '2' for not serious). Return a patientsex value ('1' for male, '2' for female). Return a list of drugs taken and reactions experienced.\n\n---\n\nFollow the following format.\n\nQuestion: ${What adverse drug event was described in the following context?}\nContext: ${biomedical paper that describes adverse drug events}\nAnswer: ${the adverse drug event described in the context}\n\n---\n\nQuestion: What adverse drug event was described in the following context?\nContext: we report the case of a patient with b-cell prolymphocytic leukemia who was successfully treated with the novel humanized monoclonal antibody obinutuzumab. this patient was previously treated with the combination of rituximab and bendamustine and had recurrent infusion reactions. her treatment with rituximab and bendamustine was discontinued when she developed disease progression after 3 cycles of therapy. she was then treated with obinutuzumab 1000 mg on day 1 of every cycle and chlorambucil 0.5 mg/kg on days 1 and 15 every 28 days to which she had greater tolerability. after 4 cycles of treatment, she had resolution of her clinical symptoms, massive splenomegaly, and normalization of her white blood cell count.\nAnswer: serious: 1 patientsex: 2 drugs: bendamustine hydrochloride, rituximab reactions: cytopenia, treatment failure\n\nQuestion: What adverse drug event was described in the following context?\nContext: sarcoid associated pulmonary hypertension (saph) is a common complication of sarcoidosis and is associated with poor prognosis. saph can be due to multiple synergistic mechanisms and current therapeutic strategies treat systemic sarcoidosis and pulmonary hypertension separately. several studies have been performed to develop an effective therapy for saph but have been met with mixed results. the ambition trial successfully treated incident patients with pulmonary arterial hypertension (pah) with the upfront combination of ambrisentan and tadalafil; however combination therapy has not yet been studied in patients with saph. here we report a cohort of patients with newly diagnosed saph who were treated with upfront combination therapy per the ambition study protocol. we report three subjects with newly diagnosed saph who were treated with combination ambrisentan and tadalafil. baseline hemodynamics were compared with those from surveillance right heart catheterization while on therapy. mean follow up period was 17 months. each subject demonstrated clinical and hemodynamic improvement with combination therapy. this series is the first to evaluate upfront combination ambrisentan and tadalafil therapy for treatment of newly diagnosed saph. despite the impressive clinical and hemodynamic improvement, the study is limited by its small size and retrospective nature. while these initial results are promising, further work is needed to fully evaluate this regimen for treatment of saph. (sarcoidosis vasc diffuse lung dis 2020; 37 (2): 234-238).\nAnswer: serious: 1 patientsex: 2 drugs: ambrisentan, infliximab, methotrexate, prednisolone, tadalafil reactions: off label use, urosepsis\n\nQuestion: What adverse drug event was described in the following context?\nContext: haloperidol is a typical antipsychotic drug. this drug is still widely used in emergency medicine, psychiatry, and general medicine departments. it is mostly used for acute confusional state, psychotic disorders, agitation, delirium, and aggressive behaviour. overdose of haloperidol can cause sudden deaths. cardiopulmonary arrest related to use of haloperidol had been reported in literature as case reports but are very few. no such cases have been reported in india till now. we report a case of cardiac arrest due to the use of haloperidol.\nAnswer: serious: 1 patientsex: 1 drugs: haloperidol lactate reactions: cardiac arrest, ventricular tachycardia\n\nQuestion: What adverse drug event was described in the following context?\nContext: neonatal nonoliguric hyperkalemia (nohk) is a metabolic abnormality that occurs in extremely premature neonates at approximately 24\u00a0h after birth and is mainly due to the immature functioning of the sodium (na+)/potassium (k+) pump. magnesium sulfate is frequently used in obstetrical practice to prevent preterm labor and to treat preeclampsia; this medication can also cause hypermagnesemia and hyperkalemia by a mechanism that is different from that of nohk. herein, we report the first case of very early-onset neonatal hyperkalemia induced by maternal hypermagnesemia.    a neonate born at 32\u00a0weeks of gestation developed hyperkalemia (k+ 6.4\u00a0mmol/l) 2\u00a0h after birth. the neonate's blood potassium concentration reached 7.0\u00a0mmol/l 4\u00a0h after birth, despite good urine output. the neonate and his mother had severe hypermagnesemia caused by intravenous infusion of magnesium sulfate given for tocolysis due to pre-term labor.    the early-onset hyperkalemia may have been caused by the accumulation of potassium ions transported through the placenta, the shift of potassium ions from the intracellular to the extracellular space in the infant due to the malfunctioning of the na+/k+ pump and the inhibition of renal distal tube potassium ion secretion, there is a possibility that these mechanisms were induced by maternal and fetal hypermagnesemia after maternal magnesium sulfate administration. because neonatal hyperkalemia poses a significant risk for the development of life-threatening cardiac arrhythmia, this case highlights the necessity of maternal blood magnesium monitoring during magnesium sulfate administration and neonatal blood potassium monitoring when there is severe maternal hypermagnesemia at delivery.\nAnswer: serious: 1 patientsex: 2 drugs: magnesium sulfate reactions: exposure during pregnancy, hypermagnesaemia, hypocalcaemia, hypotonia, product use in unapproved indication\n\nQuestion: What adverse drug event was described in the following context?\nContext: doxycycline and minocycline are tetracyclines with the potential to cause hepatoxicity. although autoimmune-like hepatitis from minocycline is well-described, doxycycline-induced autoimmune hepatitis (diah) has only been described once. we report a rare case of diah with elevated liver enzymes over 5 times the normal upper limit, elevated immunoglobulin g, and high titers of antismooth muscle antibody and antinuclear antibody. by stopping doxycycline, our patient's liver enzymes normalized and immunoglobulin g and autoantibody titers rapidly downtrended. as long-term doxycycline therapy becomes more prevalent to treat acne vulgaris and other skin conditions, diah may become more prevalent and recognized.\nAnswer: serious: 1 patientsex: 2 drugs: doxycycline hyclate reactions: autoimmune hepatitis\n\nQuestion: What adverse drug event was described in the following context?\nContext: oral mucositis, the most common adverse effect of radiotherapy (rt) and/or chemotherapy is observed in almost 97% of patients with head and neck cancer. although several agents like corticosteroids, lidocaine and vitamins are available for its prevention or management, results are often disappointing. here we report on the effects of a topically applied, highly purified natural deoxyribonucleic acid from sturgeon gonads on three cases of moderate to severe oral mucositis in patients with head and neck cancer. three patients who had undergone rt and/or chemotherapy received an oral spray containing sodium salt-based natural deoxyribonucleic acid (pdrn) for grade 3 oral mucositis. treatment continued for one month after the end of rt. no patient reported any allergic reactions. rt and chemotherapy were not interrupted and opioid therapy was not given to any patient. pain was relieved about 2-3\u202fdays after starting treatment and oral mucositis was reduced to g2 within one week. outcomes in all 3 cases showed topical use of the sodium salt-based pdrn derived from sturgeon gonads was acceptable and safe when used topically for therapeutic and regenerative purposes.present results are encouraging and suggest a more in-depth study is warranted on its use in a larger patient cohort with rt-induced oral mucositis.\nAnswer: serious: 1 patientsex: 2 drugs: cisplatin reactions: candida infection, dehydration, pain, stomatitis, weight decreased\n\nQuestion: What adverse drug event was described in the following context?\nContext: background piperacillin/tazobactam is a commonly used antibiotic for the empirical treatment of severe diabetic foot infections. one of the most feared complications of this drug is the development of pancytopenia. the aim of this study was to determine whether the use of piperacillin/tazobactam caused any hematological changes in patients admitted with severe diabetes-related foot infections from a specialist multidisciplinary foot clinic. specifically, looking at whether it caused anemia, leukopenia, neutropenia, or thrombocytopenia.   methods a 1-year retrospective analysis of patients admitted to a tertiary care center for treatment of diabetes-related foot infection using piperacillin/tazobactam. hematological indices, urea and electrolytes, and c-reactive protein (crp) were recorded pretreatment, during treatment, and posttreatment. hba1c, vitamin b12, folate, thyroid-stimulating hormone, and free thyroxin were also analyzed to exclude any potential confounders as a cause of pancytopenia.   results a total of 154 patients were admitted between 1 january 2016 and 31 december 2016 who received piperacillin/tazobactam for severe diabetes-related foot infection. on admission, white cell count and crp were raised and fell significantly within the first 48\u00a0h. other hematological factors did not change. five patients developed a mild pancytopenia, of which three were unexplained.   conclusions in this relatively small cohort, pancytopenia did not occur. as such, piperacillin/tazobactam appeared to have a low risk of adverse hematological outcomes and remains the treatment of choice for severe diabetes-related foot infections.\nAnswer: serious: 1 patientsex: 1 drugs: piperacillin sodium\\tazobactam sodium reactions: haemoglobin decreased, pancytopenia\n\nQuestion: What adverse drug event was described in the following context?\nContext: {{full-text paper (as many tokens as possible)}} \nAnswer:"

In [13]:
print(pylatex.escape_latex(prompt_placeholder))

Read a biomedical paper and extract information about the adverse drug event mentioned by the authors. Return a serious value ('1' for serious, '2' for not serious). Return a patientsex value ('1' for male, '2' for female). Return a list of drugs taken and reactions experienced.\newline%
\newline%
{-}{-}{-}\newline%
\newline%
Follow the following format.\newline%
\newline%
Question: \$\{What adverse drug event was described in the following context?\}\newline%
Context: \$\{biomedical paper that describes adverse drug events\}\newline%
Answer: \$\{the adverse drug event described in the context\}\newline%
\newline%
{-}{-}{-}\newline%
\newline%
Question: What adverse drug event was described in the following context?\newline%
Context: we report the case of a patient with b{-}cell prolymphocytic leukemia who was successfully treated with the novel humanized monoclonal antibody obinutuzumab. this patient was previously treated with the combination of rituximab and bendamustine and had recu

### Dataset report fields description

In [36]:
import yaml

# read fields
report_fields = None
with open('./report_fields_without_openfda.yaml', 'r') as fp:
    report_fields = fp.read()

# remove fields
names_to_remove = {'format', 'is_exact', 'type', 'pattern'}

def remove_nodes(data, names):
    if isinstance(data, dict):
        data = {key: remove_nodes(value, names) for key, value in data.items() if key not in names}
    elif isinstance(data, list):
        data = [remove_nodes(item, names) for item in data if item not in names]
    return data

def remove_properties(data):
    if isinstance(data, dict):
        if 'properties' in data:
            properties = data.pop('properties')
            data.update(properties)
        data = {key: remove_properties(value) for key, value in data.items()}
    elif isinstance(data, list):
        data = [remove_properties(item) for item in data]
    return data

# Load and parse the YAML file
with open('./report_fields_without_openfda.yaml', 'r') as file:
    yaml_data = yaml.safe_load(file)

# Recursively remove nodes matching the specified names
modified_data = remove_nodes(yaml_data, names_to_remove)
modified_data = remove_properties(modified_data)

# Print the modified data
report_fields_latex = pylatex.escape_latex(yaml.dump(modified_data))
report_fields_latex = report_fields_latex.replace('newline', '\\')
print(report_fields_latex)

authoritynumb:\\%
  description: "Populated with the Regulatory Authority\textbackslash{}u2019s case report number,\textbackslash{}\\%
    \textbackslash{} when available."\\%
  possible\_values: null\\%
companynumb:\\%
  description: Identifier for the company providing the report. This is self{-}assigned.\\%
  possible\_values: null\\%
duplicate:\\%
  description: This value is `1` if earlier versions of this report were submitted\\%
    to FDA. openFDA only shows the most recent version.\\%
  possible\_values: null\\%
fulfillexpeditecriteria:\\%
  description: Identifies expedited reports (those that were processed within 15 days).\\%
  possible\_values:\\%
    value:\\%
      '1': true\\%
      '2': false\\%
occurcountry:\\%
  description: The name of the country where the event occurred.\\%
  possible\_values:\\%
    value:\\%
      link: http://data.okfn.org/data/core/country{-}list\\%
      name: Country codes\\%
patient:\\%
  drug:\\%
    items:\\%
      actiondrug:\\%
        

In [45]:
modified_data

{'authoritynumb': {'description': 'Populated with the Regulatory Authority’s case report number, when available.',
  'possible_values': None},
 'companynumb': {'description': 'Identifier for the company providing the report. This is self-assigned.',
  'possible_values': None},
 'duplicate': {'description': 'This value is `1` if earlier versions of this report were submitted to FDA. openFDA only shows the most recent version.',
  'possible_values': None},
 'fulfillexpeditecriteria': {'description': 'Identifies expedited reports (those that were processed within 15 days).',
  'possible_values': {'value': {'1': True, '2': False}}},
 'occurcountry': {'description': 'The name of the country where the event occurred.',
  'possible_values': {'value': {'name': 'Country codes',
    'link': 'http://data.okfn.org/data/core/country-list'}}},
 'patient': {'drug': {'items': {'actiondrug': {'description': 'Actions taken with the drug.',
     'possible_values': {'value': {'1': 'Drug withdrawn',
      

In [91]:
import yaml


def flatten_yaml_dict(data, parent="", exclude_fields=None):
    if exclude_fields is None:
        exclude_fields = []

    flattened = []
    for key, value in data.items():
        if key not in exclude_fields:
            current_key = f"{parent}.{key}" if parent else key
            flattened.append(current_key)
            if isinstance(value, dict):
                flattened.extend(flatten_yaml_dict(value, parent=current_key, exclude_fields=exclude_fields))
    return flattened

def normalize_possible_values(possible_values):
    if possible_values == None:
        return 'Undefined'
    values = possible_values['value']
    s = []
    for k,v in values.items():
        s.append(f'{k}: {v}')
    return ", ".join(s)

def get_value(field, data):
    fields = field.split('.')
    if not isinstance(fields, list):
        fields = [fields]

    current_node = data
    for f in fields:
        current_node = current_node[f]

    if 'description' and 'possible_values' in current_node:
        return {
            'description': current_node['description'],
            'possible_values': normalize_possible_values(current_node['possible_values'])
        }
    else:
        return None

exclude_fields = ['description', 'possible_values']
flattened_fields = flatten_yaml_dict(modified_data, exclude_fields=exclude_fields)
flattened_data = {k: get_value(k, modified_data) for k in flattened_fields if get_value(k, modified_data)}

In [96]:
# latex
def safe(s):
    return pylatex.escape_latex(s) + ''

def generate_latex(data):
    latex_code = ''
    for datapoint in data:
        latex_code += f"\\noindent \\textbf{{{safe(datapoint)}}}\n"
        subfields = data[datapoint]
        for subfield, value in subfields.items():
            latex_code += f"\\hspace{{1em}}{safe(subfield)}: {safe(value)}\n"

        latex_code += "\n"  # Add extra line between datapoints


    return latex_code

print(generate_latex(flattened_data))

\noindent \textbf{authoritynumb}
\hspace{1em}description: Populated with the Regulatory Authority’s case report number, when available.
\hspace{1em}possible\_values: Undefined

\noindent \textbf{companynumb}
\hspace{1em}description: Identifier for the company providing the report. This is self{-}assigned.
\hspace{1em}possible\_values: Undefined

\noindent \textbf{duplicate}
\hspace{1em}description: This value is `1` if earlier versions of this report were submitted to FDA. openFDA only shows the most recent version.
\hspace{1em}possible\_values: Undefined

\noindent \textbf{fulfillexpeditecriteria}
\hspace{1em}description: Identifies expedited reports (those that were processed within 15 days).
\hspace{1em}possible\_values: 1: True, 2: False

\noindent \textbf{occurcountry}
\hspace{1em}description: The name of the country where the event occurred.
\hspace{1em}possible\_values: name: Country codes, link: http://data.okfn.org/data/core/country{-}list

\noindent \textbf{patient.drug.items

### Dataset article fields

In [14]:
import pylatex

article_fields = {
"title" : "Title of the article",
"pmid" : "PubMed ID",
"issue": "The Issue of the journal",
"pages": "Pages of the article in the journal publication",
"abstract" : "Abstract of the article",
"fulltext": "The full text associated with the article from the PubMed Central Open Access Subset, if available",
"fulltext_license": "The license associated with the full text paper from the PubMed Central Open Access Subset, if available",
"journal" : "Journal of the given paper",
"authors" : "Authors, each separated by ';'",
"affiliations": "The affiliations of the authors",
"pubdate" : "Publication date. Defaults to year information only.",
"doi" : "DOI",
"medline_ta" : "Abbreviation of the journal name",
"nlm_unique_id" : "NLM unique identification",
"issn_linking" : "ISSN linkage, typically use to link with Web of Science dataset",
"country" : "Country extracted from journal information field",
"mesh_terms" : "List of MeSH terms with corresponding MeSH ID, each separated by ';' e.g. 'D000161:Acoustic Stimulation; D000328:Adult; ...' ",
"publication_types" : "List of publication type list each separated by ';' e.g. 'D016428:Journal Article'",
"chemical_list" : "List of chemical terms, each separated by ';'",
"keywords" : "List of keywords, each separated by ';'",
"reference" : "String of PMID each separated by ';' or list of references made to the article",
"delete" : "Boolean, 'False' means paper got updated so you might have two",
"pmc" : "PubMed Central ID",
"other_id" : "Other IDs found, each separated by ';'",
}

# latex
def safe(s):
    return pylatex.escape_latex(s) + ''

def generate_latex_article(data):
    latex_code = ''
    for datapoint in data:
        latex_code += f"\\noindent \\textbf{{{safe(datapoint)}}}\n"
        value = data[datapoint]
        latex_code += f"{safe(value)}.\n"

        latex_code += "\n"  # Add extra line between datapoints


    return latex_code

print(generate_latex_article(article_fields))

\noindent \textbf{title}
Title of the article.

\noindent \textbf{pmid}
PubMed ID.

\noindent \textbf{issue}
The Issue of the journal.

\noindent \textbf{pages}
Pages of the article in the journal publication.

\noindent \textbf{abstract}
Abstract of the article.

\noindent \textbf{fulltext}
The full text associated with the article from the PubMed Central Open Access Subset, if available.

\noindent \textbf{fulltext\_license}
The license associated with the full text paper from the PubMed Central Open Access Subset, if available.

\noindent \textbf{journal}
Journal of the given paper.

\noindent \textbf{authors}
Authors, each separated by ';'.

\noindent \textbf{affiliations}
The affiliations of the authors.

\noindent \textbf{pubdate}
Publication date. Defaults to year information only..

\noindent \textbf{doi}
DOI.

\noindent \textbf{medline\_ta}
Abbreviation of the journal name.

\noindent \textbf{nlm\_unique\_id}
NLM unique identification.

\noindent \textbf{issn\_linking}
ISSN li