In [2]:
# !pip3 install -r requirements.txt
# !pip install ipywidgets

# ## If widgets misbehave, run below commands and restart jupyter

# !pip install jupyter_contrib_nbextensions
# !jupyter contrib nbextension install --user
# !jupyter nbextension enable varInspector/main

### Postgres AACT login
variables.py file lists required entities for trial_to_paper_utils functions in this notebook.

One of note is to fill in your username and password for your AACT account

In [2]:
import ipywidgets as widgets
import pandas as pd
from transformers import pipeline
from tqdm import tqdm
from trial_to_paper_utils import *
from pprint import pprint

### Notes on TODOs

- [] % trials with associated papers at all
- [] What AEs are unique, if any, to the papers (not mentioned in trial)
- [] How do authors prioritise which AEs they are discussing in paper? Are there any trends in this?
- [] What is the lag time between trial completion and papers being published?
- [] Extract target - drug - AEs
- [] Is there any other data in CT.gov which could help inform on AE prioritisation
- [] Can we define 'severity' of AEs with models - check w Ines for severity score model
- [x] Compare examples from OTs paper (less severe indications) to cancer examples, intention to link to Ines' severity score
- [x] Sort the AE hits so they read as intended based on their indices

In [7]:
nct_id = "NCT01753193" # 'Easy' SLE example - only 1 patient group
# nct_id = "NCT05034952" # Efficacy and Safety VX-548

# Nav18_17 trials v results
# NCT02215252 NCT01769274 NCT01529346
# nct_id = "NCT01769274" #x

# Diabetes
# nct_id = "NCT03084965"

query = f"""
SELECT 
	c.names AS indication,
    s.{nct_id},
    s.brief_title AS study_title,
    s.phase AS study_phase,
	ae.adverse_event_term as adverse_event,
    ae.event_type,
	ae.organ_system,
	ae.subjects_at_risk,
    ae.subjects_affected as subjects_affected,
	ae.ctgov_group_code,
    -- ae.description,
	ae.result_group_id
FROM 
    "reported_events" as ae
JOIN
	all_conditions as c on c.nct_id = ae.nct_id
JOIN
    studies as s ON s.nct_id = ae.nct_id
WHERE
	ae.nct_id = '{nct_id}';
"""

## AACT queried with starting NCT ID of interest to collect referenced adverse events

In [9]:
study_title, aes, severe_aes, other_aes, patient_groups = aact_data_gather(nct_id, query)
# TODO Get % affected / see relevance of 'other' vs 'serious'
if study_title:
    print(
        f"For CT.gov trial:\n\t'{study_title}' ({nct_id}),\n{len(aes)} unique AEs were recorded in {len(patient_groups)} patient group(s)\n")
    # [print(f"- {a}\n") for a in aes]
else:
    print(f"No trials were found searching for ID: {nct_id}")

UndefinedColumn: column s.nct01753193 does not exist
LINE 4:     s.NCT01753193,
            ^

In [13]:
start_statement = f'Adverse events recorded for trial: {nct_id}'
display_widget(text=aes, placeholder='AEs', start_statement=start_statement)

NameError: name 'widgets' is not defined

## Search ePMC for papers mentioning trial ID, failing this search for papers relating to compound name

In [108]:
trial_in_pmids = query_epmc(query=nct_id, page_size=25)
print(f"NCT ID referenced in {len(trial_in_pmids)} PubMed paper(s).\n{trial_in_pmids}")

if len(trial_in_pmids) > 0:
    test_pmid = trial_in_pmids[0] #TODO - remove test, do for all results & consider subject test groups
    print(test_pmid)
    text = query_bioc(pmid=test_pmid)
else:
    pass
print_text = False
if print_text:
    print("\n".join(text))
    print(len(text))
    print(text[3])

NCT ID referenced in 6 PubMed paper(s).
['39193183', '37148484', '31190735', '33225631', '34768756', '35383948']
39193183


## Testing HuggingFace AE models over literature text

In [109]:
from typing import List


def check_ahead(i: int, input_df: pd.DataFrame, df_set: List, entity_str: str) -> [List, str]:
    try:
        if input_df.iloc[i+1].entity == 'B-AE': # Next term is a new phrase
            df_set.append(entity_str)
        else:
            while input_df.iloc[i+1].entity == 'I-AE':
                if input_df.iloc[i+1].word.startswith('##'): # Clean up hashes
                    word = input_df.iloc[i+1].word
                    entity_str = entity_str + word[2:]
                else:
                    entity_str = entity_str + ' ' + input_df.iloc[i+1].word
                i += 1
    except IndexError as e:
        df_set.append(entity_str) # End of df
    return df_set, entity_str

def get_set_from_col(input_df: pd.DataFrame) -> List:
    input_df = input_df.sort_values('start')
    df_set = []
    entity_str = ''
    for i, row in input_df.iterrows():
        if row.entity == 'B-AE':
            # Assign start of term
            entity_str = row.word
            # Look ahead of additional words of term
            df_set, entity_str = check_ahead(i, input_df, df_set, entity_str)

    # return list(set(df_set))
    return df_set

In [113]:
# TODO Test open source models for AE detection in text, compare to those recorded in trial
pipe = pipeline(task="token-classification", model="MutazYoune/BiomedBERT-Adverse-Events-NER_pun", tokenizer="MutazYoune/BiomedBERT-Adverse-Events-NER_pun")
# pipe = pipeline(task="token-classification", model="MutazYoune/Medical-NER-Adverse-Events-NER", tokenizer="MutazYoune/Medical-NER-Adverse-Events-NER")

all_sections = []
all_aes = []
for x in tqdm(text):
    try:
        res = pipe(x)
        if res:
            section_df = pd.DataFrame(res)
            section_set = get_set_from_col(section_df)
            [all_aes.append(x) for x in section_set]
            pprint(section_df)
            all_sections.append(section_df)
    except:
        continue

paper_aes_df = pd.concat(all_sections, ignore_index=True)
model_name = pipe.model.name_or_path
model_name = model_name.rpartition('/')[-1]
# paper_aes.to_csv(f'./output/{model_name}_AEs_{test_pmid}.csv')

Device set to use mps:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
  2%|█▉                                                                                  | 5/215 [00:00<00:12, 16.34it/s]

  entity     score  index word  start  end
0   I-AE  0.580194     22  ##s    122  123


 11%|█████████▎                                                                         | 24/215 [00:00<00:05, 33.59it/s]

  entity     score  index       word  start  end
0   B-AE  0.681990      6  increased     34   43
1   I-AE  0.813916      7     levels     44   50
2   I-AE  0.757584      8         of     51   53
3   I-AE  0.537050      9        ifn     54   57
4   I-AE  0.743058     10         in     58   60
5   I-AE  0.565262     11      serum     61   66


 15%|████████████▋                                                                      | 33/215 [00:01<00:05, 31.79it/s]

  entity     score  index      word  start  end
0   I-AE  0.583950     12   overall     57   64
1   I-AE  0.500546     13   disease     65   72
2   I-AE  0.712672     14  activity     73   81
  entity     score  index       word  start  end
0   B-AE  0.802168    147     herpes    738  744
1   I-AE  0.969673    148     zoster    745  751
2   I-AE  0.873056    149  incidence    752  761


 23%|██████████████████▉                                                                | 49/215 [00:01<00:05, 28.97it/s]

   entity     score  index        word  start  end
0    B-AE  0.619942     87        mild    446  450
1    I-AE  0.700091     88  infections    451  461
2    B-AE  0.652657     90   headaches    463  472
3    B-AE  0.639640     92         nas    474  477
4    I-AE  0.684847     93       ##oph    477  480
..    ...       ...    ...         ...    ...  ...
9    I-AE  0.700623     99       ##gia    501  504
10   B-AE  0.833818    101   injection    509  518
11   I-AE  0.867768    102        site    519  523
12   I-AE  0.866820    103         ind    524  527
13   I-AE  0.858238    104   ##uration    527  534

[14 rows x 6 columns]


 53%|███████████████████████████████████████████▊                                      | 115/215 [00:02<00:01, 63.27it/s]

  entity     score  index   word  start  end
0   I-AE  0.532042      3  event     16   21


 72%|██████████████████████████████████████████████████████████▋                       | 154/215 [00:03<00:01, 35.75it/s]

  entity     score  index       word  start  end
0   I-AE  0.551295     28  synthesis    180  189


 78%|████████████████████████████████████████████████████████████████                  | 168/215 [00:04<00:01, 41.22it/s]

  entity     score  index       word  start  end
0   B-AE  0.636648      1  photosens      0    9
1   I-AE  0.577762      2  ##itivity      9   16
  entity     score  index       word  start  end
0   I-AE  0.620246     14  nephritis     94  103


 86%|██████████████████████████████████████████████████████████████████████▌           | 185/215 [00:04<00:00, 43.07it/s]

  entity     score  index   word  start  end
0   I-AE  0.512464      7  rates     26   31


100%|██████████████████████████████████████████████████████████████████████████████████| 215/215 [00:05<00:00, 38.61it/s]


In [111]:
all_aes

['increased levels of ifn in serum',
 'herpes zoster incidence',
 'headaches',
 'injection site induration',
 'photosensitivity']

In [112]:
from IPython.display import display, HTML

def make_scrollable(df, height=500):
    return HTML(f"<div style='max-height:{height}px; overflow:auto'>{df.to_html()}</div>")

paper_aes_df = paper_aes_df.sort_values('start')
make_scrollable(paper_aes_df)

Unnamed: 0,entity,score,index,word,start,end
29,B-AE,0.636648,1,photosens,0,9
30,I-AE,0.577762,2,##itivity,9,16
27,I-AE,0.532042,3,event,16,21
32,I-AE,0.512464,7,rates,26,31
1,B-AE,0.68199,6,increased,34,43
2,I-AE,0.813916,7,levels,44,50
3,I-AE,0.757584,8,of,51,53
4,I-AE,0.53705,9,ifn,54,57
7,I-AE,0.58395,12,overall,57,64
5,I-AE,0.743058,10,in,58,60


In [96]:
print(f'{len(list(set(aes)))} recorded in clinical trial, NCT ID: {nct_id}')

print(f'{len(all_aes)} detected in paper, PMID: {test_pmid}')

339 recorded in clinical trial, NCT ID: NCT01769274
27 detected in paper, PMID: 26941184


### Novelty / inaccuracy of trial AEs

In [103]:
unique_aes_to_paper = set(all_aes).difference(set(aes))
unique_aes_to_paper

{'acute',
 'acute pain pain cold allodynia',
 'anosmia',
 'atrial fibrillation',
 'congenital to',
 'enhanced cold pain',
 'episodic',
 'erythromelalgia',
 'extreme',
 'familial',
 'heat',
 'heat hyperalgesia',
 'hyperalgesia',
 'inflammatory pain',
 'insensitivity to pain',
 'loss of perception',
 'moderate to severe inadequately controlled cancer - related pain',
 'neuropathic orofacial pain',
 'neuropathic pain',
 'orofacial neuropathic pain',
 'pain',
 'pain hypersensitivity',
 'painful neuropathytaity',
 'painruciating mechanically evoked pain',
 'postetic',
 'trigeminal'}

In [104]:
unique_aes_to_trial = set(aes).difference(set(all_aes))
len(unique_aes_to_trial)

339

### Trying w OTs reasons for trial stoppage texts

Texts not specific enough in most cases

In [42]:
ots_trial_stop = pd.read_json("hf://datasets/opentargets/clinical_trial_reason_to_stop/data.json", lines=True)
ots_trial_stop_safety = ots_trial_stop[ots_trial_stop["label_descriptions"].apply(lambda x: "Safety_Sideeffects" in x)]
text = list(ots_trial_stop_safety["text"])

display_widget(text=text, placeholder='Trial stoppage AEs', start_statement='OTs, reasons for trial stoppage')

OTs, reasons for trial stoppage


HBox(children=(HTML(value='- 1. Very low enrollment rate. Ê Ê 2. Recent studies question the effect of adding …

In [36]:
# pipe = pipeline(task="token-classification", model="MutazYoune/BiomedBERT-Adverse-Events-NER_pun", tokenizer="MutazYoune/BiomedBERT-Adverse-Events-NER_pun")
pipe = pipeline(task="token-classification", model="MutazYoune/Medical-NER-Adverse-Events-NER", tokenizer="MutazYoune/Medical-NER-Adverse-Events-NER")
all_sections = []
for x in tqdm(text):
    try:
        # print(x)
        res = pipe(x)
        if res:
            # print([x['word'] for x in res])
            # print('\n')
            section_df = pd.DataFrame(res)
            # pprint(section_df)
            all_sections.append(section_df)
    except:
        continue
ots_trial_stoppage_aes = pd.concat(all_sections, ignore_index=True)

Device set to use mps:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


1. Very low enrollment rate. Ê Ê 2. Recent studies question the effect of adding panitumumab in this category of patients. Ê Ê 3. Too high toxicity rate


  1%|▍                                          | 2/211 [00:00<00:49,  4.26it/s]

2 complications with midazolam
5 immune related serious adverse events in Phase 1 study


  1%|▌                                          | 3/211 [00:00<00:47,  4.36it/s]

['▁immune', '▁related', '▁serious', '▁adverse', '▁events']


A 300 patient safety analysis by the Data Monitoring Committee showed a trend towards higher mortality in the treatment group.


  2%|▊                                          | 4/211 [00:00<00:45,  4.52it/s]

A cluster of adverse events in everolimus arm was noted.


  2%|█                                          | 5/211 [00:01<00:46,  4.44it/s]

A cluster of deaths in the BCG-arm compared with controls


  3%|█▏                                         | 6/211 [00:01<00:46,  4.45it/s]

A re-evaluation of research risks to participants were greater than originally anticipated


  3%|█▍                                         | 7/211 [00:01<00:45,  4.52it/s]

Adverse findings from nonclinical carcinogenicity studies.


  4%|█▋                                         | 8/211 [00:01<00:47,  4.29it/s]

After interim analysis it was determined that the risks were too great in comparision to theÊ results


  4%|█▊                                         | 9/211 [00:02<00:45,  4.41it/s]

After reports of turbidity in urine in 4 of 8 volunteers in the 4th cohort, the study was halted temporarily."


  5%|█▉                                        | 10/211 [00:02<00:45,  4.42it/s]

['▁in', '▁urine']


After review of safety events and have decided that further dose escalation of MEDI-507 as a single agent is not feasible.
All patients during the study period were subjected to peripheral blocks or were excluded fromÊ the study due to exclusion criteria


  6%|██▍                                       | 12/211 [00:02<00:35,  5.61it/s]

An interim analysis revealed a significantly higher persistence/recurrence of complications of portal hypertension in the 8 mm-stent group.


  6%|██▌                                       | 13/211 [00:02<00:38,  5.18it/s]

Animal Toxicity Findings


  7%|██▊                                       | 14/211 [00:03<00:40,  4.89it/s]

Autorities vorbidden this dispositif in other trial
Based on data collected, the combination appeared to be poorly tolearated."


  8%|███▏                                      | 16/211 [00:03<00:34,  5.67it/s]

Based on the data collected, the combination of bevacizumab and sunitinib appeared to be poorly tolerated."


  8%|███▍                                      | 17/211 [00:03<00:36,  5.26it/s]

Because of withdrawal of Avandia sale due to its risks outweigh its benefits
Company stopped production of study drug due to excessive toxicities, lack of efficacy
Concerns about safety at the first interim analysis


  9%|███▉                                      | 20/211 [00:03<00:27,  6.84it/s]

DSMC recommendation based on safety data


 10%|████▏                                     | 21/211 [00:04<00:31,  6.02it/s]

Data Monitoring Committee decision on 22nd August 2011 for safety issues
Date of termination was Feb. 7, 2008. Reasons of termination were due to elevation of liver function tests and long elimination half-life of the compound."


 13%|█████▎                                    | 27/211 [00:04<00:17, 10.81it/s]

Definitive discontinuation according to safety monitoring of death from the 46th patientÊ onwards.
Development of XL999 was stopped due to cardiac toxicities in the subjects
['▁cardiac', '▁toxicities']


Development program ended due to inability to pair with other cytidine analogs and higher riskÊ of hyperlipasemia when not used with 3TC/FTC.
['▁hyper', 'lip', 'as', 'emia']


Dose limiting tox is observed. Study put on full clinical hold.
Dose limiting toxicities on lowest dose level
Due to Health Canada losartan recall, a new formulation being added to the protocol/ temporaryÊ suspension
Due to a single, unexpected serious event, the trial was stopped.
Due to celecoxib safety issues
Due to concerns about potential liver safety (See Detailed Description)


 15%|██████▎                                   | 32/211 [00:04<00:10, 16.85it/s]

Due to emerging safety data


 17%|██████▉                                   | 35/211 [00:04<00:13, 13.43it/s]

Due to extreme toxicity
Due to high incidence of neurological complication in those with ilioinguinal block
Due to potential concerns about liver safety (See Detailed Description)
Due to reported toxicity of Celecoxib at high doses


 18%|███████▎                                  | 37/211 [00:05<00:15, 10.88it/s]

Due to safety concerns
Due to safety; specifically a higher rate of deaths, including fatal infections, in the SGN33AÊ arm versus the control arm


 18%|███████▊                                  | 39/211 [00:05<00:18,  9.53it/s]

Due to the meta-analysis about CV adverse effects of rosiglitazone.
Due to toxicity of cyclophosphamide
During treatment session 3, a subject had a pattern of AEs of severe intensity, suggestive of brainstem toxicity/encephalopathy during lidocaine/saline infusion"
EMD Serono voluntarily decided to terminate this trial after observing increased MS disease activity in the atacicept treatment groups compared to placebo
Effective August 13, 2004: Unanticipated high incidence of post-transplant lymphoproliferativeÊ disorder


 22%|█████████▏                                | 46/211 [00:06<00:13, 11.81it/s]

Effects of aripiprazole was not obvious and showed adverse reaction obviously
Elevation of ALT and AST in some patients.
['▁Elevation', '▁of', '▁ALT']


Excess toxicity


 23%|█████████▌                                | 48/211 [00:06<00:16,  9.87it/s]

['▁Excess', '▁toxicity']


Extreme toxicity in Phase I, study did not proceed to Phase II
['▁toxicity']


FDA Clinical Hold as of 12/21/07 due to safety concerns
FDA concerns regarding Avandamet


 24%|█████████▉                                | 50/211 [00:06<00:17,  9.03it/s]

Failure of an inroducer sheath used in the technique specified by the study. posing a risk. No adverse outcome has occurred in a study patient.


 25%|██████████▎                               | 52/211 [00:07<00:21,  7.33it/s]

Field Safety Notice related to potential ventricular oversensing associated to the PhD featureÊ on Paradym ICDs
First two patients enrolled after trial reopened, developed grade III-IV acute GVHD and Ê subsequently passed away.


 27%|███████████▎                              | 57/211 [00:07<00:14, 10.81it/s]

Following the recall of Ionsys, the study has been stopped. Data analysis will be undertaken in accordance with the Analysis Plan."
For safety reasons the recruitment was halted prematurely. Patients on the trial continute to Ê receive treatment and are being followed up as part the protocol.
GanLee cancelled study -FDA Draft Guidance: "Clinical Immunogenicity Considerations forÊ Biosimilar & Interchangeable Insulin Products" released 11/25/2019
Given the number of toxicities seen and the difficulty with patient retention in the doseÊ escalation portion, the sponsor decided to close the trial.
Hepatic Safety Signal Identified.
Hepatic safety signal identified.
High dose citrulline treatment throughout pregnancy in rats may lead to elevated blood pressure in the offspring.


 28%|███████████▉                              | 60/211 [00:07<00:11, 13.44it/s]

Higher than expected rate of acute rejection
In a Phase II study in HIV-infected patients there were a number of seizures, although exactÊ causality could not be assessed phase 1 activity was terminated.


 29%|████████████▎                             | 62/211 [00:07<00:13, 10.65it/s]

['▁seizures']


Inadequate toxicity
['▁Inadequate', '▁toxicity']


Incidence of GI Perforation
Incidence of abnormalities of liver function tests is higher than expected in this population.


 32%|█████████████▎                            | 67/211 [00:08<00:13, 10.82it/s]

Interim safety analysis
Investigations on a Suspected Unexpected Serious Adverse Reaction (SUSAR)
Investigator letter from drug manufacturer stating animal studies showed increased risk of Ê cancer which was an unknown adverse event
Lack of response activity in the setting of an unacceptable toxicity profile


 34%|██████████████▏                           | 71/211 [00:08<00:09, 14.42it/s]

Last subject enrolled experienced bradycardia; study was terminated by Sponsor.
['▁bradycardia']


Major side-effects
Melagatran/ximelagatran was withdrawn from the market and clinical development in February 2006 in the interest of patient safety.
Melagatran/ximelagatran was withdrawn from the market and clinical development in February Ê 2006 in the interest of patient safety.


 36%|███████████████▏                          | 76/211 [00:09<00:12, 11.13it/s]

More cases of Febrile Neutropenia were observed in experimental group compared to standard treatment.
['▁Feb', 'rile', '▁Neu', 'trop', 'enia']


More than 2 of 6 patients treated experienced dose limiting toxicities.
['▁dose', '▁limiting', '▁toxicities']


Nephrotoxicity
['▁Nephro', 'toxicity']


New compounds available in the indication (nivolumab/pembrolizumab), toxicity of ipilimumab
Newly identified safety concerns have changed the risk and benefit considerations


 39%|████████████████▎                         | 82/211 [00:09<00:08, 15.05it/s]

Observed lower cure rates and higher mortality rates in one of the treatment groups.
Occurrence of severe respiratory depression in the remifentanil group.
['▁severe', '▁respiratory', '▁depression']


On 25April2014, study was terminated before any dosing due to an AE of safety concern thatÊ occurred in protocol B1731003 with the same drug.
Other - Higher than expected incidence of hearing loss
['▁Higher', '▁than', '▁expected', '▁incidence', '▁of', '▁hearing', '▁loss']


Patient safety - Unacceptable toxicity


 41%|█████████████████▎                        | 87/211 [00:09<00:07, 17.52it/s]

Poor tolerability of drug and side effects
Post-marketing reports of hepatotoxicity associated with obeticholic acid emerged in June 2017,Ê investigators temporarily halted patient recruitment June 2017.
['▁hepato', 'toxicity']


Potential hepatic safety signal
Pre-Clinical Safety Tox Findings
Protocol was stopped due to the safety issue in Global


 43%|█████████████████▉                        | 90/211 [00:09<00:06, 18.45it/s]

Questions of the benefit efficacy/risks of ddI during the meal not resolved
Reactogenicity met study halting criteria
Recruitment suspended until DSMB reviews a Serious Adverse Event
Reports of significant laboratory abnormalities and adverse events in a number of clinical Ê study participants.
Repros stopped the study for safety and FDA put the study on hold for safety.


 45%|██████████████████▉                       | 95/211 [00:10<00:07, 14.73it/s]

Rofecoxib was withdrawn from the market due to safety concerns.
SAE- risk of overall protocol treatment outweighs benefits
SAEs with n=1 serotonin syndrome, n=2 SAEs after sertraline interruption.
Safety


 47%|███████████████████▋                      | 99/211 [00:10<00:08, 12.90it/s]

Safety Issue: The trial was prematurely terminated on Dec 9, 2010, due to safety concerns,Ê specifically new emerging evidence of hepatic injury.
Safety Issues
Safety Review
Safety concerns
Safety concerns in the treatment arm


 49%|████████████████████▏                    | 104/211 [00:10<00:06, 15.83it/s]

Safety concerns with the COX-2 specific inhibitor class of drug.
Safety concerns.
Safety evaluation due to recent publications.
Safety issue
Safety issue observed on another hepatocellular study using OSI906


 53%|█████████████████████▊                   | 112/211 [00:11<00:03, 25.67it/s]

Safety issues
Safety issues related to outpatient intravenous infusion in India
Safety of patients
Safety reasons
Safety reasons, though no safety issues arose."
Safety-related
Serious adverse events with prednisolone, primarily temporary growth retardation, <5thÊ percentile.
['▁growth', '▁retardation']


Several enrolled subjects experienced hypotensionafter receiving Nicardipine and after revier the Principal investigator decided to stop enrollment.


 57%|███████████████████████▎                 | 120/211 [00:11<00:03, 25.84it/s]

['▁hypotension']


Side effects valuation
Significant Adverse Effects - Futility
['▁F', 'utility']


Significant Toxicities Experienced
['▁Significant', '▁Toxic', 'ities']


Some patients experienced asymptomatic, transient elevations in liver transaminases"
['▁elevations', '▁in', '▁liver', '▁trans', 'amina', 'ses']


Sorafenib administered in the combination with pemetrexed-carboplatin appears to enhance thrombocytopenia compared to historical data.
['▁thrombocytopenia']


Stopped at the request of the Data Safety Monitoring Board for safety reasons.


 58%|███████████████████████▉                 | 123/211 [00:11<00:03, 25.36it/s]

Stopped by DSMB due to increased wound infection rate.
Stopping rule-acute rejection threshold-was met based on local biopsy results
Study closed due to unanticipated toxicity/risks to subjects.
Study halted on Korean PI request, to await ACIP evaluation of febrile seizure reports and final CDC recommendation regarding product useage"
Study inactivated due to detection of hyperintensities of unclear etiology on brain MRI. Protocol discontinued with clinical &amp; radiological follow up continuing


 61%|█████████████████████████                | 129/211 [00:11<00:04, 17.81it/s]

Study prematurely terminated due to safety concerns leading to clinical hold
Study stopped because of toxicity concerns.
Study stopped due to increased cardiovascular risks associated with Celebrex
Study suspended because of safety reasons
Study terminated for safety and the FDA put the study on hold for safety.


 63%|█████████████████████████▋               | 132/211 [00:12<00:04, 18.14it/s]

Study was discontinued due to FDA recommendations of the potential toxicities of theÊ combination of drugs.
Study was stopped because of safety concerns.
Study was stopped due to time dependent drug accumulation
Study was stopped early due to lack of efficacy signal and safety concerns


 65%|██████████████████████████▊              | 138/211 [00:12<00:03, 19.02it/s]

Study was terminated due to cardiac toxicities
['▁cardiac', '▁toxicities']


Study was terminated due to cardiac toxicities in the subejcts
['▁cardiac', '▁toxicities']


Study was terminated due to cardiac toxicities in the subjects
['▁cardiac', '▁toxicities']


Suspended due to lorcaserin recall
Suspension due to a safety concern, while risk/benefit is being assessed"
Suspension due to a safety concern, while risk/benefit is being assessed."


 68%|███████████████████████████▉             | 144/211 [00:12<00:02, 23.05it/s]

Terminated (halted prematurely) due to tolerability issues.
Terminated by sponsor due to general risk that aranesp caused tumor progression
Terminated due to dose limiting toxicity
Terminated due to poor enrollment and grade 3 toxicities noted during an interim analysis.
['▁grade', '▁3', '▁toxicities']


Terminated due to safety concerns
Terminated due to safety concerns.
Terminated for safety reasons


 72%|█████████████████████████████▎           | 151/211 [00:12<00:02, 23.67it/s]

Terminated: Test article, Omontys, was recalled from the market; Enrollment has haltedÊ prematurely and will not resume; participants are no longer being treated
Termination of study was due to safety reasons
The DMC detected an imbalance in safety outcomes between the two groups.
The DSMB recommended stopping the study due to safety concerns.
The DSMB stopped the trial due to unacceptable side effects in the experimental arm which has Ê not yet been verified


 74%|██████████████████████████████▌          | 157/211 [00:13<00:02, 24.04it/s]

The European Medical Agency granted a Paediatric Investigational Product-specific waiver on theÊ grounds that idelalisib is likely to be unsafe in paediatrics
The decision to terminate this study after 202 mg/m2 cohort was taken as MTD achieved in other study at 185 mg/m2
The results of the dose escalation phase did not identify a well-tolerated dose that would Ê permit further study in Phase 2.
The risk-benefit ratio for patients has changed since we initiated the study, and that theÊ study in its current form cannot be justified
The risk-benefit ratio for the use of onercept in this condition was not sufficiently Ê favorable to justify continued development


 76%|███████████████████████████████          | 160/211 [00:13<00:02, 21.89it/s]

The safety profile of combination therapy became unacceptable.
The stopping rule for incidence of acute rejection was met.
The study is prematurely terminated due to a safety issue
The study terminated for safety reasons.


 79%|████████████████████████████████▍        | 167/211 [00:13<00:01, 23.42it/s]

The study was &quot;withdrawn&quot; due to certain adverse events [hypersensitivity].
The study was stopped because of safety concerns
The study was stopped due to safety concerns
The study was terminated based on a recommendation of the DSMB following the identification of two patients with significant elevations in serum transaminases
The study was terminated because of Early Treatment Failure in child.The justification for this decision are concerns about safety of children.
The study was terminated due to hepatoxicity of compound


 81%|█████████████████████████████████▏       | 171/211 [00:13<00:01, 25.55it/s]

['▁he', 'pa', 'toxicity']


The study was terminated in the interest of patient safety.
This study was terminated based on preliminary safety data.
This study was terminated on January 6, 2010, due to toxicology findings in animals exposed to Ê PF-04455242 for three months.
This trial is being closed based on lack of substantive efficacy, slow accrual and overallÊ tolerance in patients treated to date.
To many infants in the experimental group developed hypertriglyceridemia
['▁hyper', 'tri', 'glyceride', 'mia']


Too frequent grade 3-4 toxicities


 84%|██████████████████████████████████▍      | 177/211 [00:13<00:01, 23.49it/s]

Toxicity
Two patients in the first dose level be counted as reaching DLT. DSMB recommend terminatedÊ early this trial.
Unacceptable Neurotoxicity (2 cases)
['acceptable', '▁Neuro', 'toxicity']


Unacceptable incidence of wound dehiscence in the PDS group
Unexpected adverse event


 87%|███████████████████████████████████▌     | 183/211 [00:14<00:01, 22.33it/s]

Unexpected safety events.
Unexpected side-effects: reversible and mild to moderate neurological impairment
['▁mild', '▁neurological', '▁impairment']


Unrelated serious adverse events involving one of the proposed medications
Unsatisfactory filter deployment in several cases
Unusual high frequency of elevated liver function tests.


 88%|████████████████████████████████████▏    | 186/211 [00:14<00:01, 21.33it/s]

['▁elevated', '▁liver', '▁function', '▁tests']


When used appear DNE3 toxic properties. The DSMB determined that due to higher than expected Ê risk and safety concerns the study should be closed.
Withdrawn due to &quot;toxicity&quot; problems
Withdrawn due to an excess of toxic deaths
Withdrawn due to drug toxicity
Withdrawn due to toxicity


 91%|█████████████████████████████████████▌   | 193/211 [00:14<00:00, 24.76it/s]

closed due to toxicity
drug-related harm
due to celecoxib safety issues
due to multiple subjects reporting pain and burning at the phlebotomy sites after 38hr
due to strong side effect
during Period 1 due to numerous orthostatic AEs that occurred. Subsequently, RLD was reduced to 5 mg due to safety concerns."
enrollment stopped for safety issues


 95%|██████████████████████████████████████▊  | 200/211 [00:14<00:00, 26.65it/s]

four adverse events encountered
higher incidence of pain related sexual dysfunction in the TO arm
['▁pain', '▁related', '▁sexual', '▁dysfunction']


investigation of adverse events
negative outcome
post-surgical complications in the last 2 patients
potential harm of insulin infusion outweights the benefit.
reaching of step 1 (recruitment of 8 patients) per protocol -&gt; risk assessment -&gt; termination because of occurance of toxicity Grade 3 and 4


 98%|████████████████████████████████████████▏| 207/211 [00:15<00:00, 19.84it/s]

safety concerns regarding use of rosiglitazone
safety issues (toxicity)
safety reasons
side effect profile did not match expectations
toxicities required dose reduction compromising effectiveness
toxicity


100%|█████████████████████████████████████████| 211/211 [00:15<00:00, 13.57it/s]

unexpected level of toxicity
voluntarily by Sponsor to investigate an anticipated SAE





In [32]:
pd.set_option('display.max_columns', None)
ots_trial_stoppage_aes

Unnamed: 0,entity,score,index,word,start,end
0,B-AE,0.977275,2,immune,2,8
1,I-AE,0.981264,3,related,9,16
2,I-AE,0.986399,4,serious,17,24
3,I-AE,0.994388,5,adverse,25,32
4,I-AE,0.994540,6,events,33,39
...,...,...,...,...,...,...
129,B-AE,0.833021,4,pain,20,24
130,I-AE,0.876938,5,related,25,32
131,I-AE,0.484166,6,sexual,33,39
132,I-AE,0.989740,7,dysfunction,40,51


### ADE corpus

In [49]:
df = pd.read_parquet("hf://datasets/ade-benchmark-corpus/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/train-00000-of-00001.parquet")
text = list(df["text"])

# pipe = pipeline(task="token-classification", model="MutazYoune/BiomedBERT-Adverse-Events-NER_pun", tokenizer="MutazYoune/BiomedBERT-Adverse-Events-NER_pun")
pipe = pipeline(task="token-classification", model="MutazYoune/Medical-NER-Adverse-Events-NER", tokenizer="MutazYoune/Medical-NER-Adverse-Events-NER")
all_sections = []
for x in tqdm(text):
    try:
        print(x)
        res = pipe(x)
        if res:
            print([x['word'] for x in res])
            print('\n')
            section_df = pd.DataFrame(res)
            # pprint(section_df)
            all_sections.append(section_df)
    except:
        continue
ade_bench = pd.concat(all_sections, ignore_index=True)

ReadTimeout: (ReadTimeoutError("HTTPSConnectionPool(host='huggingface.co', port=443): Read timed out. (read timeout=None)"), '(Request ID: 88e37dda-af95-4a31-aa4f-697ac6c0ec69)')

In [47]:
df = pd.read_parquet("hf://datasets/ade-benchmark-corpus/ade_corpus_v2/Ade_corpus_v2_drug_ade_relation/train-00000-of-00001.parquet")
df

Unnamed: 0,text,drug,effect,indexes
0,Intravenous azithromycin-induced ototoxicity.,azithromycin,ototoxicity,"{'drug': {'start_char': [12], 'end_char': [24]..."
1,"Immobilization, while Paget's bone disease was...",dihydrotachysterol,increased calcium-release,"{'drug': {'start_char': [91], 'end_char': [109..."
2,Unaccountable severe hypercalcemia in a patien...,dihydrotachysterol,hypercalcemia,"{'drug': {'start_char': [84], 'end_char': [102..."
3,METHODS: We report two cases of pseudoporphyri...,naproxen,pseudoporphyria,"{'drug': {'start_char': [58], 'end_char': [66]..."
4,METHODS: We report two cases of pseudoporphyri...,oxaprozin,pseudoporphyria,"{'drug': {'start_char': [71], 'end_char': [80]..."
...,...,...,...,...
6816,Lithium treatment was terminated in 1975 becau...,Lithium,lithium intoxication,"{'drug': {'start_char': [0], 'end_char': [7]},..."
6817,Lithium treatment was terminated in 1975 becau...,lithium,lithium intoxication,"{'drug': {'start_char': [52], 'end_char': [59]..."
6818,Eosinophilia caused by clozapine was observed ...,clozapine,Eosinophilia,"{'drug': {'start_char': [23], 'end_char': [32]..."
6819,Eosinophilia has been encountered from 0.2 to ...,clozapine,Eosinophilia,"{'drug': {'start_char': [55], 'end_char': [64]..."


## Early phase kidney failure trials

In [5]:
nct_id = "NCT06901102"
nct_id = "NCT06901011"
query = f"""
SELECT 
	c.names AS indication,
    s.{nct_id},
    s.brief_title AS study_title,
    s.phase AS study_phase,
	ae.adverse_event_term as adverse_event,
    ae.event_type,
	ae.organ_system,
	ae.subjects_at_risk,
    ae.subjects_affected as subjects_affected,
	ae.ctgov_group_code,
    -- ae.description,
	ae.result_group_id
FROM 
    "reported_events" as ae
JOIN
	all_conditions as c on c.nct_id = ae.nct_id
JOIN
    studies as s ON s.nct_id = ae.nct_id
WHERE
	ae.nct_id = '{nct_id}';
"""

In [6]:
study_title, aes, severe_aes, other_aes, patient_groups = aact_data_gather(nct_id, query)
# TODO Get % affected / see relevance of 'other' vs 'serious'
if study_title:
    print(
        f"For CT.gov trial:\n\t'{study_title}' ({nct_id}),\n{len(aes)} unique AEs were recorded in {len(patient_groups)} patient group(s)\n")
    # [print(f"- {a}\n") for a in aes]
else:
    print(f"No trials were found searching for ID: {nct_id}")

UndefinedColumn: column s.nct06901011 does not exist
LINE 4:     s.NCT06901011,
            ^