In [1]:
# Import dependencies
%matplotlib inline
import pandas as pd
import numpy as np
import os 
from collections import OrderedDict
pd.set_option('display.width',100000)
pd.set_option('max_colwidth',4000)
import matplotlib.pyplot as plt
import xml.etree.ElementTree
import spacy
from spacy.matcher import Matcher
import textacy

FDA_DIR = "/Users/charityhilton/repos/FDA_AE_challenge_2019"


import claritynlp_notebook_helpers as claritynlp



ClarityNLP notebook helpers loaded successfully!


In [2]:
labels_and_info = pd.read_csv("./data/labels_and_info.csv")
labels_and_info.head()

Unnamed: 0.1,Unnamed: 0,file,len,reason,type,meddra_llt,meddra_pt,meddra_pt_id,description
0,0,impavido,4,general_term,NonOSE_AE,Died in sleep,Death,10011906,"General terms or non-specific text such as broad categories (e.g., MedDRA system organ class) used to introdue AEs or text describe an outcome (e.g., death) rather than an AE. These are not of interest."
1,1,impavido,4,general_term,NonOSE_AE,Died in sleep,Death,10011906,"General terms or non-specific text such as broad categories (e.g., MedDRA system organ class) used to introdue AEs or text describe an outcome (e.g., death) rather than an AE. These are not of interest."
2,2,impavido,26,general_term,NonOSE_AE,Disorder gastrointestinal,Gastrointestinal disorder,10017944,"General terms or non-specific text such as broad categories (e.g., MedDRA system organ class) used to introdue AEs or text describe an outcome (e.g., death) rather than an AE. These are not of interest."
3,3,impavido,17,general_term,NonOSE_AE,Ill-defined disorder,Ill-defined disorder,10061520,"General terms or non-specific text such as broad categories (e.g., MedDRA system organ class) used to introdue AEs or text describe an outcome (e.g., death) rather than an AE. These are not of interest."
4,4,impavido,34,general_term,NonOSE_AE,Metabolic disorder,Metabolic disorder,10058097,"General terms or non-specific text such as broad categories (e.g., MedDRA system organ class) used to introdue AEs or text describe an outcome (e.g., death) rather than an AE. These are not of interest."


In [3]:
reason_counts = pd.DataFrame(labels_and_info.groupby(['type','reason', 'description'])['type','reason', 'description'].size().reset_index(name='counts')).sort_values(['counts'], ascending=False)
reason_counts

Unnamed: 0,type,reason,description,counts
4,NonOSE_AE,AE_only_as_instruction,"AES mentioned in instructions are often mentioned in a hypothetical context, with instructions for what to do if they develop. These AES are not of interest.",3384
7,NonOSE_AE,general_term,"General terms or non-specific text such as broad categories (e.g., MedDRA system organ class) used to introdue AEs or text describe an outcome (e.g., death) rather than an AE. These are not of interest.",2122
14,Not_AE_Candidate,preexisting_condition_or_risk_factor,"Mentions that describe a condition that developed prior to applying the medication of interest, or condition that increases the likelihood of developing a disease or injury. These mentions are not AEs.",1882
12,Not_AE_Candidate,indication,A clinical symptom or circumstance for which the use of the drug of interest would be appropriate. These mentions are not AEs.,1434
8,NonOSE_AE,manifestation_or_complication,"Text describing signs, symptoms, or changes in lab resuts related to the manifestations of an AE and the sequelae of an AE are not of interest.",1144
2,NonOSE_AE,AE_from_drug_interaction,AEs that result from drug-drug interaction or co-administration are not of interest.,328
5,NonOSE_AE,AE_rate_lteq_placebo,Aes with incidence rate equal to or lower than placebo are not of interest.,306
9,NonOSE_AE,negation,AE whose presence or occurrence is negated or denied. These AEs are not of interst.,245
0,NonOSE_AE,AE_animal,AEs observed in animal data are not of interest.,241
6,NonOSE_AE,OD_or_withdrawal,"AE associated with discontinuing a medication or taking more than the prescribed amount. Drug overdoes and withdrawal do not generally occur when a drug is used as indicated. Additionally, in the context of pharmacovigilance, identifying AEs associated with the drug when used as indicated is the highest priority. These AEs are not of interest.",206


In [4]:
feat_mat = pd.read_csv("./data/feature_matrix.csv", header=0, index_col=0)
feat_mat.head()

Unnamed: 0,file,sent_id,sent_start,sent_end,sentence,nonoseaeaeonlyasinstruction,nonoseaegeneralterm,notaecandidatepreexistingconditionorriskfactor,notaecandidateindication,nonoseaemanifestationorcomplication,nonoseaeaefromdruginteraction,nonoseaeaeratelteqplacebo,nonoseaenegation,nonoseaeaeanimal,nonoseaeodorwithdrawal,nonoseaeaefromofflabel,notaecandidatecontraindication,nonoseaeaeforanotherdruginclass,notaecandidateother,nonoseaeother
0,carboplatin,0,0,49,"[' ADVERSE REACTIONS\n', '\n', ' For a comparison of toxicities when carboplatin or cisplatin was given in combination with cyclophosphamide, see CLINICAL STUDIES, Use with Cyclophosphamide for Initial Treatment of notaecandidateindication Comparative Toxicity .",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
1,carboplatin,1,49,155,"\n', '\n', ' ADVERSE EXPERIENCES IN PATIENTS WITH OVARIAN CANCER \n', ' First Line Combination Therapy [note: Use with Cyclophosphamide for Initial Treatment of notaecandidateindication Data are based on the experience of 393 patients with notaecandidateindication (regardless of baseline status) who received initial combination therapy with carboplatin and cyclophosphamide in two randomized controlled studies conducted by SWOG and NCIC (see CLINICAL STUDIES).Combination with cyclophosphamide as well as duration of treatment may be responsible for the differences that can be noted in the adverse experience table.]",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,carboplatin,2,155,157,Percent,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,carboplatin,3,157,163,Second Line Single Agent,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,carboplatin,4,163,165,Therapy,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [10]:
def filter_feature_matrix(list_of_reasons, f_matrix, keep_only_true=True, replace_reason_name_w_noun=False, reason_replace_dict=None):
    
    '''
    We can use this function to subset the feature matrix by ground-truth label(s) and/or to keep only rows where the value for >=1 of the labels we've selected is TRUE
    '''
    
    cols = ["file", "sent_id", "sent_start", "sent_end", "sentence"]
    cols += list_of_reasons
    
    temp = f_matrix.copy()
    
    if replace_reason_name_w_noun and reason_replace_dict is not None:

        for r in list_of_reasons:
        
            temp.loc[:, "sentence"] = ["{}".format(sent).replace(r, reason_replace_dict[r]) for sent in temp.loc[:, "sentence"]]
    
    # For initial review, it can be helpful to only review the positive instances
    if keep_only_true:
        
        subset_df = temp.loc[:, cols]
        subset_df['row_sum'] = temp.loc[:, list_of_reasons].sum(axis=1)  
        
        return subset_df[subset_df["row_sum"] >= 1]
    
    # But it's important not to forget the sentences w/ FALSE values for the subset of labels we've specified 
    else:
        return temp.loc[:, [cols]]
    
def generate_ngrams(f_mat, r_list, n=3, filter_punc=True, filter_ngrams_by_r=True):
    
    ngram_df = pd.DataFrame()
    
    cols = ["drug_name", "n"]
    
    for i in range(n):
        cols.append("token_{}".format(i))

    counter = 0
    
    for i, row in f_mat.iterrows():
        drug_name = row['file'].lower()
        doc = textacy.Doc(textacy.preprocess_text(u'{}'.format(row['sentence'].lower())), lang='en')
        ngrams = textacy.extract.ngrams(doc, n, filter_stops=False, filter_punct=filter_punc, filter_nums=False)

        if  filter_ngrams_by_r:
            
            #for x in ngrams:
                #print(x, any(reason in str(x) for reason in r_list))
        
            ngrams = [x for x in ngrams if any(reason in str(x).lower() for reason in r_list)]
        
        for ngram in ngrams:
                ngram_df.loc[counter, "drug_name"] = drug_name
                ngram_df.loc[counter, "n"] = n
                ngram_df.loc[counter, "reason"] = None
                
                for i in range(n):
                    ngram_df.loc[counter, "token_{}".format(i)] = ngram[i] if not None else " "
                    
                    if u'{}'.format(ngram[i]).strip() in r_list:
                        #print(ngram[i])
                        ngram_df.loc[counter, 'reason'] = str(ngram[i])
                counter += 1
        
    return ngram_df

def find_ngrams_with_label_not_in_initial_position(ngramdf, r_list):
    
    out = pd.DataFrame()
    
    cols = ["{}_in_pos_0".format(r) for r in r_list]

    for r in r_list:
        
        ngramdf["{}_in_pos_0".format(r)] = 0
        
        for i, row in ngramdf.iterrows():

            if row['token_0'].text == r:
                ngramdf.loc[i, "{}_in_pos_0".format(r)] = 1
                
    
    ngramdf['row_sum'] = ngramdf.loc[:, cols].sum(axis=1)
    print("Min row sum= ", ngramdf['row_sum'].min())
    return ngramdf[ngramdf['row_sum'] == 0]


In [7]:
# We have to consider contraindication here again because pregnancy mentions show up under that type_reason. 
labels_b = ["notaecandidatecontraindication", "nonoseaeodorwithdrawal"] 
subdf_b = filter_feature_matrix(labels_b, feat_mat, keep_only_true=True)
print(subdf_b.shape)


(86, 8)


In [9]:
subdf_b.head()

Unnamed: 0,file,sent_id,sent_start,sent_end,sentence,notaecandidatecontraindication,nonoseaeodorwithdrawal,row_sum
43,valium,43,2398,2464,"Abrupt withdrawal of Valium in such cases may also be associated with a temporary increase in the frequency and/or severity of nonoseaeodorwithdrawal \n', '\n', ' Pregnancy\n', '\n', ' An increased risk of oselabeledaeclasseffect and other oselabeledaeclasseffect associated with the use of benzodiazepine drugs during notaecandidatepreexistingconditionorriskfactor has been suggested.",0,1,1
68,cytoxan,68,2219,2266,"nonoseaeaeonlyasinstruction can be nonoseaegeneralterm oselabeledaefromdruguse can occur with short-term or long-term use of cyclophosphamide.\n', '\n', ' Before starting treatment, exclude or correct any notaecandidatecontraindication [see Contraindications (4) ].",1,0,1
79,anoro,79,2355,2451,"Increasing the daily dose of ANORO ELLIPTA beyond the recommended dose is not appropriate in this situation.\n"", '\n', '\n', '\n', ' 5.3 Excessive Use of ANORO ELLIPTA and Use With Other Long-Acting Beta2-Agonists\n', '\n', '\n', '\n', ' ANORO ELLIPTA should not be used more often than recommended, at higher doses than recommended, or in conjunction with other medicines containing LABA, as an nonoseaeodorwithdrawal may result.",0,1,1
80,anoro,80,2451,2470,Clinically significant nonoseaeodorwithdrawal and fatalities have been reported in association with excessive use of inhaled sympathomimetic drugs.,0,1,1
3,lioresal,3,199,242,"Eleven patients who developed nonoseaeodorwithdrawal secondary to nonoseaeodorwithdrawal had their treatment temporarily suspended, but all were subsequently re-started and were not, therefore, considered to be true discontinuations.\n', '\n', ' nonoseaegeneralterm",0,1,1


In [17]:
ngramdf_b = generate_ngrams(subdf_b, labels_b, n=5, filter_punc=True, filter_ngrams_by_r=True)
ngramdf_b.shape

(425, 8)

In [19]:
ngramdf_b.head()

Unnamed: 0,drug_name,n,reason,token_0,token_1,token_2,token_3,token_4,notaecandidatecontraindication_in_pos_0,nonoseaeodorwithdrawal_in_pos_0,row_sum
0,valium,5.0,nonoseaeodorwithdrawal,frequency,and/or,severity,of,nonoseaeodorwithdrawal,0,0,0
1,valium,5.0,nonoseaeodorwithdrawal,and/or,severity,of,nonoseaeodorwithdrawal,\n,0,0,0
2,cytoxan,5.0,notaecandidatecontraindication,exclude,or,correct,any,notaecandidatecontraindication,0,0,0
3,anoro,5.0,nonoseaeodorwithdrawal,as,an,nonoseaeodorwithdrawal,may,result,0,0,0
4,anoro,5.0,nonoseaeodorwithdrawal,clinically,significant,nonoseaeodorwithdrawal,and,fatalities,0,0,0


In [18]:
# Looking at the n-gram results for these labels can help us build upon our existing knowledge of the world 
# to develop an initial set of terms, and/or patterns
ngramdf_b_non_token_zero = find_ngrams_with_label_not_in_initial_position(ngramdf_b, labels_b)
ngramdf_b_non_token_zero.shape

Min row sum=  0


(262, 11)

In [20]:
ngramdf_b_non_token_zero.head()

Unnamed: 0,drug_name,n,reason,token_0,token_1,token_2,token_3,token_4,notaecandidatecontraindication_in_pos_0,nonoseaeodorwithdrawal_in_pos_0,row_sum
0,valium,5.0,nonoseaeodorwithdrawal,frequency,and/or,severity,of,nonoseaeodorwithdrawal,0,0,0
1,valium,5.0,nonoseaeodorwithdrawal,and/or,severity,of,nonoseaeodorwithdrawal,\n,0,0,0
2,cytoxan,5.0,notaecandidatecontraindication,exclude,or,correct,any,notaecandidatecontraindication,0,0,0
3,anoro,5.0,nonoseaeodorwithdrawal,as,an,nonoseaeodorwithdrawal,may,result,0,0,0
4,anoro,5.0,nonoseaeodorwithdrawal,clinically,significant,nonoseaeodorwithdrawal,and,fatalities,0,0,0


## NOTES HERE

* temporality - suddeness
* chemical compounds

## NEW THINGS START HERE

#### Detecting Overdose and Withdrawal

In [22]:
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5977697/

overdose_df =  subdf_b[subdf_b['sentence'].str.contains("nonoseaeodorwithdrawal")]
overdose_df.shape

(43, 8)

In [62]:
overdose_df.head(n=10)

Unnamed: 0,file,sent_id,sent_start,sent_end,sentence,notaecandidatecontraindication,nonoseaeodorwithdrawal,row_sum
43,valium,43,2398,2464,"Abrupt withdrawal of Valium in such cases may also be associated with a temporary increase in the frequency and/or severity of nonoseaeodorwithdrawal \n', '\n', ' Pregnancy\n', '\n', ' An increased risk of oselabeledaeclasseffect and other oselabeledaeclasseffect associated with the use of benzodiazepine drugs during notaecandidatepreexistingconditionorriskfactor has been suggested.",0,1,1
79,anoro,79,2355,2451,"Increasing the daily dose of ANORO ELLIPTA beyond the recommended dose is not appropriate in this situation.\n"", '\n', '\n', '\n', ' 5.3 Excessive Use of ANORO ELLIPTA and Use With Other Long-Acting Beta2-Agonists\n', '\n', '\n', '\n', ' ANORO ELLIPTA should not be used more often than recommended, at higher doses than recommended, or in conjunction with other medicines containing LABA, as an nonoseaeodorwithdrawal may result.",0,1,1
80,anoro,80,2451,2470,Clinically significant nonoseaeodorwithdrawal and fatalities have been reported in association with excessive use of inhaled sympathomimetic drugs.,0,1,1
3,lioresal,3,199,242,"Eleven patients who developed nonoseaeodorwithdrawal secondary to nonoseaeodorwithdrawal had their treatment temporarily suspended, but all were subsequently re-started and were not, therefore, considered to be true discontinuations.\n', '\n', ' nonoseaegeneralterm",0,1,1
243,lioresal,243,3117,3137,"nonoseaeodorwithdrawal has been reported following baclofen nonoseaeaeonlyasinstruction \n', '\n', ' BOXED WARNING:",0,1,1
244,lioresal,244,3137,3221,"\n', '\n', ' oselabeledaefromdruguse regardless of the cause, has resulted in sequelae that include nonoseaeodorwithdrawal nonoseaeodorwithdrawal nonoseaeodorwithdrawal and nonoseaeodorwithdrawal that in rare cases has advanced to nonoseaeodorwithdrawal nonoseaeodorwithdrawal and nonoseaegeneralterm \n', '\n', ' Prevention of abrupt discontinuation of intrathecal baclofen requires careful attention to programming and monitoring of the infusion system, refill scheduling and procedures, and pump alarms.",0,1,1
250,lioresal,250,3534,3552,Subcutaneous injection may result in symptoms of a systemic nonoseaeodorwithdrawal or early depletion of the reservoir.,0,1,1
253,lioresal,253,3660,3823,"oselabeledaemedicationerror through the catheter access port may cause a life-threatening nonoseaeodorwithdrawal \n', '\n', ' Additional considerations pertaining to dosage adjustment\n', '\n', ' It may be important to titrate the dose to maintain some degree of muscle tone and allow occasional nonoseaeaeonlyasinstruction to: 1) help support circulatory function, 2) possibly prevent the formation of nonoseaeaeonlyasinstruction 3) optimize activities of daily living and ease of care.\n', '\n', ' Except in nonoseaeaeonlyasinstruction related emergencies, the dose of LIORESAL INTRATHECAL should ordinarily be reduced slowly if the drug is discontinued for any reason.\n', '\n', ' An attempt should be made to discontinue concomitant oral antispasticity medication to avoid possible nonoseaeaeonlyasinstruction or adverse drug interactions, either prior to screening or following implant and initiation of chronic LIORESAL INTRATHECAL infusion.",0,1,1
262,lioresal,262,4075,4353,"In some cases, performance of an imaging procedure may be appropriate to confirm or rule-out the diagnosis of an nonoseaeaeonlyasinstruction \n', '\n', ' Precautions in special patient populations\n', '\n', ' Careful dose titration of LIORESAL INTRATHECAL is needed when nonoseaeaeonlyasinstruction is necessary to sustain upright posture and balance in locomotion or whenever nonoseaeaeonlyasinstruction is used to obtain optimal function and care.\n', '\n', ' Patients suffering from notaecandidatepreexistingconditionorriskfactor notaecandidatepreexistingconditionorriskfactor or notaecandidatepreexistingconditionorriskfactor should be treated cautiously with LIORESAL INTRATHECAL and kept under careful surveillance, because exacerbations of these conditions have been observed with oral administration.\n', '\n', ' LIORESAL INTRATHECAL should be used with caution in patients with a history of notaecandidatepreexistingconditionorriskfactor The presence of nociceptive stimuli or abrupt nonoseaeaeonlyasinstruction of LIORESAL INTRATHECAL (baclofen injection) may cause an nonoseaeodorwithdrawal \n', '\n', ' Because LIORESAL is primarily excreted unchanged by the kidneys, it should be given with caution in patients with notaecandidatepreexistingconditionorriskfactor and it may be necessary to reduce the dosage.\n', '\n', ' LABORATORY TESTS\n', '\n', ' No specific laboratory tests are deemed essential for the management of patients on LIORESAL INTRATHECAL.\n', '\n', ' DRUG INTERACTIONS\n', '\n', ' There is inadequate systematic experience with the use of LIORESAL INTRATHECAL in combination with other medications to predict specific drug-drug interactions.",0,1,1
283,lioresal,283,5185,5259,"Acute massive oselabeledaefromdruguse may present as nonoseaeodorwithdrawal Less sudden and/or less severe forms of oselabeledaefromdruguse may present with signs of nonoseaeodorwithdrawal nonoseaeodorwithdrawal nonoseaeodorwithdrawal nonoseaeodorwithdrawal nonoseaeodorwithdrawal nonoseaeodorwithdrawal rostral progression of nonoseaeodorwithdrawal and nonoseaeodorwithdrawal progressing to nonoseaeodorwithdrawal Should nonoseaeaeonlyasinstruction appear likely, the patient should be taken immediately to a hospital for assessment and emptying of the pump reservoir.",0,1,1


In [39]:
labeled_overdose_drugs = set(overdose_df['file'].values)

In [29]:
len(labeled_overdose_drugs)

18

In [None]:
overdose_nlpql = 
'''

phenotype "AE overdose/withdrawal terms";

 include ClarityCore version "1.0" called Clarity;

 termset OverdoseTerms:
    ["overdose", "overdosing", "overdosed", "overdosage", "overdoses", "withdrawal", "withdrawing", "withdrawal"];

documentset Notes:
Clarity.createReportTypeList(["FDA Challenge Drug Label"]);

  define final OverdoseWithdrawalTerms:
Clarity.ProviderAssertion({
  termset: [OverdoseTerms],
  documentset: [Notes]
});
'''

In [36]:
clarity_overdose_drugs_df = pd.read_csv('http://18.220.133.76:5000/job_results/1164/cohort')
clarity_overdose_drugs_df.head()

Unnamed: 0,_id,batch,concept_code,end,experiencer,inserted_date,job_id,negation,nlpql_feature,owner,...,report_id,report_type,section,sentence,solr_id,source,start,subject,temporality,term
0,5c3f35485727ea01d7e4e21f,75,-1,63,Patient,2019-01-16 13:44:40.934000,1164,Affirmed,OverdoseWithdrawalTerms,claritynlp,...,FDA_AE_28,FDA Challenge Drug Label,UNKNOWN,"Young animals were slightly more intolerant to acute overdosage and to subtle reductions in erythrocytes, platelets and leukocytes but were less sensitive to toxicity in the liver, kidney, thymus, and genitalia.",FDA_AE_28,FDA_AE_2019,53,BIAXIN,Recent,overdosage
1,5c3f35495727ea01dae4e21f,25,-1,161,Patient,2019-01-16 13:44:41.080000,1164,Affirmed,OverdoseWithdrawalTerms,claritynlp,...,FDA_AE_195,FDA Challenge Drug Label,SEIZURE_FAMILY_HISTORY,"Prescriptions for Savella should be written for the smallest quantity of tablets consistent with good patient management, in order to reduce the risk of overdose.5.2 Serotonin Syndrome",FDA_AE_195,FDA_AE_2019,153,SAVELLA,Recent,overdose
2,5c3f35495727ea01dae4e220,25,-1,10,Patient,2019-01-16 13:44:41.084000,1164,Affirmed,OverdoseWithdrawalTerms,claritynlp,...,FDA_AE_195,FDA Challenge Drug Label,SEIZURE_FAMILY_HISTORY,Withdrawal symptoms have been reported in patients when discontinuing treatment with Savella.,FDA_AE_195,FDA_AE_2019,0,SAVELLA,Recent,Withdrawal
3,5c3f35495727ea01dae4e221,25,-1,138,Patient,2019-01-16 13:44:41.085000,1164,Affirmed,OverdoseWithdrawalTerms,claritynlp,...,FDA_AE_195,FDA Challenge Drug Label,SEIZURE_FAMILY_HISTORY,"During marketing of milnacipran, and other SNRIs and SSRIs, there have been spontaneous reports of adverse events indicative of withdrawal and physical dependence occurring upon discontinuation of these drugs, particularly when discontinuation is abrupt.",FDA_AE_195,FDA_AE_2019,128,SAVELLA,Recent,withdrawal
4,5c3f35495727ea01dae4e222,25,-1,10,Patient,2019-01-16 13:44:41.085000,1164,Affirmed,OverdoseWithdrawalTerms,claritynlp,...,FDA_AE_195,FDA Challenge Drug Label,SEIZURE_FAMILY_HISTORY,Withdrawal symptoms have been reported in patients when discontinuing treatment with Savella.,FDA_AE_195,FDA_AE_2019,0,SAVELLA,Recent,Withdrawal


In [40]:
clarity_overdose_drugs = set([x.lower() for x in clarity_overdose_drugs_df['subject'].values])
len(clarity_overdose_drugs)

29

In [41]:
labeled_overdose_drugs

{'anoro',
 'brilinta',
 'daytrana',
 'dexamethasone',
 'effexor',
 'effient',
 'lexapro',
 'lioresal',
 'paxil',
 'plavix',
 'remeron',
 'savella',
 'seroquel',
 'striverdi',
 'valium',
 'viibryd',
 'vimpat',
 'xarelto'}

In [42]:
clarity_overdose_drugs

{'arcalyst',
 'belsomra',
 'belviq',
 'biaxin',
 'daytrana',
 'dexamethasone',
 'effexor_xr',
 'enbrel',
 'hetlioz',
 'kadcyla',
 'latuda',
 'lexapro',
 'lioresal',
 'movantik',
 'paxil',
 'pennsaid',
 'prepopik',
 'relistor',
 'remeron',
 'renvela',
 'risperdal',
 'sabril',
 'savella',
 'sensipar',
 'seroquel',
 'valium',
 'victoza',
 'vimpat',
 'vyvanse'}

In [44]:
not_found_in_clarity_drugs = labeled_overdose_drugs - clarity_overdose_drugs
not_found_in_clarity_drugs

{'anoro',
 'brilinta',
 'effexor',
 'effient',
 'plavix',
 'striverdi',
 'viibryd',
 'xarelto'}

In [46]:
brilinta =  overdose_df[overdose_df['file'].str.contains("brilinta")]
brilinta

Unnamed: 0,file,sent_id,sent_start,sent_end,sentence,notaecandidatecontraindication,nonoseaeodorwithdrawal,row_sum
90,brilinta,90,2933,2945,Stopping BRILINTA increases the risk of subsequent nonoseaeodorwithdrawal [ see,0,1,1
100,brilinta,100,3301,3383,"\n', '\n', '\n', '\n', ' 5.4 Discontinuation of BRILINTA \n', '\n', '\n', '\n', ' Discontinuation of BRILINTA will increase the risk of nonoseaeodorwithdrawal nonoseaeodorwithdrawal and nonoseaegeneralterm If BRILINTA must be temporarily discontinued (e.g., to treat nonoseaeaeonlyasinstruction or for significant surgery), restart it as soon as possible.",0,1,1


In [None]:
with open('{}/ose_xml_training_20181101/{}.xml'.format(FDA_DIR, 'BRILINTA')) as f:
    contents = f.read()

print(contents)

In [57]:
effient =  overdose_df[overdose_df['file'].str.contains("effient")]
effient

Unnamed: 0,file,sent_id,sent_start,sent_end,sentence,notaecandidatecontraindication,nonoseaeodorwithdrawal,row_sum
113,effient,113,2528,2572,"Discontinuing Effient, particularly in the first few weeks after notaecandidatepreexistingconditionorriskfactor increases the risk of subsequent nonoseaeodorwithdrawal 5 WARNINGS AND PRECAUTIONS\n', '\n', '\n', '\n', ' EXCERPT:",0,1,1
140,effient,140,3840,3993,"In patients who are managed with PCI and stent placement, premature discontinuation of any antiplatelet medication, including thienopyridines, conveys an increased risk of nonoseaeodorwithdrawal nonoseaeodorwithdrawal and nonoseaegeneralterm Patients who require premature discontinuation of a thienopyridine will be at increased risk for nonoseaeodorwithdrawal Lapses in therapy should be avoided, and if thienopyridines must be temporarily discontinued because of an adverse event(s), they should be restarted as soon as possible [see Contraindications ( 4.1 , 4.2 ) and Warnings and Precautions ( 5.1 )] .\n', '\n', '\n', '\n', ' 5.4 Thrombotic Thrombocytopenic Purpura\n', '\n', '\n', '\n', ' oselabeledaefromdruguse has been reported with the use of Effient.",0,1,1


In [59]:
only_clarity_found_drugs = clarity_overdose_drugs - labeled_overdose_drugs
only_clarity_found_drugs

{'arcalyst',
 'belsomra',
 'belviq',
 'biaxin',
 'effexor_xr',
 'enbrel',
 'hetlioz',
 'kadcyla',
 'latuda',
 'movantik',
 'pennsaid',
 'prepopik',
 'relistor',
 'renvela',
 'risperdal',
 'sabril',
 'sensipar',
 'victoza',
 'vyvanse'}

In [61]:
same_drugs = clarity_overdose_drugs.intersection(labeled_overdose_drugs)
same_drugs

{'daytrana',
 'dexamethasone',
 'lexapro',
 'lioresal',
 'paxil',
 'remeron',
 'savella',
 'seroquel',
 'valium',
 'vimpat'}

In [65]:
clarity_overdose_drugs_df = pd.read_csv('http://18.220.133.76:5000/job_results/1175/cohort')
clarity_overdose_drugs = set([x.lower() for x in clarity_overdose_drugs_df['subject'].values])
clarity_overdose_drugs

{'afinitor',
 'androgel',
 'aranesp',
 'arcalyst',
 'aubagio',
 'avastin',
 'belsomra',
 'belviq',
 'benlysta',
 'biaxin',
 'brilinta',
 'carac',
 'carboplatin',
 'celebrex',
 'cyramza',
 'cytoxan',
 'daytrana',
 'dexamethasone',
 'doxil',
 'doxil_liposomal_',
 'effexor',
 'effexor_xr',
 'effient',
 'egrifta',
 'enbrel',
 'erbitux',
 'folotyn',
 'gazyva',
 'genotropin',
 'genotropin_preservative_free',
 'hetlioz',
 'impavido',
 'inlyta',
 'invokana',
 'istodax',
 'janumet',
 'kadcyla',
 'kyprolis',
 'latuda',
 'levaquin',
 'lexapro',
 'linzess',
 'lioresal',
 'lipitor',
 'livalo',
 'lynparza',
 'mekinist',
 'movantik',
 'myrbetriq',
 'onglyza',
 'opsumit',
 'paxil',
 'pennsaid',
 'plavix',
 'prepopik',
 'prograf',
 'raxibacumab',
 'relistor',
 'remeron',
 'remicade',
 'renvela',
 'risperdal',
 'rituxan',
 'sabril',
 'savella',
 'sensipar',
 'seroquel',
 'signifor',
 'simponi',
 'striverdi',
 'thalomid',
 'tradjenta',
 'valium',
 'victoza',
 'viekira',
 'viibryd',
 'vimpat',
 'votrient'

In [66]:
clarity_teratogenic_drugs_df = pd.read_csv('http://18.220.133.76:5000/job_results/1173/cohort')
clarity_teratogenic_drugs = set([x.lower() for x in clarity_teratogenic_drugs_df['subject'].values])
clarity_teratogenic_drugs

{'afinitor',
 'aubagio',
 'avastin',
 'biaxin',
 'carboplatin',
 'cyramza',
 'cytoxan',
 'dexamethasone',
 'doxil',
 'doxil_liposomal_',
 'folotyn',
 'impavido',
 'inlyta',
 'istodax',
 'kadcyla',
 'kyprolis',
 'lynparza',
 'mekinist',
 'opsumit',
 'paxil',
 'thalomid',
 'tobi',
 'valium',
 'votrient',
 'xalkori',
 'xeloda'}

In [67]:
clarity_pregnancy_drugs_df = pd.read_csv('http://18.220.133.76:5000/job_results/1172/cohort')
clarity_pregnancy_drugs = set([x.lower() for x in clarity_pregnancy_drugs_df['subject'].values])
clarity_pregnancy_drugs

{'afinitor',
 'aubagio',
 'avastin',
 'biaxin',
 'carboplatin',
 'celebrex',
 'cyramza',
 'cytoxan',
 'dexamethasone',
 'doxil',
 'doxil_liposomal_',
 'ella',
 'folotyn',
 'furosemide',
 'impavido',
 'inlyta',
 'istodax',
 'kadcyla',
 'kyprolis',
 'lexapro',
 'lioresal',
 'lynparza',
 'mekinist',
 'natroba',
 'opsumit',
 'paxil',
 'pennsaid',
 'premarin',
 'prograf',
 'remeron',
 'remicade',
 'sabril',
 'seroquel',
 'signifor',
 'surfaxin',
 'thalomid',
 'tobi',
 'valium',
 'votrient',
 'xalkori',
 'xarelto',
 'xeloda'}