# Drug Name Entity Classifier
## AHLT - MIRI 2018



## Initialization

Load needed modules and specify the working directory

In [1]:
# Load needed packages
import pandas as pd
import numpy as np
import scipy


import time # Execution time of some blocks
from nltk.tag import StanfordPOSTagger

# sklearn package
from sklearn.metrics import make_scorer
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import RandomizedSearchCV

import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics


# Import our defined functions
from NER_functions import *
from datasetBuilder import *
from feature_creation_ner import *



In [2]:
# init time
first_init = time.time()

In [3]:
train_dirs_whereto_parse = ['data/Train/DrugBank','data/Train/MedLine']
test_dirs_whereto_parse = ['data/Test/Test for DrugNER task/DrugBank','data/Test/Test for DrugNER task/MedLine']

## Reading the train and test data from the XML files
Accessing to all the files of the directory and storing id's and text's in two arrays.
We have also added the token 'STOP' at the end of each sentence.

In [4]:
train_texts_entities = createSet(train_dirs_whereto_parse)
test_texts_entities = createSet(test_dirs_whereto_parse)

In [5]:
print(train_texts_entities[3])

('In controlled clinical trials of AUGMENTIN XR, 22 patients received concomitant allopurinol and AUGMENTIN XR.', [('AUGMENTIN XR', 'brand'), ('allopurinol', 'drug'), ('AUGMENTIN XR', 'brand')])


Before computing features, I want the input data to have a special format:

In [6]:
# we want each dataset with the following format: 
# for ['have','Ibuprofeno'] ---- [('hola','V','O'),('Ibuprofeno','NN','B')]

def buildSet(text_entities):
    dataset = []
    a = 0
    for text,drugs in text_entities:
        a+=1
        print(a)
        print('drugs: ',drugs)
        # tokenizing
        tokenized_sentence = nltk.word_tokenize(text)
        # BIO tagging
        tokens_tags = BIOTagger(text, drugs)
        # POS tagging
        tokens_pos = nltk.pos_tag(tokenized_sentence)
        text_triples = []
        for idx,token in enumerate(tokenized_sentence):
            text_triples.append((token,tokens_pos[idx][1],tokens_tags[idx][1]))
        dataset.append(text_triples)
    
    return dataset

# train_set and test_set are list of list of triples; each list of triples refers to a different sentence
train_set = buildSet(train_texts_entities) 
test_set = buildSet(test_texts_entities)

1
drugs:  [('calcium', 'drug'), ('EMCYT', 'brand')]
2
drugs:  [('allopurinol', 'drug'), ('ampicillin', 'drug'), ('ampicillin', 'drug')]
3
drugs:  [('ampicillin', 'drug'), ('allopurinol', 'drug')]
4
drugs:  [('AUGMENTIN XR', 'brand'), ('allopurinol', 'drug'), ('AUGMENTIN XR', 'brand')]
5
drugs:  [('AUGMENTIN XR', 'brand'), ('allopurinol', 'drug')]
6
drugs:  [('broad-spectrum antibiotics', 'group'), ('AUGMENTIN XR', 'brand'), ('contraceptives', 'group')]
7
drugs:  [('PROCRIT', 'brand')]
8
drugs:  [('central nervous system depressants', 'group'), ('benzodiazepines', 'group')]
9
drugs:  [('ROMAZICON', 'brand'), ('narcotics', 'group'), ('anesthetics', 'group'), ('muscle relaxants', 'group')]
10
drugs:  [('ROMAZICON', 'brand'), ('cyclic antidepressants', 'group'), ('benzodiazepine', 'group'), ('flumazenil', 'drug')]
11
drugs:  [('ROMAZICON', 'brand')]
12
drugs:  [('ROMAZICON', 'brand'), ('benzodiazepine', 'group')]
13
drugs:  [('ROMAZICON', 'brand'), ('benzodiazepines', 'group')]
14
drugs:  

120
drugs:  [('bupropion', 'drug')]
121
drugs:  [('bupropion', 'drug')]
122
drugs:  [('bupropion', 'drug'), ('antidepressants', 'group'), ('nortriptyline', 'drug'), ('imipramine', 'drug'), ('desipramine', 'drug'), ('paroxetine', 'drug'), ('fluoxetine', 'drug'), ('sertraline', 'drug'), ('antipsychotics', 'group'), ('haloperidol', 'drug'), ('risperidone', 'drug'), ('thioridazine', 'drug'), ('beta-blockers', 'group'), ('metoprolol', 'drug'), ('Type 1C antiarrhythmics', 'group'), ('propafenone', 'drug'), ('flecainide', 'drug')]
123
drugs:  [('bupropion', 'drug')]
124
drugs:  [('MAO Inhibitors', 'group'), ('bupropion', 'drug'), ('MAO inhibitor', 'group'), ('phenelzine', 'drug')]
125
drugs:  [('Levodopa', 'drug'), ('Amantadine', 'drug'), ('bupropion', 'drug'), ('levodopa', 'drug'), ('amantadine', 'drug')]
126
drugs:  [('WELLBUTRIN', 'brand'), ('levodopa', 'drug'), ('amantadine', 'drug')]
127
drugs:  [('WELLBUTRIN', 'brand'), ('antipsychotics', 'group'), ('antidepressants', 'group'), ('theoph

214
drugs:  [('Phenobarbital', 'drug'), ('Primidone', 'drug'), ('tiagabine', 'drug'), ('phenobarbital', 'drug'), ('primidone', 'drug')]
215
drugs:  [('tiagabine', 'drug'), ('phenobarbital', 'drug'), ('primidone', 'drug')]
216
drugs:  [('AEDs', 'group'), ('GABITRIL', 'brand'), ('Carbamazepine', 'drug'), ('tiagabine', 'drug'), ('carbamazepine', 'drug'), ('AEDs', 'group')]
217
drugs:  [('Phenytoin', 'drug'), ('tiagabine', 'drug'), ('phenytoin', 'drug'), ('AEDs', 'group')]
218
drugs:  [('Phenobarbital', 'drug'), ('Primidone', 'drug'), ('tiagabine', 'drug'), ('phenobarbital', 'drug'), ('primidone', 'drug'), ('AEDs', 'group')]
219
drugs:  [('Valproate', 'drug'), ('tiagabine', 'drug'), ('valproate', 'drug'), ('tiagabine', 'drug'), ('valproate', 'drug'), ('tiagabine', 'drug')]
220
drugs:  [('GABITRIL', 'brand'), ('Cimetidine', 'drug'), ('cimetidine', 'drug'), ('tiagabine', 'drug'), ('tiagabine', 'drug')]
221
drugs:  [('Theophylline', 'drug'), ('tiagabine', 'drug'), ('theophylline', 'drug')]
22

313
drugs:  [('antidiabetic drug', 'group')]
314
drugs:  [('reserpine', 'drug')]
315
drugs:  [('beta-blockers', 'group')]
316
drugs:  [('epinephrine', 'drug')]
317
drugs:  [('theophylline', 'drug'), ('quinolone', 'group')]
318
drugs:  [('theophylline', 'drug'), ('quinolones', 'group'), ('theophylline', 'drug')]
319
drugs:  [('theophylline', 'drug'), ('theophylline', 'drug')]
320
drugs:  [('Quinolones', 'group'), ('caffeine', 'drug')]
321
drugs:  [('caffeine', 'drug')]
322
drugs:  [('Quinolones', 'group'), ('nalidixic acid', 'drug'), ('anticoagulant', 'group'), ('warfarin', 'drug')]
323
drugs:  [('Nitrofurantoin', 'drug'), ('nalidixic acid', 'drug')]
324
drugs:  [('Antacids', 'group'), ('magnesium', 'drug'), ('aluminum', 'drug'), ('calcium', 'drug')]
325
drugs:  [('sucralfate', 'drug'), ('iron', 'drug')]
326
drugs:  [('multivitamins', 'group'), ('zinc', 'drug')]
327
drugs:  [('Videx', 'brand'), ('Didanosine', 'drug'), ('quinolones', 'group')]
328
drugs:  [('nalidixic acid', 'drug')]
329

453
drugs:  [('Vaccines', 'group'), ('corticosteroid', 'group'), ('live vaccines', 'group'), ('inactivated vaccines', 'group')]
454
drugs:  [('Corticosteroids', 'group'), ('live attenuated vaccines', 'group')]
455
drugs:  [('vaccines', 'group'), ('corticosteroid', 'group')]
456
drugs:  [('Cytosine arabinoside', 'drug'), ('cytostatic agent', 'group'), ('flucytosine', 'drug')]
457
drugs:  [('flucytosine', 'drug')]
458
drugs:  [('Ancobon', 'brand')]
459
drugs:  [('HYDROXYZINE', 'drug'), ('CENTRAL NERVOUS SYSTEM DEPRESSANTS', 'group'), ('NARCOTICS', 'group'), ('NON-NARCOTIC ANALGESICS', 'group'), ('BARBITURATES', 'group')]
460
drugs:  [('central nervous system depressants', 'group'), ('hydroxyzine', 'drug')]
461
drugs:  [('Atarax', 'brand')]
462
drugs:  [('CNS depressant drugs', 'group'), ('alcohol', 'drug')]
463
drugs:  [('ZOLADEX', 'brand')]
464
drugs:  [('Amantadine', 'drug'), ('tricyclic antidepressants', 'group'), ('MAOIs', 'group'), ('clidinium', 'drug')]
465
drugs:  [('Clidinium', '

553
drugs:  [('Starlix', 'brand')]
554
drugs:  [('Starlix', 'brand'), ('tolbutamide', 'drug')]
555
drugs:  [('Glyburide', 'drug'), ('Starlix', 'brand'), ('glyburide', 'drug')]
556
drugs:  [('Metformin', 'drug'), ('Starlix', 'brand'), ('metformin', 'drug')]
557
drugs:  [('Digoxin', 'drug'), ('Starlix', 'brand'), ('digoxin', 'drug')]
558
drugs:  [('Warfarin', 'drug'), ('Starlix', 'brand'), ('warfarin', 'drug')]
559
drugs:  [('Diclofenac', 'drug'), ('Starlix', 'brand'), ('diclofenac', 'drug')]
560
drugs:  [('Nateglinide', 'drug')]
561
drugs:  [('furosemide', 'drug'), ('propranolol', 'drug'), ('captopril', 'drug'), ('nicardipine', 'drug'), ('pravastatin', 'drug'), ('glyburide', 'drug'), ('warfarin', 'drug'), ('phenytoin', 'drug'), ('acetylsalicylic acid', 'drug'), ('tolbutamide', 'drug'), ('metformin', 'drug'), ('nateglinide', 'drug')]
562
drugs:  [('nateglinide', 'drug'), ('propranolol', 'drug'), ('glyburide', 'drug'), ('nicardipine', 'drug'), ('warfarin', 'drug'), ('phenytoin', 'drug'), 

692
drugs:  [('Phenytoin', 'drug'), ('Amphetamines', 'group'), ('phenytoin', 'drug')]
693
drugs:  [('phenytoin', 'drug')]
694
drugs:  [('Propoxyphene', 'drug'), ('propoxyphene', 'drug'), ('amphetamine', 'group')]
695
drugs:  [('Veratrum alkaloids', 'group'), ('Amphetamines', 'group'), ('veratrum alkaloids', 'group')]
696
drugs:  [('Amphetamines', 'group'), ('corticosteroid', 'group')]
697
drugs:  [('Amphetamines', 'group')]
698
drugs:  [('Loratadine', 'drug'), ('erythromycin', 'drug'), ('cimetidine', 'drug'), ('ketoconazole', 'drug')]
699
drugs:  [('loratadine', 'drug'), ('descarboethoxyloratadine', 'drug'), ('loratadine', 'drug'), ('loratadine', 'drug')]
700
drugs:  [('cimetidine', 'drug'), ('ketoconazole', 'drug')]
701
drugs:  [('erythromycin', 'drug'), ('loratadine', 'drug'), ('erythromycin', 'drug')]
702
drugs:  [('Loratadine', 'drug'), ('Descarboethoxyloratadine', 'drug'), ('Loratadine', 'drug')]
703
drugs:  [('Loratadine', 'drug'), ('Descarboethoxyloratadine', 'drug')]
704
drugs:

813
drugs:  [('Soriatane', 'brand')]
814
drugs:  [('Soriatane', 'brand')]
815
drugs:  [('Soriatane', 'brand')]
816
drugs:  [('anidulafungin', 'drug')]
817
drugs:  [('Cerubidine', 'brand'), ('doxorubicin', 'drug')]
818
drugs:  [('Cerubidine', 'brand'), ('doxorubicin', 'drug'), ('Cerubidine', 'brand')]
819
drugs:  [('Cyclophosphamide', 'drug'), ('Cerubidine', 'brand')]
820
drugs:  [('Cerubidine', 'brand')]
821
drugs:  [('methotrexate', 'drug')]
822
drugs:  [('Heparin', 'drug'), ('heparin', 'drug'), ('heparin', 'drug'), ('Argatroban', 'drug'), ('heparin', 'drug')]
823
drugs:  [('Argatroban', 'drug'), ('heparin', 'drug'), ('Argatroban', 'drug')]
824
drugs:  [('Aspirin', 'brand'), ('Acetaminophen', 'drug'), ('Argatroban', 'drug'), ('aspirin', 'brand'), ('Argatroban', 'drug'), ('acetaminophen', 'drug'), ('Argatroban', 'drug')]
825
drugs:  [('anticoagulant agents', 'group'), ('Argatroban', 'drug'), ('warfarin', 'drug')]
826
drugs:  [('Argatroban', 'drug'), ('warfarin', 'drug')]
827
drugs:  [(

906
drugs:  [('vitamin D', 'group')]
907
drugs:  [('Phenytoin', 'drug'), ('Phenobarbital', 'drug'), ('phenytoin', 'drug'), ('phenobarbital', 'drug'), ('vitamin D', 'group'), ('calcitriol', 'drug')]
908
drugs:  [('calcitriol', 'drug'), ('Rocaltrol', 'brand')]
909
drugs:  [('Thiazides', 'group'), ('Thiazides', 'group')]
910
drugs:  [('thiazides', 'group'), ('vitamin D', 'group')]
911
drugs:  [('Digitalis', 'group'), ('Vitamin D', 'group'), ('digitalis', 'group')]
912
drugs:  [('Ketoconazole', 'drug'), ('Ketoconazole', 'drug'), ('vitamin D', 'group')]
913
drugs:  [('ketoconazole', 'drug')]
914
drugs:  [('ketoconazole', 'drug'), ('vitamin D', 'group')]
915
drugs:  [('Corticosteroids', 'group'), ('vitamin D analogues', 'group'), ('corticosteroids', 'group')]
916
drugs:  [('vitamin D', 'group')]
917
drugs:  [('Vitamin D', 'group'), ('vitamin D analogues', 'group')]
918
drugs:  [('Calcium', 'drug'), ('calcium', 'drug')]
919
drugs:  [('Magnesium', 'drug'), ('Magnesium', 'drug'), ('antacids', '

1017
drugs:  [('TAMBOCOR', 'brand'), ('digoxin', 'drug'), ('digoxin', 'drug')]
1018
drugs:  [('TAMBOCOR', 'brand'), ('propranolol', 'drug'), ('flecainide', 'drug'), ('propranolol', 'drug')]
1019
drugs:  [('TAMBOCOR', 'brand'), ('propranolol', 'drug')]
1020
drugs:  [('TAMBOCOR', 'brand'), ('propranolol', 'drug')]
1021
drugs:  [('TAMBOCOR', 'brand'), ('beta blockers', 'group')]
1022
drugs:  [('beta blockers', 'group'), ('flecainide', 'drug')]
1023
drugs:  [('Flecainide', 'drug')]
1024
drugs:  [('flecainide', 'drug')]
1025
drugs:  [('anticoagulants', 'group')]
1026
drugs:  [('TAMBOCOR', 'brand'), ('diuretics', 'group')]
1027
drugs:  [('phenytoin', 'drug'), ('phenobarbital', 'drug'), ('carbamazepine', 'drug'), ('flecainide', 'drug')]
1028
drugs:  [('cimetidine', 'drug'), ('flecainide', 'drug')]
1029
drugs:  [('amiodarone', 'drug'), ('flecainide', 'drug'), ('flecainide', 'drug'), ('flecainide', 'drug')]
1030
drugs:  [('quinidine', 'drug'), ('flecainide', 'drug'), ('flecainide', 'drug')]
103

1142
drugs:  [('contraceptives', 'group')]
1143
drugs:  [('Erythromycin', 'drug'), ('Ketoconazole', 'drug'), ('Fexofenadine', 'drug')]
1144
drugs:  [('fexofenadine hydrochloride', 'drug'), ('ketoconazole', 'drug'), ('erythromycin', 'drug'), ('fexofenadine', 'drug')]
1145
drugs:  [('Fexofenadine', 'drug'), ('erythromycin', 'drug'), ('ketoconazole', 'drug')]
1146
drugs:  [('fexofenadine hydrochloride', 'drug'), ('erythromycin', 'drug'), ('ketoconazole', 'drug')]
1147
drugs:  [('fexofenadine hydrochloride', 'drug'), ('erythromycin', 'drug'), ('ketoconazole', 'drug')]
1148
drugs:  [('fexofenadine', 'drug'), ('fexofenadine hydrochloride', 'drug')]
1149
drugs:  [('Erythromycin', 'drug')]
1150
drugs:  [('Ketoconazole', 'drug')]
1151
drugs:  [('ketoconazole', 'drug'), ('erythromycin', 'drug'), ('fexofenadine', 'drug')]
1152
drugs:  [('fexofenadine', 'drug'), ('ketoconazole', 'drug'), ('fexofenadine', 'drug'), ('erythromycin', 'drug')]
1153
drugs:  [('Antacids', 'group'), ('fexofenadine hydroch

1273
drugs:  [('deferasirox', 'drug'), ('hydroxyurea', 'drug')]
1274
drugs:  [('Exjade', 'brand')]
1275
drugs:  [('deferasirox', 'drug')]
1276
drugs:  [('Deferasirox', 'drug')]
1277
drugs:  [('Exjade', 'brand')]
1278
drugs:  [('codeine', 'drug'), ('alcohol', 'drug'), ('antihistamines', 'group'), ('psychotropics', 'group')]
1279
drugs:  [('dextromethorphan', 'drug'), ('monoamine oxidase inhibitors', 'group'), ('MAOIs', 'group')]
1280
drugs:  [('dextromethorphan hydrobromide', 'drug'), ('alcohol', 'drug'), ('antihistamines', 'group'), ('psychotropics', 'group')]
1281
drugs:  [('AMEVIVE', 'brand'), ('immunosuppressive', 'group')]
1282
drugs:  [('alefacept', 'drug')]
1283
drugs:  [('alefacept', 'drug')]
1284
drugs:  [('AMEVIVE', 'brand')]
1285
drugs:  [('immunosuppressive', 'group')]
1286
drugs:  [('AMEVIVE', 'brand'), ('AMEVIVE', 'brand'), ('AMEVIVE', 'brand')]
1287
drugs:  [('AMEVIVE', 'brand')]
1288
drugs:  [('AMEVIVE', 'brand')]
1289
drugs:  [('AMEVIVE', 'brand')]
1290
drugs:  [('AMEVI

1369
drugs:  [('Cerebyx', 'brand'), ('phenytoin', 'drug')]
1370
drugs:  [('Phenytoin', 'drug')]
1371
drugs:  [('Phenytoin', 'drug')]
1372
drugs:  [('phenytoin', 'drug')]
1373
drugs:  [('Phenytoin', 'drug')]
1374
drugs:  [('phenytoin', 'drug'), ('alcohol', 'drug'), ('amiodarone', 'drug'), ('chlordiazepoxide', 'drug'), ('cimetidine', 'drug'), ('diazepam', 'drug'), ('dicumarol', 'drug'), ('disulfiram', 'drug'), ('estrogens', 'group'), ('ethosuximide', 'drug'), ('fluoxetine', 'drug'), ('H2-antagonists', 'group'), ('halothane', 'drug'), ('isoniazid', 'drug'), ('methylphenidate', 'drug'), ('phenothiazines', 'group'), ('phenylbutazone', 'drug'), ('salicylates', 'group'), ('succinimides', 'group'), ('sulfonamides', 'group'), ('tolbutamide', 'drug'), ('trazodone', 'drug')]
1375
drugs:  [('phenytoin', 'drug'), ('carbamazepine', 'drug'), ('alcohol', 'drug'), ('reserpine', 'drug')]
1376
drugs:  [('phenytoin', 'drug'), ('phenobarbital', 'drug'), ('sodium valproate', 'drug')]
1377
drugs:  [('phenyto

1479
drugs:  [('INH', 'drug'), ('Isoniazid', 'drug'), ('ketoconazole', 'drug')]
1480
drugs:  [('ketoconazole', 'drug'), ('loratadine', 'drug'), ('loratadine', 'drug')]
1481
drugs:  [('descarboethoxyloratadine', 'drug_n')]
1482
drugs:  [('loratadine', 'drug'), ('ketoconazole', 'drug')]
1483
drugs:  [('alcohol', 'drug')]
1484
drugs:  [('Cefizox', 'brand'), ('cephalosporins', 'group'), ('aminoglycosides', 'group')]
1485
drugs:  [('famotidine', 'drug')]
1486
drugs:  [('warfarin', 'drug'), ('theophylline', 'drug'), ('phenytoin', 'drug'), ('diazepam', 'drug'), ('aminopyrine', 'drug'), ('antipyrine', 'drug')]
1487
drugs:  [('Indocyanine green', 'drug_n')]
1488
drugs:  [('Prostin VR Pediatric', 'brand')]
1489
drugs:  [('antibiotics', 'group'), ('penicillin', 'drug'), ('gentamicin', 'drug')]
1490
drugs:  [('vasopressors', 'group'), ('dopamine', 'drug'), ('isoproterenol', 'drug')]
1491
drugs:  [('cardiac glycosides', 'group')]
1492
drugs:  [('diuretics', 'group'), ('furosemide', 'drug')]
1493
dr

1598
drugs:  [('ranitidine', 'drug')]
1599
drugs:  [('rifampin', 'drug')]
1600
drugs:  [('vitamin C', 'drug')]
1601
drugs:  [('anticoagulant', 'group'), ('adrenocortical steroids', 'group'), ('alcohol', 'drug'), ('antacids', 'group'), ('antihistamines', 'group'), ('barbiturates', 'group'), ('carbamazepine', 'drug'), ('chloral hydrate', 'drug'), ('chlordiazepoxide', 'drug'), ('cholestyramine', 'drug'), ('vitamin K', 'group'), ('diuretics', 'group'), ('ethchlorvynol', 'drug'), ('glutethimide', 'drug'), ('griseofulvin', 'drug'), ('haloperidol', 'drug'), ('meprobamate', 'drug'), ('contraceptives', 'group'), ('paraldehyde', 'drug'), ('primidone', 'drug'), ('ranitidine', 'drug'), ('rifampin', 'drug'), ('vitamin C', 'drug'), ('warfarin sodium', 'drug')]
1602
drugs:  [('anticoagulant', 'group'), ('alcohol', 'drug')]
1603
drugs:  [('allopurinol', 'drug')]
1604
drugs:  [('aminosalicylic acid', 'drug')]
1605
drugs:  [('amiodarone', 'drug')]
1606
drugs:  [('anabolic steroids', 'group')]
1607
drugs

1748
drugs:  [('Isocarboxazid', 'drug'), ('psychotropic agents', 'group')]
1749
drugs:  [('Isocarboxazid', 'drug'), ('Isocarboxazid', 'drug')]
1750
drugs:  [('Isocarboxazid', 'drug')]
1751
drugs:  [('Eprosartan', 'drug'), ('digoxin', 'drug'), ('warfarin', 'drug'), ('glyburide', 'drug')]
1752
drugs:  [('eprosartan', 'drug'), ('ketoconazole', 'drug'), ('fluconazole', 'drug'), ('eprosartan', 'drug')]
1753
drugs:  [('Ranitidine', 'drug'), ('eprosartan', 'drug')]
1754
drugs:  [('Eprosartan', 'drug'), ('thiazide diuretic', 'group'), ('hydrochlorothiazide', 'drug')]
1755
drugs:  [('Eprosartan', 'drug'), ('calcium channel blockers', 'group'), ('nifedipine', 'drug')]
1756
drugs:  [('Anesthetics', 'group'), ('Sedatives', 'group'), ('Hypnotics', 'group'), ('Opioids', 'group'), ('PRECEDEX', 'brand'), ('anesthetics', 'group'), ('sedatives', 'group'), ('hypnotics', 'group'), ('opioids', 'group')]
1757
drugs:  [('sevoflurane', 'drug'), ('isoflurane', 'drug'), ('propofol', 'drug'), ('alfentanil', 'dru

1877
drugs:  [('Nitrates', 'group'), ('Nifedipine', 'drug'), ('nitrates', 'group')]
1878
drugs:  [('Digitalis', 'group'), ('digoxin', 'drug'), ('digoxin', 'drug'), ('nifedipine', 'drug'), ('digoxin', 'drug'), ('nifedipine', 'drug')]
1879
drugs:  [('nifedipine', 'drug'), ('digoxin', 'drug'), ('digoxin', 'drug')]
1880
drugs:  [('digoxin', 'drug')]
1881
drugs:  [('digoxin', 'drug')]
1882
drugs:  [('digoxin', 'drug'), ('digoxin', 'drug'), ('nifedipine', 'drug')]
1883
drugs:  [('Quinidine', 'drug'), ('quinidine', 'drug'), ('nifedipine', 'drug'), ('quinidine', 'drug')]
1884
drugs:  [('Coumarin Anticoagulants', 'group'), ('coumarin anticoagulants', 'group'), ('nifedipine', 'drug')]
1885
drugs:  [('nifedipine', 'drug')]
1886
drugs:  [('Cimetidine', 'drug'), ('nifedipine', 'drug'), ('cimetidine', 'drug'), ('nifedipine', 'drug')]
1887
drugs:  [('Ranitidine', 'drug')]
1888
drugs:  [('cimetidine', 'drug'), ('nifedipine', 'drug')]
1889
drugs:  [('nifedipine', 'drug'), ('cimetidine', 'drug')]
1890
d

2115
drugs:  [('tricyclic antidepressants', 'group'), ('fluoxetine', 'drug')]
2116
drugs:  [('Theophylline', 'drug'), ('ceftibuten', 'drug')]
2117
drugs:  [('ceftibuten', 'drug'), ('theophylline', 'drug')]
2118
drugs:  [('theophylline', 'drug')]
2119
drugs:  [('ceftibuten', 'drug'), ('theophylline', 'drug')]
2120
drugs:  [('Antacids', 'group'), ('H 2 -receptor antagonists', 'group'), ('ceftibuten', 'drug')]
2121
drugs:  [('ceftibuten', 'drug')]
2122
drugs:  [('antacid', 'group'), ('ceftibuten', 'drug')]
2123
drugs:  [('ranitidine', 'drug'), ('ceftibuten', 'drug'), ('ceftibuten', 'drug')]
2124
drugs:  [('ceftibuten', 'drug')]
2125
drugs:  [('cephalosporins', 'group')]
2126
drugs:  [('ceftibuten', 'drug'), ('ceftibuten', 'drug')]
2127
drugs:  [('FLEXERIL', 'brand'), ('MAO inhibitors', 'group')]
2128
drugs:  [('FLEXERIL', 'brand'), ('alcohol', 'drug'), ('barbiturates', 'group'), ('CNS depressants', 'group')]
2129
drugs:  [('Tricyclic antidepressants', 'group'), ('guanethidine', 'drug')]
2

2240
drugs:  [('beta-receptor agonist drugs', 'group')]
2241
drugs:  [('beta-agonist bronchodilator drugs', 'group')]
2242
drugs:  [('Cimetidine', 'drug'), ('labetalol HCl', 'drug')]
2243
drugs:  [('labetalol HCl', 'drug')]
2244
drugs:  [('halothane', 'drug'), ('labetalol HCl', 'drug')]
2245
drugs:  [('labetalol HCl', 'drug'), ('halothane', 'drug'), ('halothane', 'drug')]
2246
drugs:  [('labetalol HCl', 'drug')]
2247
drugs:  [('Labetalol HCl', 'drug'), ('nitroglycerin', 'drug')]
2248
drugs:  [('labetalol HCl', 'drug'), ('nitroglycerin', 'drug')]
2249
drugs:  [('labetalol', 'drug'), ('calcium antagonist', 'group'), ('verapamil', 'drug')]
2250
drugs:  [('beta-blockers', 'group')]
2251
drugs:  [('epinephrine', 'drug')]
2252
drugs:  [('labetalol HCl', 'drug')]
2253
drugs:  [('Labetalol HCl', 'drug'), ('amphetamine', 'drug')]
2254
drugs:  [('labetalol', 'drug'), ('amphetamine', 'drug')]
2255
drugs:  [('Angiomax', 'brand')]
2256
drugs:  [('Angiomax', 'brand'), ('heparin', 'drug'), ('warfarin

2345
drugs:  [('TAXOL', 'brand')]
2346
drugs:  [('alcohol', 'drug')]
2347
drugs:  [('TAXOL', 'brand')]
2348
drugs:  [('TAXOL', 'brand')]
2349
drugs:  [('TAXOL', 'brand')]
2350
drugs:  [('TAXOL', 'brand')]
2351
drugs:  [('sulfonylureas', 'group'), ('nonsteroidal anti-inflammatory agents', 'group'), ('azoles', 'group'), ('salicylates', 'group'), ('sulfonamides', 'group'), ('chloramphenicol', 'drug'), ('probenecid', 'drug'), ('coumarins', 'group'), ('monoamine oxidase inhibitors', 'group'), ('beta adrenergic blocking agents', 'group')]
2352
drugs:  [('glipizide', 'drug')]
2353
drugs:  [('glipizide', 'drug')]
2354
drugs:  [('glipizide', 'drug'), ('tolbutamide', 'drug'), ('salicylate', 'group'), ('dicumarol', 'drug')]
2355
drugs:  [('glipizide', 'drug')]
2356
drugs:  [('thiazides', 'group'), ('diuretics', 'group'), ('corticosteroids', 'group'), ('phenothiazines', 'group'), ('thyroid products', 'group'), ('estrogens', 'group'), ('contraceptives', 'group'), ('phenytoin', 'drug'), ('nicotinic 

drugs:  [('Norpace', 'brand'), ('propranolol', 'drug'), ('diazepam', 'drug')]
2481
drugs:  [('Norpace', 'brand'), ('quinidine', 'drug'), ('disopyramide', 'drug'), ('quinidine', 'drug')]
2482
drugs:  [('Norpace', 'brand'), ('digoxin', 'drug')]
2483
drugs:  [('disopyramide phosphate', 'drug'), ('erythromycin', 'drug'), ('disopyramide', 'drug')]
2484
drugs:  [('disopyramide phosphate', 'drug')]
2485
drugs:  [('verapamil', 'drug'), ('disopyramide phosphate', 'drug'), ('disopyramide', 'drug'), ('verapamil', 'drug')]
2486
drugs:  [('amyl nitrite', 'drug'), ('alcohol', 'drug')]
2487
drugs:  [('5HT3 Antagonists', 'group'), ('apomorphine', 'drug'), ('ondansetron', 'drug'), ('apomorphine', 'drug'), ('5HT3 antagonist class', 'group'), ('ondansetron', 'drug'), ('granisetron', 'drug'), ('dolasetron', 'drug'), ('palonosetron', 'drug'), ('alosetron', 'drug')]
2488
drugs:  [('Antihypertensive Medication', 'group'), ('Vasodilators', 'group'), ('antihypertensive medications', 'group'), ('vasodilators', 

2605
drugs:  [('Methotrexate', 'drug'), ('methotrexate', 'drug'), ('NSAIDs', 'group'), ('methotrexate', 'drug'), ('methotrexate', 'drug')]
2606
drugs:  [('TORADOL', 'brand'), ('methotrexate', 'drug')]
2607
drugs:  [('Nondepolarizing Muscle Relaxants', 'group'), ('nondepolarizing muscle relaxants', 'group')]
2608
drugs:  [('TORADOL', 'brand'), ('muscle relaxants', 'group')]
2609
drugs:  [('ACE Inhibitors', 'group'), ('ACE inhibitors', 'group')]
2610
drugs:  [('Antiepileptic Drugs', 'group'), ('TORADOL', 'brand'), ('antiepileptic drugs', 'group'), ('phenytoin', 'drug'), ('carbamazepine', 'drug')]
2611
drugs:  [('Psychoactive Drugs', 'group'), ('TORADOL', 'brand'), ('psychoactive drugs', 'group'), ('fluoxetine', 'drug'), ('thiothixene', 'drug'), ('alprazolam', 'drug')]
2612
drugs:  [('Morphine', 'drug'), ('morphine', 'drug')]
2613
drugs:  [('TORADOL', 'brand'), ('morphine', 'drug')]
2614
drugs:  [('TORADOL', 'brand')]
2615
drugs:  [('Ethoxzolamide', 'drug'), ('tricyclics', 'group'), ('amp

2735
drugs:  [('Antacids', 'group'), ('magnesium hydroxide', 'drug'), ('aluminum hydroxide', 'drug'), ('ketoprofen', 'drug'), ('Orudis', 'brand')]
2736
drugs:  [('Aspirin', 'brand'), ('Ketoprofen', 'drug'), ('aspirin', 'brand')]
2737
drugs:  [('aspirin', 'brand'), ('ketoprofen', 'drug'), ('ketoprofen', 'drug'), ('aspirin', 'brand'), ('aspirin', 'brand')]
2738
drugs:  [('aspirin', 'brand'), ('ketoprofen', 'drug')]
2739
drugs:  [('Diuretic', 'group'), ('Hydrochlorothiazide', 'drug'), ('ketoprofen', 'drug'), ('hydrochlorothiazide', 'drug')]
2740
drugs:  [('diuretics', 'group')]
2741
drugs:  [('Digoxin', 'drug'), ('ketoprofen', 'drug'), ('digoxin', 'drug'), ('ketoprofen', 'drug'), ('digoxin', 'drug')]
2742
drugs:  [('Warfarin', 'drug'), ('ketoprofen', 'drug'), ('warfarin', 'drug')]
2743
drugs:  [('warfarin', 'drug'), ('ketoprofen', 'drug')]
2744
drugs:  [('ketoprofen', 'drug'), ('ketoprofen', 'drug'), ('warfarin', 'drug')]
2745
drugs:  [('Probenecid', 'drug'), ('Probenecid', 'drug'), ('ket

2865
drugs:  [('valdecoxib', 'drug'), ('glyburide', 'drug'), ('glyburide', 'drug')]
2866
drugs:  [('valdecoxib', 'drug'), ('glyburide', 'drug'), ('glyburide', 'drug')]
2867
drugs:  [('valdecoxib', 'drug'), ('glyburide', 'drug'), ('glyburide', 'drug'), ('glyburide', 'drug'), ('glyburide', 'drug')]
2868
drugs:  [('valdecoxib', 'drug'), ('glyburide', 'drug'), ('valdecoxib', 'drug')]
2869
drugs:  [('glyburide', 'drug'), ('valdecoxib', 'drug')]
2870
drugs:  [('Omeprazole', 'drug'), ('Omeprazole', 'drug')]
2871
drugs:  [('Valdecoxib', 'drug'), ('omeprazole', 'drug')]
2872
drugs:  [('valdecoxib', 'drug'), ('omeprazole', 'drug')]
2873
drugs:  [('omeprazole', 'drug'), ('valdecoxib', 'drug')]
2874
drugs:  [('omeprazole', 'drug'), ('omeprazole', 'drug')]
2875
drugs:  [('valdecoxib', 'drug'), ('omeprazole', 'drug')]
2876
drugs:  [('Contraceptives', 'group'), ('Valdecoxib', 'drug'), ('contraceptive', 'group'), ('norethindrone', 'drug'), ('ethinyl estradiol', 'drug'), ('Ortho-Novum', 'brand')]
2877


drugs:  [('aspirin', 'brand'), ('NSAIDs', 'group'), ('ibandronate', 'drug')]
2980
drugs:  [('aspirin', 'brand'), ('NSAIDs', 'group'), ('bisphosphonates', 'group'), ('aspirin', 'brand'), ('NSAIDs', 'group'), ('Ibandronate', 'drug')]
2981
drugs:  [('Bisphosphonates', 'group')]
2982
drugs:  [('ibandronate', 'drug')]
2983
drugs:  [('Simulect', 'brand'), ('cyclosporine', 'drug'), ('corticosteroids', 'group'), ('azathioprine', 'drug'), ('mycophenolate mofetil', 'drug')]
2984
drugs:  [('Simulect', 'brand')]
2985
drugs:  [('Simulect', 'brand'), ('azathioprine', 'drug'), ('mycophenolate mofetil', 'drug'), ('cyclosporine', 'drug'), ('corticosteroids', 'group')]
2986
drugs:  [('Simulect', 'brand'), ('azathioprine', 'drug'), ('mycophenolate mofetil', 'drug')]
2987
drugs:  [('Simulect', 'brand'), ('azathioprine', 'drug'), ('corticosteroids', 'group'), ('cyclosporine', 'drug'), ('mycophenolate mofetil', 'drug'), ('muromonab-CD3', 'drug')]
2988
drugs:  [('cephalosporin antibiotics', 'group')]
2989
dr

3103
drugs:  [('thyroid', 'drug')]
3104
drugs:  [('alcohol', 'drug'), ('barbiturates', 'drug'), ('CNS depressants', 'drug')]
3105
drugs:  [('disulfiram', 'drug'), ('amitriptyline HCl', 'drug'), ('anticholinergic', 'drug'), ('sympathomimetic', 'drug'), ('epinephrine', 'drug'), ('anesthetics', 'drug')]
3106
drugs:  [('amitriptyline HCl', 'drug'), ('anticholinergic', 'drug'), ('neuroleptic', 'drug')]
3107
drugs:  [('tricyclic antidepressants', 'drug'), ('anticholinergic', 'drug')]
3108
drugs:  [('Cimetidine', 'drug'), ('tricyclic antidepressants', 'drug'), ('drugs', 'drug')]
3109
drugs:  [('tricyclic antidepressants', 'drug'), ('cimetidine', 'drug')]
3110
drugs:  [('tricyclic antidepressants', 'drug'), ('anticholinergic', 'drug'), ('cimetidine', 'drug'), ('drug', 'drug')]
3111
drugs:  [('cimetidine', 'drug'), ('tricyclic antidepressants', 'drug'), ('cimetidine', 'drug'), ('antidepressants', 'drug')]
3112
drugs:  [('ethchlorvynol', 'drug')]
3113
drugs:  [('ethchlorvynol', 'drug'), ('amitri

3254
drugs:  [('Erythromycin', 'drug'), ('triazolam', 'drug'), ('midazolam', 'drug'), ('benzodiazepines', 'group')]
3255
drugs:  [('erythromycin', 'drug')]
3256
drugs:  [('erythromycin', 'drug'), ('carbamazepine', 'drug'), ('cyclosporine', 'drug'), ('tacrolimus', 'drug'), ('hexobarbital', 'drug'), ('phenytoin', 'drug'), ('alfentanil', 'drug'), ('cisapride', 'drug'), ('disopyramide', 'drug'), ('lovastatin', 'drug'), ('bromocriptine', 'drug'), ('valproate', 'drug'), ('terfenadine', 'drug'), ('astemizole', 'drug')]
3257
drugs:  [('erythromycin', 'drug')]
3258
drugs:  [('Erythromycin', 'drug'), ('antihistamines', 'group'), ('terfenadine', 'drug'), ('astemizole', 'drug')]
3259
drugs:  [('terfenadine', 'drug'), ('erythromycin', 'drug')]
3260
drugs:  [('erythromycin', 'drug'), ('cisapride', 'drug'), ('cisapride', 'drug'), ('erythromycin', 'drug')]
3261
drugs:  [('lovastatin', 'drug'), ('erythromycin', 'drug')]
3262
drugs:  [('chlorambucil', 'drug')]
3263
drugs:  [('tetracyclines', 'group'), (

3448
drugs:  [('vitamin D', 'group')]
3449
drugs:  [('Vitamin D', 'group'), ('vitamin D analogues', 'group')]
3450
drugs:  [('Calcium', 'drug'), ('calcium', 'drug')]
3451
drugs:  [('Magnesium', 'drug'), ('Magnesium', 'drug'), ('antacids', 'group'), ('vitamin D', 'group')]
3452
drugs:  [('cholestyramine', 'drug'), ('colestipol', 'drug'), ('thiazide diuretics', 'group'), ('diuretic', 'group')]
3453
drugs:  [('CNS depressant drugs', 'group'), ('barbiturates', 'group'), ('tranquilizers', 'group'), ('opioids', 'group'), ('anesthetics', 'group'), ('INAPSINE', 'brand')]
3454
drugs:  [('INAPSINE', 'brand')]
3455
drugs:  [('INAPSINE', 'brand'), ('CNS depressant drugs', 'group')]
3456
drugs:  [('Isoproterenol hydrochloride', 'drug'), ('epinephrine', 'drug')]
3457
drugs:  [('ISUPREL', 'brand'), ('anesthetics', 'group'), ('halothane', 'drug'), ('sympathomimetic amines', 'group')]
3458
drugs:  [('progestin', 'group'), ('anticonvulsants', 'group'), ('phenytoin', 'drug'), ('carbamazepine', 'drug'), (

3625
drugs:  [('ketoconazole', 'drug'), ('Vardenafil', 'drug'), ('ketoconazole', 'drug')]
3626
drugs:  [('HIV Protease Inhibitors', 'group'), ('Indinavir', 'drug'), ('Vardenafil', 'drug'), ('vardenafil', 'drug'), ('vardenafil', 'drug'), ('vardenafil', 'drug')]
3627
drugs:  [('Vardenafil', 'drug'), ('indinavir', 'drug')]
3628
drugs:  [('Ritonavir', 'drug'), ('Vardenafil', 'drug'), ('vardenafil', 'drug'), ('vardenafil', 'drug')]
3629
drugs:  [('vardenafil', 'drug'), ('ritonavir', 'drug')]
3630
drugs:  [('Ritonavir', 'drug'), ('vardenafil', 'drug')]
3631
drugs:  [('Vardenafil', 'drug'), ('ritonavir', 'drug')]
3632
drugs:  [('vardenafil', 'drug'), ('glyburide', 'drug'), ('warfarin', 'drug'), ('digoxin', 'drug'), ('Maalox', 'brand'), ('ranitidine', 'drug')]
3633
drugs:  [('warfarin', 'drug'), ('vardenafil', 'drug')]
3634
drugs:  [('Vardenafil', 'drug')]
3635
drugs:  [('Vardenafil', 'drug')]
3636
drugs:  [('Vardenafil', 'drug')]
3637
drugs:  [('Nitrates', 'group'), ('nitrates', 'group'), ('v

3757
drugs:  [('CASODEX', 'brand'), ('coumarin anticoagulant', 'group'), ('warfarin', 'drug')]
3758
drugs:  [('CASODEX', 'brand'), ('coumarin anticoagulants', 'group'), ('anticoagulant', 'group')]
3759
drugs:  [('anesthetic solutions', 'group'), ('epinephrine', 'drug'), ('norepinephrine', 'drug'), ('monoamine oxidase inhibitors', 'group'), ('tricyclic antidepressants', 'group'), ('phenothiazines', 'group')]
3760
drugs:  [('vasopressor drugs', 'group'), ('ergot-type oxytocic drugs', 'group')]
3761
drugs:  [('sulfonamides', 'group')]
3762
drugs:  [('chloroprocaine', 'drug'), ('sulfonamide drug', 'group')]
3763
drugs:  [('anticholinergic drugs', 'group'), ('amantadine', 'drug'), ('antiarrhythmic agents', 'group'), ('quinidine', 'drug'), ('antihistamines', 'group'), ('antipsychotic agents', 'group'), ('phenothiazines', 'group'), ('benzodiazepines', 'group')]
3764
drugs:  [('MAO inhibitors', 'group'), ('narcotic analgesics', 'group'), ('meperidine', 'drug'), ('nitrates', 'group'), ('nitrite

3880
drugs:  [('Itraconazole', 'drug'), ('cyclosporine', 'drug'), ('tacrolimus', 'drug'), ('digoxin', 'drug')]
3881
drugs:  [('Cyclosporine', 'drug'), ('tacrolimus', 'drug'), ('digoxin', 'drug'), ('Itraconazole', 'drug')]
3882
drugs:  [('HMG-CoA reductase inhibitors', 'group'), ('immunosuppressive drugs', 'group'), ('cyclosporine', 'drug')]
3883
drugs:  [('Itraconazole', 'drug'), ('phenytoin', 'drug'), ('rifampin', 'drug'), ('itraconazole', 'drug')]
3884
drugs:  [('itraconazole', 'drug'), ('Itraconazole', 'drug')]
3885
drugs:  [('Itraconazole', 'drug'), ('phenytoin', 'drug'), ('phenytoin', 'drug')]
3886
drugs:  [('phenytoin', 'drug'), ('Itraconazole', 'drug')]
3887
drugs:  [('Itraconazole', 'drug'), ('coumarin', 'group')]
3888
drugs:  [('Itraconazole', 'drug'), ('coumarin', 'group')]
3889
drugs:  [('azole antifungal agents', 'group'), ('isoniazid', 'drug')]
3890
drugs:  [('Itraconazole', 'drug'), ('Itraconazole', 'drug'), ('isoniazid', 'drug')]
3891
drugs:  [('azole antifungal agents',

4022
drugs:  [('Nelfinavir', 'drug')]
4023
drugs:  [('Nelfinavir', 'drug')]
4024
drugs:  [('nelfinavir', 'drug'), ('nevirapine', 'drug')]
4025
drugs:  [('Rifabutin', 'drug')]
4026
drugs:  [('Rifabutin', 'drug')]
4027
drugs:  [('Rifabutin', 'drug')]
4028
drugs:  [('rifabutin', 'drug')]
4029
drugs:  [('Rifampin', 'drug')]
4030
drugs:  [('Nevirapine', 'drug')]
4031
drugs:  [('Nevirapine', 'drug'), ('rifampin', 'drug'), ('nevirapine', 'drug')]
4032
drugs:  [('nevirapine', 'drug'), ('rifabutin', 'drug')]
4033
drugs:  [('Saquinavir', 'drug')]
4034
drugs:  [('Saquinavir', 'drug')]
4035
drugs:  [('saquinavir', 'drug')]
4036
drugs:  [('nevirapine', 'drug'), ('methadone', 'drug'), ('methadone', 'drug')]
4037
drugs:  [('Nevirapine', 'drug')]
4038
drugs:  [('Antiarrhythmics', 'group')]
4039
drugs:  [('Amiodarone', 'drug'), ('disopyramide', 'drug'), ('lidocaine', 'drug')]
4040
drugs:  [('Anticonvulsants', 'group')]
4041
drugs:  [('Carbamazepine', 'drug'), ('clonazepam', 'drug'), ('ethosuximide', 'd

4151
drugs:  [('quinine', 'drug'), ('penicillamine', 'drug'), ('digoxin', 'drug')]
4152
drugs:  [('Thyroid', 'group'), ('digoxin', 'drug')]
4153
drugs:  [('digoxin', 'drug'), ('sympathomimetics', 'group')]
4154
drugs:  [('Succinylcholine', 'drug')]
4155
drugs:  [('beta-adrenergic blockers', 'group'), ('calcium channel blockers', 'group'), ('digoxin', 'drug')]
4156
drugs:  [('digoxin', 'drug')]
4157
drugs:  [('digoxin', 'drug'), ('digoxin', 'drug')]
4158
drugs:  [('Pulmozyme', 'brand'), ('antibiotics', 'group'), ('bronchodilators', 'group'), ('vitamins', 'group'), ('corticosteroids', 'group'), ('analgesics', 'group')]
4159
drugs:  [('cetirizine', 'drug'), ('pseudoephedrine', 'drug'), ('antipyrine', 'drug'), ('ketoconazole', 'drug'), ('erythromycin', 'drug'), ('azithromycin', 'drug')]
4160
drugs:  [('theophylline', 'drug'), ('cetirizine', 'drug'), ('cetirizine', 'drug')]
4161
drugs:  [('theophylline', 'drug'), ('cetirizine', 'drug')]
4162
drugs:  [('theophylline', 'drug'), ('azithromycin

4298
drugs:  [('piperacillin', 'drug')]
4299
drugs:  [('insulin', 'drug')]
4300
drugs:  [('antidiabetic products', 'group'), ('ACE inhibitors', 'group'), ('disopyramide', 'drug'), ('fibrates', 'group'), ('fluoxetine', 'drug'), ('monoamine oxidase (MAO) inhibitors', 'group'), ('propoxyphene', 'drug'), ('salicylates', 'group'), ('somatostatin analog', 'group'), ('octreotide', 'drug'), ('sulfonamide antibiotics', 'group')]
4301
drugs:  [('corticosteroids', 'group'), ('niacin', 'drug'), ('danazol', 'drug'), ('diuretics', 'group'), ('sympathomimetic agents', 'group'), ('epinephrine', 'drug'), ('salbutamol', 'drug'), ('terbutaline', 'drug'), ('isoniazid', 'drug'), ('phenothiazine derivatives', 'group'), ('somatropin', 'drug'), ('thyroid hormones', 'group'), ('estrogens', 'group'), ('progestogens', 'group'), ('contraceptives', 'group')]
4302
drugs:  [('Beta-blockers', 'group'), ('clonidine', 'drug'), ('lithium', 'drug'), ('alcohol', 'drug'), ('insulin', 'drug')]
4303
drugs:  [('Pentamidine', 

4389
drugs:  [('levothyroxine sodium', 'drug'), ('androgens', 'group'), ('anabolic hormones', 'group'), ('asparaginase', 'drug'), ('clofibrate', 'drug'), ('estrogens', 'group'), ('estrogen-containing compounds', 'group'), ('5-fluorouracil', 'drug'), ('furosemide', 'drug'), ('glucocorticoids', 'group'), ('meclofenamic acid', 'drug'), ('mefenamic acid', 'drug'), ('methadone', 'drug'), ('perphenazine', 'drug'), ('phenylbutazone', 'drug'), ('phenytoin', 'drug'), ('salicylates', 'group'), ('tamoxifen', 'drug')]
4390
drugs:  [('aminoglutethimide', 'drug'), ('p-aminosalicylic acid', 'drug'), ('amiodarone', 'drug'), ('androgens', 'group'), ('thiocyanate', 'drug'), ('perchlorate', 'drug'), ('pertechnetate', 'drug'), ('antithyroid drugs', 'group'), ('b-adrenergic blocking agents', 'group'), ('carbamazepine', 'drug'), ('chloral hydrate', 'drug'), ('diazepam', 'drug'), ('dopamine', 'drug'), ('dopamine agonists', 'group'), ('ethionamide', 'drug'), ('glucocorticoids', 'group'), ('heparin', 'drug'), 

4538
drugs:  [('hydantoins', 'group'), ('sulfonamides', 'group'), ('sulfonylureas', 'group')]
4539
drugs:  [('coumarin-type anticoagulants', 'group'), ('Nalfon', 'brand')]
4540
drugs:  [('Nalfon', 'brand'), ('loop diuretics', 'group')]
4541
drugs:  [('Nalfon', 'brand'), ('steroid', 'group'), ('steroid', 'group'), ('steroid', 'group')]
4542
drugs:  [('WelChol', 'brand')]
4543
drugs:  [('WelChol', 'brand'), ('digoxin', 'drug'), ('lovastatin', 'drug'), ('metoprolol', 'drug'), ('quinidine', 'drug'), ('valproic acid', 'drug'), ('warfarin', 'drug')]
4544
drugs:  [('WelChol', 'brand'), ('verapamil', 'drug'), ('Calan SR', 'brand')]
4545
drugs:  [('verapamil', 'drug')]
4546
drugs:  [('WelChol', 'brand'), ('atorvastatin', 'drug'), ('lovastatin', 'drug'), ('simvastatin', 'drug'), ('HMG-CoA reductase inhibitor', 'group')]
4547
drugs:  [('Lovenox', 'brand')]
4548
drugs:  [('anticoagulants', 'group'), ('platelet inhibitors', 'group'), ('acetylsalicylic acid', 'drug'), ('NSAIDs', 'group'), ('ketorola

4624
drugs:  [('nitrofurantoin', 'drug'), ('nitrofurantoin', 'drug'), ('Norfloxacin', 'drug')]
4625
drugs:  [('Multivitamins', 'group'), ('iron', 'drug'), ('zinc', 'drug'), ('antacids', 'group'), ('sucralfate', 'drug'), ('norfloxacin', 'drug'), ('norfloxacin', 'drug')]
4626
drugs:  [('Videx', 'brand'), ('Didanosine', 'drug'), ('norfloxacin', 'drug'), ('norfloxacin', 'drug')]
4627
drugs:  [('quinolones', 'group'), ('caffeine', 'drug')]
4628
drugs:  [('caffeine', 'drug')]
4629
drugs:  [('thimerosal', 'drug_n'), ('latanoprost', 'drug')]
4630
drugs:  [('baclofen', 'drug')]
4631
drugs:  [('baclofen', 'drug'), ('morphine', 'drug')]
4632
drugs:  [('KEMSTRO', 'brand'), ('baclofen', 'drug')]
4633
drugs:  [('baclofen', 'drug')]
4634
drugs:  [('baclofen', 'drug')]
4635
drugs:  [('KEMSTROTM', 'brand'), ('baclofen', 'drug')]
4636
drugs:  [('Dantrium', 'brand')]
4637
drugs:  [('phenobarbital', 'drug'), ('diazepam', 'drug'), ('Dantrium', 'brand')]
4638
drugs:  [('diazepam', 'drug'), ('diphenylhydanto

4784
drugs:  [('glycine', 'drug')]
4785
drugs:  [('Olanzapine', 'drug')]
4786
drugs:  [('Olanzapine', 'drug'), ('thienobenzodiazepine derivative', 'group'), ('second generation (atypical) antipsychotic agent', 'group')]
4787
drugs:  [('conventional antipsychotics', 'group')]
4788
drugs:  [('olanzapine', 'drug'), ('haloperidol', 'drug')]
4789
drugs:  [('olanzapine', 'drug'), ('haloperidol', 'drug')]
4790
drugs:  [('olanzapine', 'drug'), ('risperidone', 'drug'), ('olanzapine', 'drug'), ('risperidone', 'drug')]
4791
drugs:  [('risperidone', 'drug'), ('olanzapine', 'drug')]
4792
drugs:  [('olanzapine', 'drug'), ('risperidone', 'drug'), ('haloperidol', 'drug')]
4793
drugs:  [('olanzapine', 'drug'), ('risperidone', 'drug')]
4794
drugs:  [('olanzapine', 'drug'), ('olanzapine', 'drug')]
4795
drugs:  [('Olanzapine', 'drug'), ('haloperidol', 'drug'), ('risperidone', 'drug')]
4796
drugs:  [('olanzapine', 'drug'), ('clozapine', 'drug'), ('risperidone', 'drug')]
4797
drugs:  [('olanzapine', 'drug')

4940
drugs:  [('digoxin', 'drug'), ('digoxin', 'drug'), ('amiodarone', 'drug')]
4941
drugs:  [('amiodarone', 'drug'), ('digoxin', 'drug')]
4942
drugs:  [('Digoxin', 'drug')]
4943
drugs:  [('Digoxin', 'drug')]
4944
drugs:  [('Amiodarone', 'drug'), ('digoxin', 'drug')]
4945
drugs:  [('digoxin', 'drug'), ('amiodarone', 'drug'), ('digoxin', 'drug')]
4946
drugs:  [('alcohol', 'drug')]
4947
drugs:  [('alcohol', 'drug'), ('alcohol', 'drug')]
4948
drugs:  [('ethanol', 'drug'), ('physostigmine', 'drug'), ('atropine', 'drug')]
4949
drugs:  [('Ethanol', 'drug')]
4950
drugs:  [('Physostigmine', 'drug')]
4951
drugs:  [('atropine', 'drug')]
4952
drugs:  [('Physostigmine', 'drug'), ('alcohol', 'drug'), ('alcohol', 'drug')]
4953
drugs:  [('atropine', 'drug')]
4954
drugs:  [('Atropine', 'drug'), ('alcohol', 'drug')]
4955
drugs:  [('ethanol', 'drug')]
4956
drugs:  [('cerulein', 'drug_n'), ('3H-spiroperidol', 'drug_n'), ('neuroleptics', 'group'), ('haloperidol', 'drug'), ('pyreneperone', 'drug_n'), ('3H-

drugs:  [('18-Methoxycoronaridine', 'drug_n'), ('18-MC', 'drug_n'), ('ibogaine', 'drug_n')]
5143
drugs:  [('18-MC', 'drug_n')]
5144
drugs:  [('ibogaine', 'drug_n'), ('18-MC', 'drug_n'), ('morphine', 'drug'), ('cocaine', 'drug'), ('ethanol', 'drug'), ('nicotine', 'drug')]
5145
drugs:  [('ibogaine', 'drug_n'), ('18-MC', 'drug_n')]
5146
drugs:  [('ibogaine', 'drug_n'), ('18-MC', 'drug_n'), ('opioid', 'group')]
5147
drugs:  [('ibogaine', 'drug_n'), ('18-MC', 'drug_n'), ('ibogaine', 'drug_n')]
5148
drugs:  [('ibogaine', 'drug_n'), ('18-MC', 'drug_n'), ('morphine', 'drug'), ('nicotine', 'drug')]
5149
drugs:  [('ibogaine', 'drug_n'), ('cocaine', 'drug')]
5150
drugs:  [('ibogaine', 'drug_n'), ('18-MC', 'drug_n')]
5151
drugs:  [('Ibogaine', 'drug_n'), ('18-MC', 'drug_n'), ('morphine', 'drug')]
5152
drugs:  [('morphine', 'drug'), ('morphine', 'drug')]
5153
drugs:  [('Ibogaine', 'drug_n')]
5154
drugs:  [('18-MC', 'drug_n')]
5155
drugs:  [('Ibogaine', 'drug_n'), ('18-MC', 'drug_n')]
5156
drugs:  [

5370
drugs:  [('phenobarbital', 'drug')]
5371
drugs:  [('KRM-1648', 'drug_n'), ('ofloxacin', 'drug')]
5372
drugs:  [('benzoxazinorifamycin', 'drug_n'), ('KRM-1648', 'drug_n'), ('ofloxacin', 'drug')]
5373
drugs:  [('KRM-1648', 'drug_n')]
5374
drugs:  [('rifampicin', 'drug'), ('rifabutin', 'drug')]
5375
drugs:  [('ofloxacin', 'drug'), ('KRM-1648', 'drug_n'), ('rifampicin', 'drug'), ('rifabutin', 'drug'), ('ofloxacin', 'drug')]
5376
drugs:  [('KRM-1648', 'drug_n')]
5377
drugs:  [('budesonide', 'drug'), ('Survanta', 'brand'), ('lung surfactant', 'group')]
5378
drugs:  [('pulmonary surfactant', 'group'), ('Survanta', 'brand'), ('budesonide', 'drug')]
5379
drugs:  [('budesonide', 'drug'), ('ethanol', 'drug')]
5380
drugs:  [('solvent', 'group')]
5381
drugs:  [('Survanta', 'brand'), ('budesonide', 'drug')]
5382
drugs:  [('Survanta', 'brand'), ('sodium dodecyl sulfate', 'drug_n')]
5383
drugs:  [('budesonide', 'drug'), ('budesonide', 'drug'), ('Survanta', 'brand')]
5384
drugs:  [('lung surfactan

29
drugs:  [('Pegaptanib', 'drug')]
30
drugs:  [('Macugen', 'brand'), ('pegaptanib', 'drug')]
31
drugs:  [('anticholinergic agents', 'group')]
32
drugs:  [('Alcohol', 'drug'), ('primaquine', 'drug')]
33
drugs:  [('methyl aminolevulinate', 'drug'), ('anesthetics', 'group')]
34
drugs:  [('methyl aminolevulinate', 'drug')]
35
drugs:  [('cimetidine', 'drug'), ('digoxin', 'drug'), ('sevelamer', 'drug'), ('theophylline', 'drug')]
36
drugs:  [('Telbivudine', 'drug'), ('telbivudine', 'drug')]
37
drugs:  [('telbivudine', 'drug'), ('telbivudine', 'drug'), ('telbivudine', 'drug')]
38
drugs:  [('Geref', 'brand'), ('Geref', 'brand')]
39
drugs:  [('BEXXAR', 'brand')]
40
drugs:  [('murine monoclonal antibodies', 'group')]
41
drugs:  [('Palonosetron', 'drug')]
42
drugs:  [('palonosetron', 'drug')]
43
drugs:  [('palonosetron', 'drug')]
44
drugs:  [('palonosetron', 'drug'), ('metoclopramide', 'drug')]
45
drugs:  [('ALOXI', 'brand'), ('corticosteroids', 'group'), ('analgesics', 'group'), ('antiemetics', 

234
drugs:  [('propiverine hydrochloride', 'drug')]
235
drugs:  [('propiverine hydrochloride', 'drug'), ('cetirizine di-hydrochloride', 'drug')]
236
drugs:  [('ethyl acetate', 'drug_n'), ('methanol-ammonium acetate', 'drug_n')]
237
drugs:  [('propiverine', 'drug')]
238
drugs:  [('propiverine hydrochloride', 'drug')]
239
drugs:  [('propiverine hydrochloride', 'drug')]
240
drugs:  [('antituberculosis drugs', 'group'), ('antiretroviral drugs', 'group')]
241
drugs:  [('antiretroviral', 'group'), ('antituberculosis', 'group')]
242
drugs:  [('Nitric oxide', 'drug')]
243
drugs:  [('opiate', 'group')]
244
drugs:  [('opiate', 'group')]
245
drugs:  [('opiate', 'group')]
246
drugs:  [('opiate', 'group')]
247
drugs:  [('opiate', 'group'), ('opiate', 'group')]
248
drugs:  [('opiate', 'group')]
249
drugs:  [('opiate', 'group'), ('morphine', 'drug')]
250
drugs:  [('opiate', 'group'), ('opiate', 'group'), ('opiate', 'group')]
251
drugs:  [('Ticagrelor', 'drug'), ('antiplatelet agent', 'group')]
252
dr

In [7]:
# computing the Distribution of frequencies that we will need later for a feature

list_of_tokens = [tuples[0] for row in train_set for tuples in row]
freqDistribution = nltk.FreqDist(list_of_tokens)

In [8]:
frequencies = [v for k,v in freqDistribution.items()]
frequencies = list(set(sorted(frequencies)))
max_pos = round(len(frequencies)*20/100) # we only take the 20 small frequencies
more_rare_freq = frequencies[0:max_pos]

## FEATURES 

In [9]:
with(open('data/DrugBank_names_DB.txt', 'r')) as f:
    drugbank_db = f.read().splitlines()

# remember s is a triple defined as follows: (token, POS tag, BIO tag)
X_train = [sent2features(s,drugbank_db,freqDistribution,more_rare_freq) for s in train_set]
y_train = [sent2labels(s) for s in train_set]

X_test = [sent2features(s,drugbank_db,freqDistribution,more_rare_freq) for s in test_set]
y_test = [sent2labels(s) for s in test_set]


In [10]:
X_train[598][5]

{'+1:bigrams': ['ma', 'ay'],
 '+1:postag': 'MD',
 '+1:postag[:2]': 'MD',
 '+1:trigrams': ['may'],
 '+1:unigrams': ['m', 'a', 'y'],
 '+2:bigrams': ['le', 'ea', 'ad'],
 '+2:postag': 'VB',
 '+2:postag[:2]': 'VB',
 '+2:trigrams': ['lea', 'ead'],
 '+2:unigrams': ['l', 'e', 'a', 'd'],
 '-1:bigrams': ['ga',
  'as',
  'st',
  'tr',
  'ro',
  'oi',
  'in',
  'nt',
  'te',
  'es',
  'st',
  'ti',
  'in',
  'na',
  'al'],
 '-1:postag': 'JJ',
 '-1:postag[:2]': 'JJ',
 '-1:trigrams': ['gas',
  'ast',
  'str',
  'tro',
  'roi',
  'oin',
  'int',
  'nte',
  'tes',
  'est',
  'sti',
  'tin',
  'ina',
  'nal'],
 '-1:unigrams': ['g',
  'a',
  's',
  't',
  'r',
  'o',
  'i',
  'n',
  't',
  'e',
  's',
  't',
  'i',
  'n',
  'a',
  'l'],
 '-2:bigrams': ['th', 'he'],
 '-2:postag': 'DT',
 '-2:postag[:2]': 'DT',
 '-2:trigrams': ['the'],
 '-2:unigrams': ['t', 'h', 'e'],
 'all_uppercase_letters': 0,
 'caps_mix': 1,
 'contains_capital_letter': 1,
 'contains_drug_prefix': 0,
 'contains_drug_sufix': 0,
 'contain

In [11]:
train_set[598]

[('Drugs', 'NNS', 'O'),
 ('that', 'WDT', 'O'),
 ('raise', 'VBP', 'O'),
 ('the', 'DT', 'O'),
 ('gastrointestinal', 'JJ', 'O'),
 ('pH', 'NN', 'O'),
 ('may', 'MD', 'O'),
 ('lead', 'VB', 'O'),
 ('to', 'TO', 'O'),
 ('an', 'DT', 'O'),
 ('earlier', 'JJR', 'O'),
 ('release', 'NN', 'O'),
 ('of', 'IN', 'O'),
 ('duloxetine', 'NN', 'B-drug'),
 ('.', '.', 'O')]

In [12]:
train_texts_entities[397]

('Other strong selective CYP3A4 inhibitors such as ketoconazole can also be expected to increase the exposure of zaleplon.',
 [('ketoconazole', 'drug'), ('zaleplon', 'drug')])

In [13]:
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics

crf = sklearn_crfsuite.CRF(
    algorithm='lbfgs',
    c1=1.33,
    c2=0.1,
    max_iterations=100,
    all_possible_transitions=True
)

params_space = {
    'c1': scipy.stats.expon(scale=0.5),
    'c2': scipy.stats.expon(scale=0.5),
}

# use the same metric for evaluation

labels = list(set(sum(y_train,[])))
labels.remove('O')

f1_scorer = make_scorer(metrics.flat_f1_score,
                        average='weighted', labels=labels)

rs = RandomizedSearchCV(crf, params_space,
                        cv=4,
                        verbose=1,
                        n_jobs=-1,
                        n_iter=15,
                        scoring=f1_scorer)
rs.fit(X_train, y_train)


Fitting 4 folds for each of 15 candidates, totalling 60 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 30.9min
[Parallel(n_jobs=-1)]: Done  60 out of  60 | elapsed: 40.1min finished


RandomizedSearchCV(cv=4, error_score='raise',
          estimator=CRF(algorithm='lbfgs', all_possible_states=None,
  all_possible_transitions=True, averaging=None, c=None, c1=1.33, c2=0.1,
  calibration_candidates=None, calibration_eta=None,
  calibration_max_trials=None, calibration_rate=None,
  calibration_samples=None, delta=None, epsilon=None, error_...e,
  num_memories=None, pa_type=None, period=None, trainer_cls=None,
  variance=None, verbose=False),
          fit_params={}, iid=True, n_iter=15, n_jobs=-1,
          param_distributions={'c1': <scipy.stats._distn_infrastructure.rv_frozen object at 0x14b5573c8>, 'c2': <scipy.stats._distn_infrastructure.rv_frozen object at 0x14b5575f8>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          scoring=make_scorer(flat_f1_score, average=weighted, labels=['B-brand', 'B-drug_n', 'B-drug', 'B-group', 'I']),
          verbose=1)

####  Predictions

In [14]:
model = rs.best_estimator_
# model = crf
model.fit(X_train,y_train)
pred_tags = model.predict(X_test)

In [15]:
print(model)

CRF(algorithm='lbfgs', all_possible_states=None,
  all_possible_transitions=True, averaging=None, c=None,
  c1=0.2894710757799365, c2=0.21306562235894833,
  calibration_candidates=None, calibration_eta=None,
  calibration_max_trials=None, calibration_rate=None,
  calibration_samples=None, delta=None, epsilon=None, error_sensitive=None,
  gamma=None, keep_tempfiles=None, linesearch=None, max_iterations=100,
  max_linesearch=None, min_freq=None, model_filename=None,
  num_memories=None, pa_type=None, period=None, trainer_cls=None,
  variance=None, verbose=False)


####  Evaluation

####  Evaluation with scickit learn

In [16]:
# group B and I results
sorted_labels = sorted(
    labels,
    key=lambda name: (name[1:], name[0])
)
print(metrics.flat_classification_report(
    y_test, pred_tags, labels=sorted_labels, digits=3
))

             precision    recall  f1-score   support

          I      0.750     0.652     0.698       161
    B-brand      0.470     0.915     0.621        59
     B-drug      0.776     0.861     0.816       338
   B-drug_n      0.387     0.108     0.169       111
    B-group      0.705     0.682     0.694       151

avg / total      0.683     0.689     0.669       820



In [17]:
from collections import Counter

def print_transitions(trans_features):
    for (label_from, label_to), weight in trans_features:
        print("%-6s -> %-7s %0.6f" % (label_from, label_to, weight))

print("Top likely transitions:")
print_transitions(Counter(model.transition_features_).most_common(20))

print("\nTop unlikely transitions:")
print_transitions(Counter(model.transition_features_).most_common()[-20:])

Top likely transitions:
B-group -> I       0.852876
B-drug_n -> I       0.798307
O      -> O       0.583342
I      -> I       0.308902
O      -> B-group 0.000338
B-drug -> O       -0.000034
O      -> B-brand -0.020661
O      -> B-drug_n -0.040056
B-brand -> O       -0.061517
B-brand -> B-group -0.161544
B-drug -> I       -0.164472
B-brand -> B-brand -0.170059
O      -> B-drug  -0.244170
B-drug_n -> B-group -0.296133
I      -> B-drug  -0.345090
B-drug -> B-drug_n -0.372243
B-group -> B-drug_n -0.377804
B-drug -> B-group -0.455761
B-drug_n -> O       -0.514890
B-drug -> B-brand -0.553121

Top unlikely transitions:
B-drug_n -> B-group -0.296133
I      -> B-drug  -0.345090
B-drug -> B-drug_n -0.372243
B-group -> B-drug_n -0.377804
B-drug -> B-group -0.455761
B-drug_n -> O       -0.514890
B-drug -> B-brand -0.553121
B-group -> B-brand -0.604306
B-drug_n -> B-drug  -0.679047
B-brand -> I       -0.685389
I      -> B-drug_n -0.688441
I      -> B-group -0.755463
B-group -> B-drug  -1.190678
I  

In [18]:
def print_state_features(state_features):
    for (attr, label), weight in state_features:
        print("%0.6f %-8s %s" % (weight, label, attr))

print("Top positive:")
print_state_features(Counter(model.state_features_).most_common(30))

print("\nTop negative:")
print_state_features(Counter(model.state_features_).most_common()[-30:])

Top positive:
5.311037 B-brand  word[-5:]:pirin
4.223503 B-group  word[-2:]:Is
4.052828 B-group  word[-2:]:Ds
3.559414 B-drug_n word[-5:]:gaine
3.454864 B-drug   word[-5:]:drugs
3.318023 B-brand  word[-2:]:EX
3.158843 B-drug_n word[-2:]:MC
3.117001 B-group  word[-5:]:ylate
2.955492 B-group  word[-5:]:ogens
2.825016 O        +1:bigrams:of
2.726492 O        BOS
2.683978 B-brand  word[-2:]:IL
2.674453 B-drug   word[-5:]:goxin
2.656646 B-drug   isInDB
2.604003 B-brand  word[-2:]:IN
2.559511 B-drug_n word[-5:]:atrol
2.548581 B-brand  word[-2:]:OL
2.515327 B-group  word[-5:]:tacid
2.498567 B-group  word[-5:]:piate
2.471002 B-drug   word[-2:]:ib
2.446081 O        word[-2:]:ay
2.426628 B-drug   word[-5:]:ampin
2.426628 B-drug   word[-4:]:mpin
2.367205 B-brand  word[-5:]:utane
2.357267 B-drug   word[-3:]:cin
2.344016 B-brand  word[-2:]:AN
2.320498 B-group  word[-5:]:rogen
2.313360 O        word[-4:]:tion
2.311073 B-group  word[-5:]:ators
2.290426 B-drug_n word[-5:]:toxin

Top negative:
-1.68550

In [19]:
print('Total execution time: ',(time.time() - first_init)/60, ' minutes')

Total execution time:  44.01821411450704  minutes


In [20]:

'''
## Log of results
date, precision, recall, F1, features, test
14-May, 46.2, 52.1, 48.99, Token length; Prefixes/Suffixes; POS tag; Binary features (+-2); Token position; DrugBank DB; Shape, yes
'''

'\n## Log of results\ndate, precision, recall, F1, features, test\n14-May, 46.2, 52.1, 48.99, Token length; Prefixes/Suffixes; POS tag; Binary features (+-2); Token position; DrugBank DB; Shape, yes\n'