In [1]:
import urllib.request, urllib.error, urllib.parse
import json
import os
import pandas
from pprint import pprint

In [2]:
REST_URL = "http://data.bioontology.org"
API_KEY = "a28f1d5b-0cc4-454a-8baf-1b2285cfa549"

def get_json(url):
    opener = urllib.request.build_opener()
    opener.addheaders = [('Authorization', 'apikey token=' + API_KEY)]
    return json.loads(opener.open(url).read())

def print_annotations(annotations, get_class=True):
    #Result is the row
    for result in annotations:
        #returns a martix called annotatedClass which holds all of the ids, links, ontologies, etc
        class_details = result["annotatedClass"]
        if get_class:
            try:
                class_details = get_json(result["annotatedClass"]["links"]["self"])
            except urllib.error.HTTPError:
                print(f"Error retrieving {result['annotatedClass']['@id']}")
                continue
        print("Class details")
        print("\tid: " + class_details["@id"])
        print("\tprefLabel: " + class_details["prefLabel"])
        print("\tontology: " + class_details["links"]["ontology"])

        print("Annotation details")
        for annotation in result["annotations"]:
            print("\tfrom: " + str(annotation["from"]))
            print("\tto: " + str(annotation["to"]))
            print("\tmatch type: " + annotation["matchType"])

        if result["hierarchy"]:
            print("\n\tHierarchy annotations")
            for annotation in result["hierarchy"]:
                try:
                    class_details = get_json(annotation["annotatedClass"]["links"]["self"])
                except urllib.error.HTTPError:
                    print(f"Error retrieving {annotation['annotatedClass']['@id']}")
                    continue
                pref_label = class_details["prefLabel"] or "no label"
                print("\t\tClass details")
                print("\t\t\tid: " + class_details["@id"])
                print("\t\t\tprefLabel: " + class_details["prefLabel"])
                print("\t\t\tontology: " + class_details["links"]["ontology"])
                print("\t\t\tdistance from originally annotated class: " + str(annotation["distance"]))

        print("\n\n")


In [3]:
#Input the data you want to work with in here
df = pandas.read_csv('../data/output/unlabeled_withDBID.csv')
print(len(df))
df.head()


1431


Unnamed: 0.1,Unnamed: 0,Label_ID,Drug_Brand_Name,Active_ingredient,UNII_ID,Formatted_Text,Text,WordCount,DB_ID
0,0,302b23fa-a0c9-405f-9e9f-1d59284c8862.xml,MENTAX,BUTENAFINE,91Y494NL0X,Mentax,MentaxM. furfur DOSAGE AND ADMINISTRATION,5,DB01091
1,0,32c3b588-388f-4e22-8fc7-af0f8b992755.xml,Cerezyme,IMIGLUCERASE,Q6U6J48BWY,\n,Cerezyme ...,5,DB00053
2,0,0673b70a-a94f-4a8b-8153-1402e33fa3e0.xml,Simulect,BASILIXIMAB,9927MT646M,Simulect|The efficacy of Simulect,Simulect The efficacy of Simulect,5,DB00074
3,0,94ba4d57-e4da-4080-9c8e-9e0065aef659.xml,Amytal Sodium,AMOBARBITAL,GWH6IJ239E,,A...,5,DB01351
4,0,1a61a75c-19ff-462e-e054-00144ff8d46c.xml,OPIUM,OPIUM,37M3MZ001L,"VERITGO, DIARRHEA, PAIN, OR OTHER INDICATIONS","VERITGO, DIARRHEA, PAIN, OR OTHER INDICATIONS",6,DB11130


In [4]:
#Now to make this run for each context available!!!!
id = []
From = []
To = []
matchType = []
annotation2 = []
ontology = []
context = []
dbid = []
drugname = []

#Adds additional parameters here for the bioportal search engine
additional_parameters = "&ontologies=DOID"

data = []
for index, row in df.iterrows():
    
    if index % 100  == 0:
        perc = index/len(df) *100
        print(str(index) + " : "+ str(perc))
    
    #Text input for the ontology search engine
    text_to_annotate = row["Text"]
    db_id = row["DB_ID"]
    drug_name = row["Active_ingredient"]
    label_id = row["Label_ID"]


    # Annotate using the provided text
    annotations = get_json(REST_URL + "/annotator?text=" + urllib.parse.quote(text_to_annotate) + additional_parameters)

    for result in annotations:
        class_details = result["annotatedClass"]

        for annotation in result["annotations"]:
            From.append(annotation["from"])
            To.append(annotation["to"])
            matchType.append(annotation["matchType"])
            annotation2.append(annotation["text"])   
            context.append(text_to_annotate)
            ontology.append("\tontology: " + class_details["links"]["ontology"])
            id.append(class_details["@id"])
            dbid.append(db_id)
            drugname.append(drug_name)
            data.append([label_id, annotation["from"], annotation["to"], annotation["matchType"], annotation["text"], 
                         text_to_annotate, class_details["@id"], db_id, drug_name ])



0 : 0.0
100 : 6.988120195667366
200 : 13.976240391334732
300 : 20.964360587002094
400 : 27.952480782669465
500 : 34.940600978336825
600 : 41.92872117400419
700 : 48.91684136967156
800 : 55.90496156533893
900 : 62.893081761006286
1000 : 69.88120195667365
1100 : 76.86932215234103
1200 : 83.85744234800838


HTTPError: HTTP Error 414: Request-URI Too Large

In [5]:
len(data)

5615

In [6]:
#Constructs the new dataframe (newdf) from the collected lists_
columns =['Label_ID','From','To','Type', 'Annotation', 'Context','DO_ID','DB_ID','DrugName' ]

newdf = pandas.DataFrame(data, columns= columns)
newdf.head()

Unnamed: 0,Label_ID,From,To,Type,Annotation,Context,DO_ID,DB_ID,DrugName
0,1a61a75c-19ff-462e-e054-00144ff8d46c.xml,10,17,PREF,DIARRHEA,"VERITGO, DIARRHEA, PAIN, OR OTHER INDICATIONS",http://purl.obolibrary.org/obo/DOID_13250,DB11130,OPIUM
1,0061a54d-79b7-48be-b889-b42de0624e33.xml,19,29,PREF,CYCLOPLEGIA,For mydriasis and cycloplegia for diagnostic p...,http://purl.obolibrary.org/obo/DOID_10033,DB00809,TROPICAMIDE
2,cb8394e1-a813-45f5-b358-3e69d2196fe9.xml,22,35,PREF,VULVOVAGINITIS,For the treatment of vulvovaginitis caused by ...,http://purl.obolibrary.org/obo/DOID_2273,DB00259,SULFANILAMIDE
3,66f84cc7-8014-417d-af1d-935e85223b7f.xml,80,87,PREF,EPILEPSY,Ethosuximide oral solution is indicated for th...,http://purl.obolibrary.org/obo/DOID_1826,DB00593,ETHOSUXIMIDE
4,7bd438ca-ebcf-3e92-e053-2a91aa0a3675.xml,83,95,SYN,VIVAX MALARIA,Primaquine phosphate is indicated for the radi...,http://purl.obolibrary.org/obo/DOID_12978,DB01087,PRIMAQUINE


In [10]:
#Length of each of the df and the average number of annotations made per label
numOfAnno = len(newdf)
numOfContext = len(df.drop_duplicates(subset=["Text"]))

print(numOfAnno)
print(numOfContext)

print(numOfAnno/numOfContext)

5615
1420
3.954225352112676


In [12]:
newdf.to_csv('../data/output/unlabeled_withBPAnnotations.csv', index=False)

In [2]:
df= pandas.read_csv('../data/contra/unlabeled_withBPAnnotations.csv')

In [5]:
print(len(df))
df.drop_duplicates(subset=["Context"])

4129


Unnamed: 0,Label_ID,From,To,Type,Annotation,Context,DO_ID,DB_ID,DrugName
0,../DailyMedExtracter/prescription/temp_xml/f39...,7,22,SYN,HYPERSENSITIVITY,Known hypersensitivity to Rose Bengal.,http://purl.obolibrary.org/obo/DOID_1205,DB14215,Rose Bengal AT
1,../DailyMedExtracter/prescription/temp_xml/636...,1,16,SYN,HYPERSENSITIVITY,Hypersensitivity to Dapsone and/or its derivat...,http://purl.obolibrary.org/obo/DOID_1205,DB00250,DAPSONE
2,../DailyMedExtracter/prescription/temp_xml/6c3...,1,16,SYN,HYPERSENSITIVITY,Hypersensitivity to any of the ingredients.,http://purl.obolibrary.org/obo/DOID_1205,DB00180,FLUNISOLIDE ANHYDROUS
3,../DailyMedExtracter/prescription/temp_xml/34d...,3,14,PREF,HYPERTENSION,['Hypertension; toxemia; pregnancy; and hypers...,http://purl.obolibrary.org/obo/DOID_10763,DB00353,METHYLERGONOVINE
6,../DailyMedExtracter/prescription/temp_xml/607...,13,27,PREF,HYPERTHYROIDISM,Symptomatic Hyperthyroidism\nSymptomatic Hyper...,http://purl.obolibrary.org/obo/DOID_7998,DB09134,IOVERSOL
8,../DailyMedExtracter/prescription/temp_xml/8a0...,1,16,SYN,HYPERSENSITIVITY,Hypersensitivity to any component of this prod...,http://purl.obolibrary.org/obo/DOID_1205,DB06794,LODOXAMIDE
9,../DailyMedExtracter/prescription/temp_xml/8ab...,1,16,SYN,HYPERSENSITIVITY,Hypersensitivity to any component of this medi...,http://purl.obolibrary.org/obo/DOID_1205,DB00200,HYDROXOCOBALAMIN
10,../DailyMedExtracter/prescription/temp_xml/8a2...,57,72,SYN,HYPERSENSITIVITY,"['\n ', 'CONTRAINDICATIONS...",http://purl.obolibrary.org/obo/DOID_1205,DB06824,TRIENTINE
11,../DailyMedExtracter/prescription/temp_xml/eef...,1,6,PREF,ANURIA,Anuria. Known hypersensitivity to chlorthalido...,http://purl.obolibrary.org/obo/DOID_2983,DB00310,CHLORTHALIDONE
13,../DailyMedExtracter/prescription/temp_xml/b11...,31,46,SYN,HYPERSENSITIVITY,"[None, 'CONTRAINDICATIONS ', 'Hypersensitivity...",http://purl.obolibrary.org/obo/DOID_1205,DB00868,Benzonatate
