In [None]:
import csv
import html
import json
import os
import random
from urllib.parse import urlparse

import ipywidgets as widgets
from IPython.display import display, HTML

In [None]:
# read queries -- every non-blank line is split on TAB into query ID (field 0) and query text (field 1)
with open('../data/corpus/queries.txt', 'r', encoding='utf-8') as f:
    qText = f.readlines()

# prepare queries -- map query ID to query text
queries = {}
for query in qText:
    query = query.rstrip('\r\n')  # drop the line terminator so it does not end up inside the query text
    if not query.strip():  # blank line (e.g., trailing empty line) -- skip instead of failing on q[1]
        continue
    q = query.split('\t')
    queries[q[0]] = q[1]

In [None]:
# load the two sets of entity cards from their JSON files
with open('../data/cards/size=5/dynes_utility.json', encoding='utf-8') as dynesFile:
    dynes = json.loads(dynesFile.read())

with open('../data/cards/size=5/vRankDynes.json', encoding='utf-8') as qrankFile:
    qrank = json.loads(qrankFile.read())

In [None]:
# query keys to annotate -- only queries that have an entity card in dynes (i.e., those cards w/ tau < 0.8)
qIDs = [qID for qID in dynes]

In [None]:
# randomize the position of the qRank entity card
# one 'A'/'B' label is drawn per query ID (qCardPos[i] belongs to qIDs[i]); the global RNG is seeded
# with a fixed value first, so the same number of query IDs yields the same sequence on every run
random.seed(42)
qCardPos = random.choices(['A', 'B'], k=len(qIDs))

In [None]:
# set annotator ID -- must be changed when a different annotator conducts the annotation process
# (the ID is embedded in the name of the annotation CSV file, so each ID gets its own file)
annotID = 0

In [None]:
# create dir if not exists
os.makedirs('../data/annotations/cards/', exist_ok=True)
# create a CSV file to store results or load CSV file if already present
fname = "../data/annotations/cards/preferences"+str(annotID)+".csv"
# header row written once at the top of a new annotation CSV file
csvHeader = ["query", "Annotation", "QualityCard"]

if os.path.exists(fname):  # annotation CSV file exists -- resume annotation process
    with open(fname, newline="", encoding="utf-8") as f:  # read annotations
        reader = csv.reader(f)
        annotData = [row for row in reader if row]  # ignore empty rows -- they carry no query ID
    if annotData and annotData[0] == csvHeader:  # header is not an annotation -- do not count it
        annotData = annotData[1:]
    annotQueries = {row[0] for row in annotData}
    numAnnot = len(annotData)  # count number of annotated cards w/ preference labels

    # resume from the first entity card that has no annotation yet; when every card is already
    # annotated (or there are no cards), qIX == len(qIDs), i.e., nothing is left to annotate
    qIX = next((ix for ix, qID in enumerate(qIDs) if qID not in annotQueries), len(qIDs))
else:  # annotation CSV file does not exist -- start annotation process
    with open(fname, mode="w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(csvHeader)
    # set numAnnot and qIX to zero
    numAnnot = 0
    qIX = 0

In [None]:
# output widget to display entity cards evaluation
# (evaluateCards() renders into it and processEvaluation() clears it between cards)
output = widgets.Output()

# set var to store radio button
# (assigned by evaluateCards() for the card on display and read by processEvaluation() on submit)
radioButton = None

def _renderCard(label, model):
    """Return the HTML table cell (<td>) for one entity card.

    label -- letter shown in the card heading ('A' or 'B')
    model -- list of facts; each one is converted with prepareFact() into (entity, property) markup
    """
    facts = [prepareFact(fact) for fact in model]
    # the heading uses the entity of the first fact; an empty card gets an empty heading instead of an IndexError
    entity = facts[0][0] if facts else ""
    properties = [prop for _, prop in facts]
    return (
        "<td style='border: 1px solid black; padding: 10px; text-align: left;'>"
        + f"<h3>({label}) {entity}</h3>"
        + "<div><br>" + "<br>".join(properties) + "</div>"
        + "</td>"
    )


def evaluateCards():
    """Render the entity-card pair at position qIX into the output widget.

    Shows the query, the two cards side by side (the qRank card sits on the side given by
    qCardPos[qIX]), a radio button for the preference and a submit button wired to
    processEvaluation(). The radio button is stored in the global radioButton.
    When qIX is past the last query ID, only a completion message is shown.
    """
    global radioButton

    # start from a clean output area, so calling this again does not stack a second UI below the first
    output.clear_output()

    if qIX >= len(qIDs):  # nothing left to annotate -- report and stop
        with output:
            display(HTML(f"Annotation task exhausted! Annotated {numAnnot} entity cards with preference labels."))
        return

    qID = qIDs[qIX]
    if qCardPos[qIX] == 'A':  # qRank card in A position
        aModel, bModel = qrank[qID], dynes[qID]
    else:  # qRank card in B position
        aModel, bModel = dynes[qID], qrank[qID]

    # prepare entity cards presentation
    aCard = _renderCard('A', aModel)
    eCard = "<td style='padding: 50px;'></td>"  # empty spacer cell between the two cards
    bCard = _renderCard('B', bModel)

    # query text comes from a plain-text file -- escape it so '<' and '&' are shown literally
    queryText = html.escape(queries[qID], quote=False)

    # prepare radio button
    radioButton = widgets.RadioButtons(
        options=['A is better', 'B is better', 'They are the same'],
        value='They are the same'
    )

    # prepare submit button
    submitButton = widgets.Button(description="Submit Annotations")
    submitButton.on_click(processEvaluation)  # process annotations when submit button is clicked

    with output:  # display
        display(HTML(f"<h4 style='font-weight:normal;'><b>Query:</b> {queryText}</h4>"))
        display(HTML("<h4>Entity Cards:</h4>"))
        display(HTML("<table><tr></tr><tr>" + aCard + eCard + bCard + "</tr></table>"))
        display(radioButton)
        display(submitButton)


def processEvaluation(button):
    """Click handler of the submit button: store the current preference and advance.

    Appends one row (query ID, preference label, position of the qRank card) to the annotation
    CSV file, then either renders the next entity-card pair or, after the last pair, shows a
    completion message and returns True.
    """
    global qIX, numAnnot

    # translate the selected radio option into its stored label -- anything else counts as equivalent
    labelByChoice = {"A is better": "A", "B is better": "B"}
    annotation = labelByChoice.get(radioButton.value, "SAME")

    # update number of annotations
    numAnnot = numAnnot + 1

    # store entity card annotation
    with open(fname, mode="a", newline="", encoding="utf-8") as csvFile:
        csv.writer(csvFile).writerow([qIDs[qIX], annotation, qCardPos[qIX]])

    # move to the next entity card
    qIX = qIX + 1
    output.clear_output()
    if qIX >= len(qIDs):  # entity cards are finished -- exit
        with output:
            display(HTML(f"Annotation task exhausted! Annotated {numAnnot} entity cards with preference labels."))
        return True

    # entity cards are not finished -- keep annotating
    evaluateCards()


def prepareFact(fact):
    """Turn one fact into a pair of HTML snippets (entity, property).

    fact -- indexable with three strings: subject (fact[0]), predicate (fact[1]), object (fact[2])

    Returns a tuple:
      entity   -- anchor linking to the subject URL; its text is the last path segment w/ '_' as spaces
      property -- italic predicate name followed by the object (anchor if the object is a URL,
                  plain text otherwise)
    All values are HTML-escaped before being interpolated, so characters such as ' < & in a
    resource name or literal cannot break the markup (e.g., an apostrophe inside href='...').
    """
    # subject (always URL)
    sURL = stripResource(fact[0])
    sText = " ".join(sURL.split("/")[-1].split("_"))
    s = f"<a href='{html.escape(sURL)}' target='_blank'>{html.escape(sText, quote=False)}</a>"

    # predicate -- drop first/last character (presumably the enclosing '<' '>'; TODO confirm) and any prefix up to the last ':'
    pred = fact[1][1:-1].split(':')[-1]
    p = f"<i>{html.escape(pred, quote=False)}</i>"

    # object (either URL or Literal)
    obj = stripResource(fact[2])
    if isURL(obj):  # object is URL -- prepare anchor text
        # use the last path segment as anchor text; fall back to the URL itself when it is empty
        oText = " ".join(obj.split("/")[-1].split("_")) or obj
        o = f"<a href='{html.escape(obj)}' target='_blank'>{html.escape(oText, quote=False)}</a>"
    else:  # object is Literal -- present as (escaped) text
        o = html.escape(obj, quote=False)

    # prepare (subject) entity
    entity = f"{s}"
    # prepare entity property (predicate+object)
    prop = f"{p}&nbsp;&nbsp;&nbsp;{o}"
    return entity, prop


def isURL(item):
    """Return True if item parses as a URL with both a scheme and a network location, else False.

    Input that urlparse cannot handle (malformed URL, non-string value) yields False.
    """
    try:
        result = urlparse(item)
    except (ValueError, AttributeError, TypeError):
        # ValueError: malformed URL; AttributeError/TypeError: item is not a str/bytes value.
        # Only these are caught, so KeyboardInterrupt/SystemExit are no longer swallowed.
        return False
    return bool(result.scheme and result.netloc)

def stripResource(item):
    """Convert a fact term into a plain URL or literal string.

    - '<dbpedia...:Name>' becomes 'http://dbpedia.org/resource/Name'
    - any other '<...>' term is returned without the enclosing angle brackets
    - everything else is treated as a literal and returned unchanged
    """
    if item.startswith('<') and item.endswith('>'):  # resource is URL
        if item.startswith('<dbpedia'):  # DBpedia resource
            # split on the FIRST ':' only -- the resource name itself may contain ':'
            # (e.g., 'Star_Trek:_The_Next_Generation') and must not be truncated
            resource = 'http://dbpedia.org/resource/' + item[1:-1].split(':', 1)[-1]
        else:  # external resource
            resource = item[1:-1]
    else:  # resource is Literal
        resource = item
    return resource

<!DOCTYPE html>
<html>
    <body>
        <div>
            <h2>Task: Entity Card Preference</h2>
            <p>Given a <strong>query</strong> of interest, you will be presented with a side-by-side pair of entity cards, <strong>A</strong> and <strong>B</strong>, about an entity relevant to the query. <br> Your task is to choose the better of the two entity cards, <strong>A</strong> or <strong>B</strong>. <br> You can also select <strong>'They are the same'</strong> if you think they are equivalent.</p>
        </div>
    </body>
</html>

In [None]:
# render the entity-card pair at the current position (qIX) into the output widget,
# then leave the widget as the last expression so the notebook displays it
evaluateCards()
output