In [2]:
import json
import os, sys

# Make the parent of the notebook's directory importable and use it as the
# working directory, so the relative paths opened by later cells resolve
# against it.
#
# The working directory seen on the first run is remembered in a sentinel.
# Without it, re-running this cell would resolve os.pardir against the
# already-changed working directory and climb one level higher every time.
if '_NOTEBOOK_START_DIR' not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()

path = os.path.abspath(os.path.join(_NOTEBOOK_START_DIR, os.pardir))
if path not in sys.path:
    sys.path.append(path)

# change the current directory
os.chdir(path)

In [3]:
# Load the stored results. The encoding is pinned to UTF-8 so decoding does
# not depend on the platform's locale default.
with open('evaluation/results.json', 'r', encoding='utf-8') as f:
    results_json = json.load(f)

In [4]:
# Positions in results_json of the entries whose results will be labeled.
ids = list(range(3))

In [5]:
from tqdm import tqdm
# Load every recipe record. The encoding is pinned to UTF-8 so decoding does
# not depend on the platform's locale default.
with open('files/raw/foodrecipes.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Index the records by canonical URL so a record can be fetched with a single
# dictionary lookup. If two records share a URL, the later one wins.
data_indexed = {}
for recipe in tqdm(data):
    recipe_url = recipe['canonical_url']
    data_indexed[recipe_url] = recipe

# Drop the list name; the records stay reachable through data_indexed.
del data

100%|██████████| 483137/483137 [00:00<00:00, 1169133.17it/s]


In [6]:
# build the labeling tasks

# Entries of results_json selected for labeling.
results_subset = [results_json[i] for i in ids]

# Flatten to one (entry, hit) pair per retrieved result. Each hit is indexed
# as hit[0] -> recipe URL and hit[1] -> score. Every list below is derived
# from this single flattening, so they all share the same length and order.
result_pairs = [(r, hit) for r in results_subset for hit in r['results']]

# NOTE: the misspelled name `ingredeints` is kept on purpose because the
# labeling callbacks further down refer to it by this name.
ingredeints = [r['ingredients'] for r, _ in result_pairs]
keywords = [r['keywords'] for r, _ in result_pairs]

# Text shown to the annotator above each recipe.
queries = [f"Ingredients: {r['ingredients']}. Keywords: {r['keywords']}"
           for r, _ in result_pairs]

score = [float(hit[1]) for _, hit in result_pairs]

# Raises KeyError if a result URL has no matching record in data_indexed.
recipe_subset = [data_indexed[hit[0]] for _, hit in result_pairs]


In [None]:
# setup widgets
import ipywidgets as widgets
from IPython.display import display, clear_output
from pprint import pprint

# Labeling state shared with the button callbacks:
#   annotations - one dict appended per processed item
#   index       - position of the item currently being shown
annotations = []
index = 0

# Widgets that make up the labeling UI.
progress_label = widgets.Label()
output = widgets.Output()
label_buttons = widgets.ToggleButtons(
    description='Label:',
    options=["Relevant", "Not Relevant"],
    button_style='',
)
skip_button = widgets.Button(description="Skip")
next_button = widgets.Button(description="Next")

def update_display():
    """Render the item at the current ``index`` into the ``output`` widget.

    Replaces the previous contents of ``output`` with the query text, a
    "Recipe i of N" line and a pretty-printed dump of the recipe record, then
    sets ``progress_label`` to the same 1-based position.
    """
    with output:
        clear_output(wait=True)
        position = index + 1
        total = len(recipe_subset)
        print(f"Query: {queries[index]}")
        print(f"Recipe {position} of {total}:")
        pprint(recipe_subset[index])
        progress_label.value = f"{position} / {total}"

def on_next_clicked(b):
    """Store the selected label for the current item and move to the next one.

    Appends a record for the item at ``index`` to ``annotations`` using the
    value currently selected in ``label_buttons``, then increments ``index``.
    If items remain, the display is refreshed; otherwise the UI is switched
    to its finished state.

    Args:
        b: The button instance passed by the click handler (unused).
    """
    global index
    shown = index
    record = {
        "ingredients": ingredeints[shown],
        'keywords': keywords[shown],
        'url': recipe_subset[shown]['canonical_url'],
        "recipe": recipe_subset[shown],
        'score': score[shown],
        "label": label_buttons.value,
    }
    annotations.append(record)
    index = shown + 1

    total = len(recipe_subset)
    if index < total:
        update_display()
        return

    # Past the last item: announce completion and lock every control.
    with output:
        clear_output()
        print("Labeling complete!")
    for control in (next_button, label_buttons, skip_button):
        control.disabled = True
    progress_label.value = f"{total} / {total}"

def on_skip_clicked(b):
    """Move past the current item without assigning it a relevance label.

    A record for the item at ``index`` is still appended to ``annotations``
    so there is one row per item shown, but its ``"label"`` is ``None``.
    Previously this handler stored the current value of ``label_buttons``,
    so a skipped item was saved with whatever label happened to be selected
    (``"Relevant"`` by default), making it indistinguishable from a real
    judgement.

    After recording, ``index`` is incremented. If items remain, the display
    is refreshed; otherwise the UI is switched to its finished state.

    Args:
        b: The button instance passed by the click handler (unused).
    """
    global index
    annotations.append({
        "ingredients": ingredeints[index],
        'keywords': keywords[index],
        'url': recipe_subset[index]['canonical_url'],
        "recipe": recipe_subset[index],
        'score': score[index],
        # Explicitly unlabeled: the annotator chose not to judge this item.
        "label": None
    })
    index += 1
    if index < len(recipe_subset):
        update_display()
    else:
        # Past the last item: announce completion and lock every control.
        with output:
            clear_output()
            print("Labeling complete!")
        next_button.disabled = True
        label_buttons.disabled = True
        skip_button.disabled = True
        progress_label.value = f"{len(recipe_subset)} / {len(recipe_subset)}"

# Bind callbacks
next_button.on_click(on_next_clicked)
skip_button.on_click(on_skip_clicked)

# Initial display
ui = widgets.VBox([
    progress_label,
    label_buttons,
    widgets.HBox([next_button, skip_button]),
    output
])
display(ui)

# Render the first item immediately. Without this call the output area stays
# empty until the first click, so the first item would be labeled unseen.
if recipe_subset:
    update_display()
else:
    # Nothing to label: lock the controls so a click cannot index into the
    # empty task lists.
    with output:
        print("Nothing to label.")
    next_button.disabled = True
    label_buttons.disabled = True
    skip_button.disabled = True
    progress_label.value = "0 / 0"


Label(value='1 / 30')

ToggleButtons(description='Label:', options=('Relevant', 'Not Relevant'), value='Relevant')

Button(description='Next', style=ButtonStyle())

Button(description='Skip', style=ButtonStyle())

Output()

In [8]:
# export the relevancy labels 
import pandas as pd
# Build a table from the collected annotation dicts, one row per dict.
df = pd.DataFrame(data=annotations)
# Write the table to CSV, leaving out the positional index column.
df.to_csv(path_or_buf='evaluation/annotations.csv', index=False)