In [None]:
import string
import pandas as pd
import ipywidgets as widgets
import traitlets
from ast import literal_eval
from utils.preprocessing import tokenize

In [None]:
# Load the pre-processed rows (the first CSV column becomes the index) and
# shuffle them in one go. No random_state is passed to sample(), so the row
# order is different on every run.
df = pd.read_csv('data/processed/row_df_fi.csv', index_col=0).sample(frac=1)

df

In [None]:
# Every available token label, mapped to the keyword arguments that style the
# button showing a token with that label.
bio = {
    '0': dict(button_style=''),
    '1': dict(button_style='warning'),
}

# Label names in the order of the mapping above.
labels = [name for name in bio]

In [None]:
# Names of the columns holding the raw text and the per-token labels.
TEXT_COL, LABEL_COL = 'answer', 'label'

# Intended number of token buttons per row.
N_COLS = 12

# Start with an empty label column when the loaded data has none.
if LABEL_COL not in df.columns:
    df[LABEL_COL] = None

# Running position (0, 1, 2, ...) -> index label of the row at that position.
idxmap = {position: row_label for position, row_label in enumerate(df.index)}

# Position of the text currently being labeled.
i = 0

In [None]:
def next_prev_button_click(next_or_prev, button):
    """Save the labels of the text on screen, then show the next or previous text.

    Parameters
    ----------
    next_or_prev : str
        Direction to move in: 'next' or 'prev'.
    button : object
        The clicked button. It is not used; it is accepted so that the thin
        wrappers registered with ``on_click`` can pass it through.

    Notes
    -----
    Works on the notebook globals: reads and updates ``i``, writes the labels
    into ``df``, and replaces the contents of ``container`` and ``txt``.
    When no text is on screen yet and the move would not bring one up
    ('Previous' clicked first, or ``df`` is empty) the call does nothing.
    """
    assert next_or_prev in ['next', 'prev']
    global i

    # @@@ store the labels of the text currently on screen @@@

    if i >= 0:
        # While i is negative the container still holds its initial placeholder
        # rather than rows of token buttons, so there is nothing to store.
        # Each token button keeps its current label in its tooltip.
        token_labels = [w.tooltip for hbox in container.children for w in hbox.children]

        # A list can only be stored in an object-dtype column.
        if df[LABEL_COL].dtype != object:
            df[LABEL_COL] = df[LABEL_COL].astype(object)

        # .at addresses exactly one cell, so the whole list is stored as that
        # cell's value instead of being matched against the indexer length.
        df.at[idxmap[i], LABEL_COL] = token_labels

    # @@@ increase or decrease index, staying inside the frame @@@

    new_i = i + {'next': 1, 'prev': -1}[next_or_prev]

    if 0 <= new_i < len(df):
        i = new_i

    if i < 0:
        # Still before the first text: idxmap has no entry for a negative
        # position, so leave the placeholder on screen.
        return

    # @@@ if the new text has been labeled before, those labels are reused @@@

    text = df.at[idxmap[i], TEXT_COL]
    stored_labels = df.at[idxmap[i], LABEL_COL]

    container.children = get_hboxes(text, stored_labels)

    txt.value = text
    
def prev_button_click(button):
    """Click handler for the 'Previous' button: step back by one text."""
    return next_prev_button_click(next_or_prev='prev', button=button)

def next_button_click(button):
    """Click handler for the 'Next' button: step forward by one text."""
    return next_prev_button_click(next_or_prev='next', button=button)

def handle_label_click(button):
    """Switch the clicked token button to the label that follows its current one.

    The current label is read from the button's tooltip. After the last entry
    of ``labels`` the cycle wraps around to the first.
    """
    current_position = labels.index(button.tooltip)
    following_position = (current_position + 1) % len(labels)
    following_label = labels[following_position]

    style = bio[following_label]
    button.button_style = style['button_style']
    button.tooltip = following_label

In [None]:
def get_hboxes(text, labels, n_cols=None):
    """Build rows of clickable token buttons for one text.

    Parameters
    ----------
    text : str
        Text to label; it is split into tokens with ``tokenize``.
    labels : list, str, None or NaN
        One label (a key of ``bio``) per token. A string is parsed with
        ``literal_eval``, because a list cell that has been written to CSV
        and read back arrives as its string form. ``None`` or NaN means
        the text is unlabeled, and every token then starts with label '0'.
    n_cols : int, optional
        Maximum number of buttons per row. Defaults to the notebook-level
        ``N_COLS``.

    Returns
    -------
    list of widgets.HBox
        The buttons in token order, ``n_cols`` per row.
    """
    if n_cols is None:
        # Looked up at call time so a re-run of the config cell takes effect.
        n_cols = N_COLS

    tokens = tokenize(text)

    if isinstance(labels, str):
        labels = literal_eval(labels)

    # NaN is the only float that differs from itself.
    is_nan = isinstance(labels, float) and labels != labels
    if labels is None or is_nan:
        labels = ['0'] * len(tokens)

    assert len(tokens) == len(labels), (
        f'{len(tokens)} tokens but {len(labels)} labels'
    )

    token_buttons = []

    for token, label in zip(tokens, labels):
        # The tooltip doubles as storage for the button's current label.
        w = widgets.Button(description=token, tooltip=label, button_style=bio[label]['button_style'])
        w.on_click(handle_label_click)

        token_buttons.append(w)

    # Cut the flat button list into rows of at most n_cols buttons.
    return [
        widgets.HBox(token_buttons[start:start + n_cols])
        for start in range(0, len(token_buttons), n_cols)
    ]

In [None]:
# Negative position: no text is on screen until 'Next' is clicked.
i = -1

# Shows the full text currently being labeled.
txt = widgets.Label(value='')

# Holds the rows of token buttons; starts with a placeholder message.
container = widgets.VBox(children=[widgets.Label(value='Click next to start')])

# Navigation buttons and their click handlers.
prev_button = widgets.Button(description='Previous', button_style='success')
next_button = widgets.Button(description='Next', button_style='success')
prev_button.on_click(prev_button_click)
next_button.on_click(next_button_click)

buttons = widgets.HBox(children=[prev_button, next_button])

debug = widgets.Output()

# The last expression of the cell renders the assembled UI.
widgets.VBox(children=[txt, container, buttons, debug])

In [None]:
# Display the frame again; the click handlers above write the chosen labels into it.
df

In [None]:
# Number of rows whose label cell holds a list, i.e. rows labeled in this session.
# Uses LABEL_COL so the count follows the configured column name.
df[LABEL_COL].apply(lambda v: isinstance(v, list)).sum()

In [None]:
# Write the frame, label column included, to a CSV file.
df.to_csv(path_or_buf='test.csv')

In [None]:
def view_labels(df):
    """Render each text with its tokens coloured according to their labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``TEXT_COL`` (the raw text) and ``LABEL_COL`` (one label
        per token of that text, as a sequence).

    Returns
    -------
    widgets.VBox
        A style sheet followed by one HTML widget per row of ``df``.

    Raises
    ------
    AssertionError
        If a row has a different number of labels than tokens.
    """
    # A CSS class name must not start with a digit, so selectors such as '.1'
    # are invalid and never match; the classes are prefixed to make them valid.
    style_sheet = """
    <style>
    .bio-1 {
        color: orange
    }
    
    .bio-0 {
        color: black
    }

    </style>
    """

    # The style sheet is emitted once instead of being repeated in every row.
    rendered = [widgets.HTML(style_sheet)]

    for text, token_labels in zip(df[TEXT_COL], df[LABEL_COL]):
        tokens = tokenize(text)

        assert len(tokens) == len(token_labels)

        # NOTE(review): tokens are inserted into the markup without HTML
        # escaping; a token containing '<' or '&' may not render as written.
        spans = ' '.join(
            f'<span class="bio-{label}">{token}</span>'
            for token, label in zip(tokens, token_labels)
        )
        rendered.append(widgets.HTML(spans))

    return widgets.VBox(rendered)
    
# Show only the rows whose label cell holds a list (the rows that have been labeled).
# Uses LABEL_COL so the filter follows the configured column name.
view_labels(df[df[LABEL_COL].apply(lambda x: isinstance(x, list))])

In [None]:
# if this notebook is used to label more data, remember to save the new labels!