In [45]:
import time
import hashlib
import sys

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from IPython import display
import ipywidgets as widgets
import tqdm.notebook as tqdm


# NOTE(review): this creates an empty 10x10-inch figure (it shows up in the
# cell output as "<Figure size 1000x1000 with 0 Axes>"). The global `fig` is
# not referenced by any cell visible here. If the goal was a default figure
# size, plt.rcParams["figure.figsize"] is the usual way -- TODO confirm intent.
fig=plt.figure(figsize=(10, 10))
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

<Figure size 1000x1000 with 0 Axes>

In [44]:
# For debugging and development: a synthetic table of 100 unlabeled points.
# Built in one shot from a list of records instead of appending row by row,
# which is quadratic and leaves the integer column with object dtype.
# NOTE: bytes(n) is a run of n zero bytes (not an encoding of the integer n),
# so each uid is the SHA-1 of n NUL bytes -- still unique per row.
data = pd.DataFrame(
    [
        {
            'uid': hashlib.sha1(bytes(n)).hexdigest(),
            'data': n,
            'label': None,
            'username': None,
        }
        for n in range(100)
    ],
    columns=['uid', 'data', 'label', 'username'],
)
print(data)

                                         uid  data label username
0   da39a3ee5e6b4b0d3255bfef95601890afd80709     0  None     None
1   5ba93c9db0cff93f52b521d7420e43f6eda2784f     1  None     None
2   1489f923c4dca729178b3e3233458550d8dddf29     2  None     None
3   29e2dcfbb16f63bb0254df7585a15bb6fb5e927d     3  None     None
4   9069ca78e7450a285173431b3e52c5c25299e473     4  None     None
..                                       ...   ...   ...      ...
95  bd057d7f49143824e45263147a02a580137fabef    95  None     None
96  c49a9785b2243f2f080daad1747f119acceccfa5    96  None     None
97  fa205d2a65684c6245a2272facf45fb12ace4014    97  None     None
98  a568e30784b1df87b30e1d4a2234de7b706b3d27    98  None     None
99  d991c16949bd5e85e768385440e18d493ce3aa46    99  None     None

[100 rows x 4 columns]


In [42]:
# Labeling interface
# NOTE: the triple-quoted string below is a disabled ipywidgets-based
# prototype (draw_widgets). As a bare string expression it is evaluated and
# discarded; none of the code inside it runs.
"""
def draw_widgets():
    label_buttons = widgets.RadioButtons(
        options=['pepperoni', 'pineapple', 'anchovies'],
    #    value='pineapple', # Defaults to 'pineapple'
    #    layout={'width': 'max-content'}, # If the items' names are long
        description='Pizza topping:',
        disabled=False
    )
    display.display(label_buttons)
    
    next_button = widgets.ToggleButton(
    value=False,
    description='Click me',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Description',
    icon='check' # (FontAwesome names without the `fa-` prefix)
    )
    display.display(next_button)

    return label_buttons, next_button
"""

# Numeric class encoding -> human-readable class name
# (position in the tuple is the encoding).
labels = dict(enumerate(("Not Cloudy", "Cloudy")))

def do_simple_plot(n):
    """Build a throwaway sine-wave figure for exercising the labeling interface.

    Params:
        n: multiplier applied to the frequency of the plotted sine wave.

    Returns: the matplotlib Figure. It is closed through pyplot before being
        returned; the caller is expected to display it explicitly.
    """
    times = np.arange(0.0, 2.0, 0.01)
    signal = 1 + np.sin(2 * np.pi * times * n)

    figure, axes = plt.subplots()
    axes.plot(times, signal)
    axes.set_xlabel('time (s)')
    axes.set_ylabel('voltage (mV)')
    axes.set_title('About as simple as it gets, folks')
    axes.grid()

    # Closes pyplot's current figure, i.e. the one created just above.
    plt.close()
    return figure


def get_valid_label_str(valid_labels):
    """Describe every valid label together with the number the user types for it.

    Params:
        valid_labels: dictionary mapping each numeric class encoding to its
            class name.

    Returns: comma-separated string of "<encoding + 1>='<name>'" entries
        (the displayed numbers are 1-based); empty string for an empty dict.
    """
    # Use the argument rather than the module-level `labels`, so the function
    # describes whatever mapping the caller passes in.
    return ", ".join(
        f"{key + 1}='{val}'" for key, val in valid_labels.items()
    )


def get_user_label():
    """Prompt until the user enters a valid label or asks to exit.

    The user types the 1-based number shown in the prompt (class encoding
    plus one). Leading and trailing whitespace is ignored.

    Returns: the text the user entered (e.g. '1'), or None if the user
        typed 'exit'.
    """
    # NOTE(review): the returned value is the 1-based text the user typed,
    # not the 0-based key of `labels` -- confirm that is what should be stored.
    prompt = f"Valid labels:\t{get_valid_label_str(labels)}.\nYour label: "
    while True:
        label = input(prompt).strip()
        if label == 'exit':
            return None
        # isdecimal() only accepts characters that int() can parse.
        # isnumeric() is also true for e.g. superscripts and vulgar fractions,
        # for which int() raises ValueError and would crash the session.
        if label.isdecimal() and int(label) - 1 in labels:
            return label
        print(f"\x1b[31mError:\t   '{label}' -->  is not a valid label. "
              "(To exit the session, type 'exit'.)\x1b[0m\n")



def get_label_session(df, labeler_name):
    """Constructor for labeling interface.

    Params:
        df: DataFrame containing metadata about unlabeled data points. It is
            read through its 'data' and 'label' columns and updated in place
            in its 'label' and 'username' columns.
        labeler_name: Name of person doing the labeling.

    Returns: a zero-argument function that runs an interactive labeling
        session over the rows of df whose 'label' is still null.
    """
    def progress_message(done, total):
        """Text progress bar (one character per image) followed by a counter."""
        progress_str = '#' * done + '.' * (total - done)
        return f"[{progress_str}]" + f"\t{done} / {total}\n"

    def start_label_session():
        """Labeling interface: prompt for each unlabeled row until done or 'exit'."""
        # Evaluated on every call, not once at construction, so a session
        # started after an 'exit' resumes with the rows that are still
        # unlabeled instead of replaying a stale snapshot.
        unlabeled_data = df[df.label.isnull()]
        num_imgs = len(unlabeled_data)
        if num_imgs == 0:
            print("All available data is labeled! \N{grinning face}")
            return

        for itr_num, i in enumerate(unlabeled_data.index, start=1):
            fig = do_simple_plot(unlabeled_data.loc[i, 'data'])

            display.clear_output(wait=True)
            print(progress_message(itr_num, num_imgs))
            display.display(fig)

            label = get_user_label()
            if label is None:
                # The user typed 'exit': stop here and leave this row (and
                # all remaining rows) unlabeled, without stamping a username.
                return
            df.loc[i, ['label', 'username']] = [label, labeler_name]

    return start_label_session
#labeler(df)

In [34]:
# Enter your name below, then run this cell.
# The labeling session records it in the 'username' column next to every
# label you assign.
name = "Nicolas Rault-Wang"

In [37]:
# Run this cell to start labeling.
# get_label_session returns the function that runs the interactive prompt
# loop; labels are written into `data` in place.
start_label_session = get_label_session(data, name)
start_label_session()

All available data is labeled! 😀


In [38]:
# Run the cell below to preview your labels. (TODO: writing them to file is not implemented yet.)

In [39]:
# Plain-text preview of the labeled table (pandas elides the middle rows).
print(data)

                                         uid  data label            username
0   da39a3ee5e6b4b0d3255bfef95601890afd80709     0     1  Nicolas Rault-Wang
1   5ba93c9db0cff93f52b521d7420e43f6eda2784f     1     2  Nicolas Rault-Wang
2   1489f923c4dca729178b3e3233458550d8dddf29     2     1  Nicolas Rault-Wang
3   29e2dcfbb16f63bb0254df7585a15bb6fb5e927d     3     2  Nicolas Rault-Wang
4   9069ca78e7450a285173431b3e52c5c25299e473     4     1  Nicolas Rault-Wang
..                                       ...   ...   ...                 ...
95  bd057d7f49143824e45263147a02a580137fabef    95     1  Nicolas Rault-Wang
96  c49a9785b2243f2f080daad1747f119acceccfa5    96     2  Nicolas Rault-Wang
97  fa205d2a65684c6245a2272facf45fb12ace4014    97     1  Nicolas Rault-Wang
98  a568e30784b1df87b30e1d4a2234de7b706b3d27    98     2  Nicolas Rault-Wang
99  d991c16949bd5e85e768385440e18d493ce3aa46    99     1  Nicolas Rault-Wang

[100 rows x 4 columns]
