# MNIST annotation example

In [None]:
# Load IPython's autoreload extension; mode 2 asks it to reload imported modules
# before each cell execution, so edits to local source files are picked up.
%load_ext autoreload
%autoreload 2

import os, sys
# Set CUDA_VISIBLE_DEVICES to an empty string before any deep-learning import happens.
# NOTE(review): presumably this hides all GPUs so the Keras backend runs on CPU - confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Add the parent directory to the module search path.
# NOTE(review): presumably so that the `actleto` package can be imported from a
# repository checkout without installing it - verify against the repo layout.
sys.path.append('../')

import matplotlib.pyplot as plt, collections, logging
# %pylab populates the interactive namespace with numpy/matplotlib names and enables
# inline figures; later cells may rely on names it injects (e.g. the `np` alias).
%pylab inline

# Initialization

In [None]:
import pandas
import numpy

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.metrics import f1_score, accuracy_score

from libact.query_strategies import UncertaintySampling
from libact.models import LogisticRegression as LibActLogisticRegression, SklearnProbaAdapter

from actleto import ActiveLearner, make_libact_strategy_ctor, MPErr, ActiveLearnerUiWidget, ImageVisualizer

In [None]:
import logging

# Logger is needed for the default evaluation output and some notifications.
#
# Notebook cells are routinely executed more than once per kernel session.
# The handler installed here is therefore tagged with a name and any handler
# carrying that tag is removed first; otherwise every re-run would attach one
# more handler and each log record would be printed several times.
_STDOUT_HANDLER_NAME = 'actleto_notebook_stdout'

logger = logging.getLogger('actleto')
for stale_hdl in [hdl for hdl in logger.handlers if hdl.get_name() == _STDOUT_HANDLER_NAME]:
    logger.removeHandler(stale_hdl)

formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
strm_hdl = logging.StreamHandler(sys.stdout)
strm_hdl.set_name(_STDOUT_HANDLER_NAME)
strm_hdl.setFormatter(formatter)
logger.addHandler(strm_hdl)
logger.setLevel(logging.INFO)

# Dataset preparation

In [None]:
# Fetch MNIST; the *_orig arrays keep the images exactly as the loader returns them
(X_train_orig, y_train), (X_test_orig, y_test) = mnist.load_data()

# Number of distinct labels found in the training split
num_classes = len(numpy.unique(y_train))

# Length of one flattened image: product of the two image dimensions (28*28 = 784)
img_rows, img_cols = X_train_orig.shape[1:3]
num_pixels = img_rows * img_cols


def _flatten_and_scale(images):
    """Flatten every image to a `num_pixels` float32 vector and map 0-255 to 0-1."""
    flat = images.reshape(images.shape[0], num_pixels)
    return flat.astype('float32') / 255


X_train = _flatten_and_scale(X_train_orig)
X_test = _flatten_and_scale(X_test_orig)

# Model construction

In [None]:
# Simple 2-layer perceptron
def baseline_model():
    """Build and compile the perceptron used throughout this notebook.

    The network has one ReLU Dense layer of width `num_pixels` followed by a
    softmax Dense layer of width `num_classes`. It is compiled with the
    categorical cross-entropy loss, the Adam optimizer and the accuracy metric.

    Returns:
        The compiled Keras `Sequential` model.
    """
    layers = [
        Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'),
        Dense(num_classes, kernel_initializer='normal', activation='softmax'),
    ]
    network = Sequential(layers)
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

# Active learning preparations

In [None]:
# For demonstration we sample 50 distinct images from MNIST as seed examples for AL.
# The rest of the images are considered "unlabeled" (their entry in y_seed stays None).
n_seed_examples = 50
y_seed = pandas.Series([None] * y_train.shape[0], index = range(y_train.shape[0]))
# Draw positions without replacement so that exactly n_seed_examples images get a
# label; numpy.random.randint may return the same position more than once.
# The explicitly imported `numpy` module is used so this cell does not depend on
# the `np` alias that %pylab injects into the namespace.
known_indexes = numpy.random.choice(y_train.shape[0], size = n_seed_examples, replace = False)
y_seed.iloc[known_indexes] = y_train[known_indexes]

In [None]:
# Model that is used to select examples from the unlabeled dataset.
# SklearnProbaAdapter is the wrapper that makes an sklearn-style model
# (here the Keras network behind KerasClassifier) compatible with libact.
query_model = SklearnProbaAdapter(KerasClassifier(build_fn=baseline_model, verbose=0))


def _uncertainty_strategy(trn_ds):
    """Create libact's uncertainty sampling strategy over `trn_ds`, backed by `query_model`."""
    return UncertaintySampling(trn_ds, model=query_model)


# The libact strategy factory is wrapped with the adaptor expected by ActiveLearner.
active_learn_alg_ctor = make_libact_strategy_ctor(_uncertainty_strategy)

# Model for evaluation. In this example it has the same architecture as the
# model used for selecting examples, but it is a separate instance.
evaluation_model = KerasClassifier(build_fn=baseline_model, verbose=0)


def f1_macro(y_t, y_p):
    """Return the macro-averaged F1 score of predictions `y_p` against labels `y_t`."""
    macro_f1 = f1_score(y_t, y_p, average='macro')
    return macro_f1

# The ActiveLearner object encapsulates the logic of active learning.
# Its configuration is collected first and then passed as keyword arguments.
learner_settings = dict(
    active_learn_alg_ctor=active_learn_alg_ctor,
    y_dtype='int',
    X_full_dataset=X_train,
    y_full_dataset=y_seed.values,
    X_test_dataset=X_test,
    y_test_dataset=y_test,
    model_evaluate=evaluation_model,
    eval_metrics=[accuracy_score, f1_macro],
    rnd_start_steps=0,
)
active_learner = ActiveLearner(**learner_settings)

# X_helper holds the original (unnormalized) pixels, one flattened image per row;
# it is needed for data visualization for human annotators.
flat_image_shape = (-1, 28 * 28)
X_helper = pandas.DataFrame(
    X_train_orig.reshape(flat_image_shape),
    index=range(X_train_orig.shape[0]),
)

# Invoking annotation widget

In [None]:
# When autosave is enabled, an earlier widget created by a previous run of this
# cell still has its autosave thread running. Stop it before creating a new widget.
# On the very first run the name does not exist yet, hence the NameError guard.
try:
    if active_learn_ui:
        active_learn_ui.stop()
except NameError:
    pass

# Every column of X_helper is a pixel of the image.
pixel_column_count = X_helper.shape[1]

# Visualizer that renders the pixel columns of a row as a grayscale 28x28 image.
image_visualizer = ImageVisualizer(
    columns_range=(0, pixel_column_count),
    img_shape=(28, 28),
    img_mode='L',
    preview_shape=(100, 100),
)

# The annotation widget itself, configured with the active learner object,
# the data for visualization (X_helper) and the visualizer for images.
active_learn_ui = ActiveLearnerUiWidget(
    active_learner=active_learner,
    X_helper=X_helper,
    display_feature_table=False,
    drop_labels=list(range(pixel_column_count)),
    visualizer=image_visualizer,
    y_labels={str(digit): digit for digit in range(10)},
    save_path='mnist.npy',
    save_time=120,
)

active_learn_ui

2017-09-26 13:20:06,837 - actleto - INFO - Autosave.
2017-09-26 13:20:06,843 - actleto - INFO - Saved. File path: mnist_autosave.npy
2017-09-26 13:22:06,845 - actleto - INFO - Autosave.
2017-09-26 13:22:06,849 - actleto - INFO - Saved. File path: mnist_autosave.npy
2017-09-26 13:24:06,851 - actleto - INFO - Autosave.
2017-09-26 13:24:06,856 - actleto - INFO - Saved. File path: mnist_autosave.npy
2017-09-26 13:26:06,858 - actleto - INFO - Autosave.
2017-09-26 13:26:06,864 - actleto - INFO - Saved. File path: mnist_autosave.npy
2017-09-26 13:28:06,866 - actleto - INFO - Autosave.
2017-09-26 13:28:06,871 - actleto - INFO - Saved. File path: mnist_autosave.npy
2017-09-26 13:30:06,872 - actleto - INFO - Autosave.
2017-09-26 13:30:06,878 - actleto - INFO - Saved. File path: mnist_autosave.npy
