In [1]:
%load_ext autoreload
%autoreload 2

import os, sys
os.environ['CUDA_VISIBLE_DEVICES'] = ''

import matplotlib.pyplot as plt, collections, logging
%pylab inline

import pandas as pd
import numpy as np

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

from libact.query_strategies import UncertaintySampling
from libact.models import LogisticRegression as LibActLogisticRegression, SklearnProbaAdapter

sys.path.append('/notebook')
from actleto import ActiveLearner, make_libact_strategy_ctor, MPErr, ActiveLearnerUiWidget, ImageVisualizer

Populating the interactive namespace from numpy and matplotlib


Using TensorFlow backend.


In [2]:
# Load the MNIST train/test splits through keras.datasets.mnist.
(X_train_orig, y_train_orig), (X_test_orig, y_test_orig) = mnist.load_data()

# Number of values per image once axes 1 and 2 are unrolled into a single row.
num_pixels = X_train_orig.shape[1] * X_train_orig.shape[2]

# Flatten every image to one float32 row and normalize inputs from 0-255 to 0-1.
X_train = X_train_orig.reshape(len(X_train_orig), num_pixels).astype('float32') / 255
X_test = X_test_orig.reshape(len(X_test_orig), num_pixels).astype('float32') / 255

# One-hot encode the label vectors; the encoded width gives the class count.
y_train = np_utils.to_categorical(y_train_orig)
y_test = np_utils.to_categorical(y_test_orig)
num_classes = y_test.shape[1]

In [3]:
def baseline_model():
    """Build and compile the fully connected classifier used in this notebook.

    The network has one hidden ReLU layer with ``num_pixels`` units fed by
    ``num_pixels`` inputs, followed by a softmax layer with ``num_classes``
    units. Both sizes are read from module-level variables.

    Returns:
        A compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    hidden_layer = Dense(num_pixels, input_dim=num_pixels,
                         kernel_initializer='normal', activation='relu')
    output_layer = Dense(num_classes,
                         kernel_initializer='normal', activation='softmax')

    network = Sequential([hidden_layer, output_layer])
    network.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])
    return network

In [4]:
# Number of training examples that keep their label in the initial seed set.
N_SEED_LABELS = 500
# Fixed seed so the same seed set is drawn after "Restart & Run All".
SEED_RANDOM_STATE = 42

y_seed = pd.DataFrame(y_train)

# Pick the rows whose labels are hidden from the active learner.
# Sampling must be WITHOUT replacement: np.random.randint draws with
# replacement, so its N - 500 draws contain many duplicates and leave far
# more than 500 rows labelled.
seed_rng = np.random.RandomState(SEED_RANDOM_STATE)
unknown_indexes = seed_rng.choice(y_train.shape[0],
                                  size=max(0, y_train.shape[0] - N_SEED_LABELS),
                                  replace=False)

# Assigning None blanks every one-hot column of the selected rows.
y_seed.iloc[unknown_indexes] = None

In [5]:
# Model used by the query strategy to score unlabeled examples: a Keras
# classifier built by baseline_model, wrapped in libact's SklearnProbaAdapter.
selection_model = SklearnProbaAdapter(
    KerasClassifier(build_fn=baseline_model, verbose=0)
)

# Factory handed to ActiveLearner; for a given training dataset it creates an
# UncertaintySampling strategy backed by selection_model.
active_learn_alg_ctor = make_libact_strategy_ctor(
    lambda trn_ds: UncertaintySampling(trn_ds, model=selection_model)
)

# Separate classifier instance passed to ActiveLearner as model_evaluate.
evaluation_model = KerasClassifier(build_fn=baseline_model, verbose=0)


def f1_macro(y_t, y_p):
    """Return the macro-averaged F1 score of predictions ``y_p`` against ``y_t``.

    Thin wrapper around ``sklearn.metrics.f1_score`` with ``average='macro'``
    so it can be passed as a two-argument metric.
    """
    macro_f1 = f1_score(y_t, y_p, average='macro')
    return macro_f1


# Everything the ActiveLearner needs, gathered in one place.
# NOTE(review): y_seed is derived from the one-hot encoded y_train, while
# y_test_dataset is the raw integer label vector y_test_orig -- confirm that
# ActiveLearner expects this mix of encodings together with y_dtype='int'.
learner_config = dict(
    active_learn_alg_ctor=active_learn_alg_ctor,
    y_dtype='int',
    X_full_dataset=X_train,
    y_full_dataset=y_seed.values,
    X_test_dataset=X_test,
    y_test_dataset=y_test_orig,
    model_evaluate=evaluation_model,
    eval_metrics=[accuracy_score, f1_macro],
    rnd_start_steps=0,
)
active_learner = ActiveLearner(**learner_config)

# Unscaled pixel values, one flattened 28x28 image per row, with an explicit
# 0..N-1 index; passed to the annotation widget as X_helper.
flat_images = X_train_orig.reshape(-1, 28*28)
X_helper = pd.DataFrame(flat_images, index=range(X_train_orig.shape[0]))

In [10]:
# Stop the widget left over from a previous run of this cell, if any.
# On the first run the name does not exist yet, hence the NameError guard.
try:
    if active_learn_ui:
        active_learn_ui.stop()
except NameError:
    pass

# Every column of X_helper is a pixel value: all of them go to the image
# visualizer and all of them are listed in drop_labels.
pixel_column_count = X_helper.shape[1]

digit_visualizer = ImageVisualizer(columns_range=(0, pixel_column_count),
                                   img_shape=(28, 28),
                                   img_mode='L',
                                   preview_shape=(100, 100))

# Maps the label text shown for each choice to the digit value it stands for.
digit_labels = {str(digit): digit for digit in range(10)}

active_learn_ui = ActiveLearnerUiWidget(
    active_learner=active_learner,
    X_helper=X_helper,
    display_feature_table=False,
    drop_labels=list(range(0, pixel_column_count)),
    visualizer=digit_visualizer,
    y_labels=digit_labels,
    save_path='../data/defterms_dump/results/mnist.npy',
    save_time=120,
)

active_learn_ui

ValueError: not enough values to unpack (expected 2, got 0)

In [27]:
# Toy array mixing real values with missing (None) entries.
rows_with_gaps = [[1, 2], [None, None], [None, 1]]
a = np.array(rows_with_gaps)

# True only for the rows in which every column holds a value.
pd.DataFrame(a).notnull().all(axis=1)

0     True
1    False
2    False
dtype: bool

In [25]:
# Element-wise mask of the toy array `a`: True wherever a value is present.
pd.DataFrame(a).notna()

Unnamed: 0,0,1
0,True,True
1,False,False
2,False,True


In [10]:
# IPython `obj?` introspection: shows the docstring of np.apply_along_axis.
# Leftover from interactive exploration; it is not valid plain-Python syntax.
np.apply_along_axis?