QueryRandom strategy didn't work. #1

evanzhu2013 · 2019-02-15T02:12:07Z

Thanks for your work. The most comprehensive AL package I've seen.

Issue:

from sklearn.datasets import load_iris,load_breast_cancer
from alipy.experiment.al_experiment import AlExperiment

import warnings
warnings.filterwarnings('ignore')

import copy
from sklearn.datasets import make_classification
from alipy import ToolBox
from alipy.query_strategy.query_labels import QueryInstanceGraphDensity, QueryInstanceQBC, \
    QueryInstanceQUIRE, QueryRandom, QueryInstanceUncertainty, QureyExpectedErrorReduction, QueryInstanceLAL

# Generate a synthetic binary classification dataset (500 samples, 20 features).
# random_state=None, so the generated data differs from run to run.
X, y = make_classification(n_samples=500, n_features=20, n_informative=2, n_redundant=2,
    n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0,
    hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)

# Create the alipy ToolBox for this dataset; saving_path='.' is the current directory
alibox = ToolBox(X=X, y=y, query_type='AllLabels', saving_path='.')

# Split data: 30% test ratio, 10% initial label rate, 10 splits
alibox.split_AL(test_ratio=0.3, initial_label_rate=0.1, split_count=10)

# Use the default Logistic Regression classifier
model = alibox.get_default_model()

# Stop each experiment after 50 queries
stopping_criterion = alibox.get_stopping_criterion('num_of_queries', 50)


def main_loop(alibox, strategy, round):
    """Run one active-learning experiment on split ``round`` with ``strategy``.

    Relies on the module-level ``model``, ``stopping_criterion``, ``X`` and
    ``y`` defined earlier in the script.

    Parameters
    ----------
    alibox : the ToolBox object providing the data splits, the state saver
        and the performance metric.
    strategy : a query strategy object; it must accept
        ``select(label_ind, unlab_ind, batch_size=1)``.
    round : index of the data split to run.

    Returns
    -------
    The saver obtained from ``alibox.get_stateio(round)``, holding one state
    per query iteration.
    """
    # Get the data split of one fold experiment
    train_idx, test_idx, label_ind, unlab_ind = alibox.get_split(round)
    # Get intermediate results saver for one fold experiment
    saver = alibox.get_stateio(round)
    while not stopping_criterion.is_stop():
        # Select one instance from the unlabeled pool according to the query strategy.
        # The labeled index is passed first and the unlabeled index second; no model is passed here.
        select_ind = strategy.select(label_ind, unlab_ind, batch_size=1)
        # Move the selected instance from the unlabeled pool to the labeled set
        label_ind.update(select_ind)
        unlab_ind.difference_update(select_ind)

        # Retrain the model on the labeled set and measure accuracy on the test set
        model.fit(X=X[label_ind.index, :], y=y[label_ind.index])
        pred = model.predict(X[test_idx, :])
        accuracy = alibox.calc_performance_metric(y_true=y[test_idx],
                                                  y_pred=pred,
                                                  performance_metric='accuracy_score')

        # Record this iteration's selection and accuracy in the saver
        st = alibox.State(select_index=select_ind, performance=accuracy)
        saver.add_state(st)

        # Passing the current progress to stopping criterion object
        stopping_criterion.update_information(saver)
    # Reset the shared stopping criterion so the next experiment starts from zero
    stopping_criterion.reset()
    return saver

# One list of per-split savers for each strategy
unc_result = []
qbc_result = []
random_result = []

# Only the first 5 of the 10 splits created by split_AL are used
for round in range(5):
    # NOTE: these four values are not used in this loop body;
    # main_loop fetches the split for `round` itself.
    train_idx, test_idx, label_ind, unlab_ind = alibox.get_split(round)

    # Use pre-defined strategy
    unc = QueryInstanceUncertainty(X, y)
    qbc = QueryInstanceQBC(X, y)
    rnd = QueryRandom(X,y)

    # Deep-copy each returned saver before storing it
    unc_result.append(copy.deepcopy(main_loop(alibox, unc, round)))
    qbc_result.append(copy.deepcopy(main_loop(alibox, qbc, round)))
    # This call raises "TypeError: select() got multiple values for argument 'batch_size'"
    random_result.append(copy.deepcopy(main_loop(alibox, rnd, round)))

# Compare the strategies with the number of queries on the x axis
analyser = alibox.get_experiment_analyser(x_axis='num_of_queries')

analyser.add_method(method_name='QBC', method_results=qbc_result)
analyser.add_method(method_name='Unc', method_results=unc_result)
analyser.add_method(method_name='RANDOM', method_results=random_result)

print(analyser)
analyser.plot_learning_curves(title='Example of alipy', std_area=False)

Error is below:

| round | initially labeled data | number of queries | cost | Performance: |
|   0   |   35 (10.00% of all)   |         50        |  0   | 0.846 ± 0.02 |
| round | initially labeled data | number of queries | cost | Performance: |
|   0   |   35 (10.00% of all)   |         50        |  0   | 0.841 ± 0.01 |
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-5d0ada1f815d> in <module>
     70     unc_result.append(copy.deepcopy(main_loop(alibox, unc, round)))
     71     qbc_result.append(copy.deepcopy(main_loop(alibox, qbc, round)))
---> 72     random_result.append(copy.deepcopy(main_loop(alibox, rnd, round)))
     73 
     74 analyser = alibox.get_experiment_analyser(x_axis='num_of_queries')

<ipython-input-15-5d0ada1f815d> in main_loop(alibox, strategy, round)
     35         # Select a subset of Uind according to the query strategy
     36         # Passing model=None to use the default model for evaluating the committees' disagreement
---> 37         select_ind = strategy.select(label_ind, unlab_ind, batch_size=1)
     38         label_ind.update(select_ind)
     39         unlab_ind.difference_update(select_ind)

TypeError: select() got multiple values for argument 'batch_size'

The text was updated successfully, but these errors were encountered:

Lggggggx · 2019-02-15T02:39:26Z

Sorry about that.
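The QueryRandom API has not been unified with the other strategies yet. Its signature is select(self, unlabel_index, batch_size=1), so it takes only the unlabeled index. Calling select(label_ind, unlab_ind, batch_size=1) therefore binds unlab_ind to batch_size positionally and then passes batch_size again as a keyword, which raises the TypeError above. We will fix this in the next version.
If you want to use QueryRandom in the meantime, you need to write a separate loop for it.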
Here is an example for your reference.

# Workaround loop for QueryRandom, whose select() is called with only the unlabeled index
random = QueryRandom(X, y)
random_result = []

for round in range(5):
    # Get the data split of one fold experiment
    train_idx, test_idx, label_ind, unlab_ind = alibox.get_split(round)
    # Get intermediate results saver for one fold experiment
    saver = alibox.get_stateio(round)
    # Compute the initial point: test accuracy of the model trained on the
    # initially labeled data, before any query is made
    model.fit(X=X[label_ind.index, :], y=y[label_ind.index])
    pred = model.predict(X[test_idx, :])
    # Fraction of test instances predicted correctly
    accuracy = sum(pred == y[test_idx]) / len(test_idx)
    saver.set_initial_point(accuracy)

    while not stopping_criterion.is_stop():
        # Select one instance from the unlabeled pool.
        # Unlike the other strategies' select(), the labeled index is not passed.
        select_ind = random.select(unlab_ind, batch_size=1)
        # Move the selected instance from the unlabeled pool to the labeled set
        label_ind.update(select_ind)
        unlab_ind.difference_update(select_ind)

        # Retrain the model on the labeled set and measure accuracy on the test set
        model.fit(X=X[label_ind.index, :], y=y[label_ind.index])
        pred = model.predict(X[test_idx, :])
        accuracy = alibox.calc_performance_metric(y_true=y[test_idx],
                                                y_pred=pred,
                                                performance_metric='accuracy_score')

        # Save intermediate results to file
        st = alibox.State(select_index=select_ind, performance=accuracy)
        saver.add_state(st)
        saver.save()

        # Passing the current progress to stopping criterion object
        stopping_criterion.update_information(saver)
    # Reset the shared stopping criterion so the next split starts from zero
    stopping_criterion.reset()
    # Store a deep copy of this split's saver
    random_result.append(copy.deepcopy(saver))

evanzhu2013 · 2019-02-15T02:49:57Z

Thanks for your quick answer. It works. I suggest adding this code to the examples folder.

Lggggggx · 2019-02-15T02:54:17Z

Thank you for your advice. It's a good idea.

evanzhu2013 closed this as completed Feb 15, 2019

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

QueryRandom strategy didn't work. #1

QueryRandom strategy didn't work. #1

evanzhu2013 commented Feb 15, 2019

Lggggggx commented Feb 15, 2019

evanzhu2013 commented Feb 15, 2019

Lggggggx commented Feb 15, 2019

QueryRandom strategy didn't work. #1

QueryRandom strategy didn't work. #1

Comments

evanzhu2013 commented Feb 15, 2019

Error is below:

Lggggggx commented Feb 15, 2019

evanzhu2013 commented Feb 15, 2019

Lggggggx commented Feb 15, 2019