In [11]:
# %pip install -U scikit-activeml gradio streamlit ipython -q

In [12]:
import warnings

import gradio

# import gradio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from skactiveml.classifier import SklearnClassifier
from skactiveml.pool import QueryByCommittee, RandomSampling, UncertaintySampling
from skactiveml.utils import MISSING_LABEL, labeled_indices, unlabeled_indices
from skactiveml.visualization import plot_decision_boundary, plot_utilities
from sklearn.datasets import load_digits, load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Silence every warning category for the rest of the kernel session so the
# cell outputs stay uncluttered.
# NOTE(review): this also hides deprecation and convergence warnings raised by
# the libraries imported above -- consider narrowing the filter.
warnings.filterwarnings("ignore")

In [13]:
# Load the Iris and Digits datasets once; both are kept as module-level
# objects so later cells can read their `.data` / `.target` attributes.
iris, digits = load_iris(), load_digits()

In [15]:
def evaluate_active_learning(
    dataset, dataset_name, method, n_cycles=100, random_state=42
):
    """Simulate pool-based active learning and record accuracy after each query.

    Starting from a fully unlabeled pool, one sample is queried per cycle, its
    true label is revealed, the classifier is refit, and the accuracy is
    recorded.

    Parameters
    ----------
    dataset : sequence of length 2
        ``(X, y_true)``: the feature matrix and the ground-truth labels.
    dataset_name : str
        Name used only in the progress message printed to stdout.
    method : int
        Query-strategy code:
        0 = random sampling, 1 = query by committee (KL divergence),
        2 = uncertainty sampling (least confident),
        3 = uncertainty sampling (margin sampling),
        4 = uncertainty sampling (entropy).
    n_cycles : int, default=100
        Requested number of query cycles. It is capped at the number of
        samples because every cycle labels one sample from the pool.
    random_state : int or None, default=42
        Seed shared by the query strategy and the random forest so that
        repeated calls give the same curve.

    Returns
    -------
    accuracies : list of float
        Accuracy after each cycle. Note that it is measured on the whole pool
        ``X`` against ``y_true``, i.e. it includes the samples whose labels
        have already been revealed.
    n_cycles : int
        Number of cycles actually run (``len(accuracies)``).

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported codes.
    """
    # Lazy factories: the method code is validated before anything is built,
    # so a bad code fails fast with a clear message instead of an
    # AttributeError on ``None`` deep inside the loop.
    strategy_factories = {
        0: lambda: RandomSampling(random_state=random_state),
        1: lambda: QueryByCommittee(
            random_state=random_state, method="KL_divergence"
        ),
        2: lambda: UncertaintySampling(
            method="least_confident", random_state=random_state
        ),
        3: lambda: UncertaintySampling(
            method="margin_sampling", random_state=random_state
        ),
        4: lambda: UncertaintySampling(method="entropy", random_state=random_state),
    }
    if method not in strategy_factories:
        raise ValueError(
            f"Unknown method code {method!r}; "
            f"expected one of {sorted(strategy_factories)}"
        )

    X, y_true = dataset
    print(f"X shape: {X.shape}, y_true shape: {y_true.shape}")
    print(f"Evaluating dataset: {dataset_name}")

    qs = strategy_factories[method]()
    # Seed the forest as well; otherwise the curve changes on every run even
    # though the query strategy itself is seeded.
    clf = SklearnClassifier(
        RandomForestClassifier(random_state=random_state),
        classes=np.unique(y_true),
    )

    # Each strategy family expects the fitted model under a different keyword.
    if method == 0:
        query_kwargs = {}
    elif method == 1:
        query_kwargs = {"ensemble": clf}
    else:
        query_kwargs = {"clf": clf}

    # One sample is labeled per cycle, so the pool bounds the cycle count.
    n_cycles = max(0, min(n_cycles, len(y_true)))
    accuracies = []

    # Start with every label missing and fit once so the model-based
    # strategies have a fitted classifier for the first query.
    y = np.full(shape=y_true.shape, fill_value=MISSING_LABEL)
    clf.fit(X, y)
    for _ in range(n_cycles):
        query_idx = qs.query(X=X, y=y, batch_size=1, **query_kwargs)

        # Reveal the true label of the queried sample and retrain.
        y[query_idx] = y_true[query_idx]
        clf.fit(X, y)

        accuracies.append(clf.score(X, y_true))
    return accuracies, n_cycles

In [17]:
def process(method, dataset_name):
    """Run one active-learning evaluation and return its accuracy curve.

    Parameters
    ----------
    method : str or int
        Either one of the strategy labels listed in ``method_codes`` below or
        the corresponding integer code (0-4).
    dataset_name : str
        ``"Iris"`` or ``"Digits"``.

    Returns
    -------
    pandas.DataFrame
        Column ``"x"`` holds the 1-based iteration number and column ``"y"``
        the accuracy reported for that iteration.

    Raises
    ------
    ValueError
        If ``method`` or ``dataset_name`` is not recognised (for example when
        it is ``None``).
    """
    method_codes = {
        "Random Sampling": 0,
        "Query By Committee": 1,
        "Uncertainty Sampling with Least Confident": 2,
        "Uncertainty Sampling with Margin Sampling": 3,
        "Uncertainty Sampling with Entropy": 4,
    }
    if method in method_codes:
        method_code = method_codes[method]
    elif method in method_codes.values():
        # Already a numeric code: pass it through unchanged.
        method_code = method
    else:
        raise ValueError(
            f"Unknown active learning method {method!r}; "
            f"choose one of {list(method_codes)}"
        )

    datasets = {
        "Iris": (iris.data, iris.target),
        "Digits": (digits.data, digits.target),
    }
    if dataset_name not in datasets:
        raise ValueError(
            f"Unknown dataset {dataset_name!r}; choose one of {list(datasets)}"
        )

    accuracies, n = evaluate_active_learning(
        datasets[dataset_name], dataset_name, method_code
    )
    return pd.DataFrame({"x": range(1, n + 1), "y": accuracies})

In [23]:
def _accuracy_panel():
    """Create one nested row/column holding a line plot followed by a button.

    Returns the ``(plot, button)`` pair so the caller can wire the click event.
    """
    with gradio.Row(), gradio.Column():
        plot = gradio.LinePlot(
            x="x",
            y="y",
            overlay_point=True,
            tooltip=["x", "y"],
            y_title="Accuracy",
            x_title="Iterations",
            height=500,
            width=500,
        )
        button = gradio.Button()
    return plot, button


with gradio.Blocks() as demo:
    with gradio.Row(), gradio.Column():
        # Shared inputs: both panels read the same method/dataset selection.
        method = gradio.Radio(
            [
                "Random Sampling",
                "Query By Committee",
                "Uncertainty Sampling with Least Confident",
                "Uncertainty Sampling with Margin Sampling",
                "Uncertainty Sampling with Entropy",
            ],
            label="Select Active Learning Method",
        )
        dataset = gradio.Radio(["Iris", "Digits"], label="Select Dataset")

        # Two identical panels, each with its own plot and trigger button.
        output, submit = _accuracy_panel()
        output2, submit2 = _accuracy_panel()

    # Each button runs `process` on the current selection and draws the
    # returned frame into its own plot.
    submit.click(process, inputs=[method, dataset], outputs=output)
    submit2.click(process, inputs=[method, dataset], outputs=output2)

# share=True asks Gradio for a public share link in addition to the local URL.
demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7868
Running on public URL: https://f96d1ec4aa3c1692b3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




X shape: (150, 4), y_true shape: (150,)
Evaluating dataset: Iris
X shape: (150, 4), y_true shape: (150,)
Evaluating dataset: Iris
Accuracies for dataset: Iris

Method: Random Sampling, Accuracy: 0.9867
Accuracies for dataset: Iris

Method: Query by Committee, Accuracy: 1.0000
