In [20]:
import numpy as np

from skactiveml.base import SingleAnnotatorPoolQueryStrategy, SkactivemlClassifier
from skactiveml.utils import (
    MISSING_LABEL,
    check_type,
    check_equal_missing_label,
    unlabeled_indices
)

from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.metrics import pairwise_distances_argmin_min, pairwise_distances

from skactiveml.classifier import SklearnClassifier

In [21]:
class Badge(SingleAnnotatorPoolQueryStrategy):
    """Skeleton of a pool-based query strategy (work in progress).

    At this stage ``query`` only validates its inputs, determines which
    candidate samples are still unlabeled, and prints them together with
    their indices. No utilities are computed and nothing is returned yet.

    Parameters
    ----------
    missing_label : scalar or string or np.nan or None, default=MISSING_LABEL
        Value used to represent a missing label.
    random_state : int or np.random.RandomState or None, default=None
        Forwarded unchanged to the base class constructor.
    """

    def __init__(
            self,
            missing_label=MISSING_LABEL,
            random_state=None
    ):
        super().__init__(
            missing_label=missing_label, random_state=random_state
        )

    def query(
        self,
        X,
        y,
        clf,
        candidates=None,
        batch_size=1,
        return_utilities=False,
    ):
        """Print the unlabeled candidate samples and their indices.

        Parameters
        ----------
        X : array-like
            Feature matrix of the pool.
        y : array-like
            Labels for ``X``; unlabeled entries carry the missing label.
        clf : SkactivemlClassifier
            Classifier whose ``missing_label`` must equal this strategy's.
        candidates : None or array-like, default=None
            ``None`` means all unlabeled samples of ``X`` are candidates.
            Otherwise either indices into ``X`` or candidate feature vectors
            (presumably distinguished by ``_transform_candidates`` returning
            ``mapping=None`` for the latter -- confirm against the base class).
        batch_size : int, default=1
            Validated but not used yet.
        return_utilities : bool, default=False
            Validated but not used yet.

        Returns
        -------
        None
            The selection step is not implemented yet.
        """
        # Validate input parameters
        X, y, candidates, batch_size, return_utilities = self._validate_data(
            X, y, candidates, batch_size, return_utilities, reset=True
        )

        X_cand, mapping = self._transform_candidates(candidates, X, y)

        # Validate classifier type
        check_type(clf, "clf", SkactivemlClassifier)
        check_equal_missing_label(clf.missing_label, self.missing_label_)

        if candidates is None:
            # The candidates are used as returned; no extra filtering here.
            X_unlabeled = X_cand
            unlabeled_mapping = mapping
        elif mapping is not None:
            # Candidates were given as indices into X: drop the ones that
            # already carry a label. Use the validated ``missing_label_`` so
            # this agrees with the classifier check above.
            is_unlabeled = unlabeled_indices(
                y[mapping], missing_label=self.missing_label_
            )
            X_unlabeled = X_cand[is_unlabeled]
            unlabeled_mapping = mapping[is_unlabeled]
        else:
            # No mapping into X is available, so every candidate is kept and
            # simply enumerated. (Previously ``X_unlabeled`` was never bound
            # in this branch, which raised NameError at the print below.)
            X_unlabeled = X_cand
            unlabeled_mapping = np.arange(len(X_cand))

        # Temporary debugging output while the strategy is being developed.
        print(X_unlabeled)
        print(unlabeled_mapping)
        

In [22]:
# Synthetic two-feature classification problem; the fixed seed keeps it reproducible.
X, y_true = make_classification(
    n_features=2,
    n_redundant=0,
    random_state=0,
)

# Number of generated samples (length of the first axis).
len(X)

100

In [23]:
# Start with a label vector of the same shape as y_true in which every entry
# is the missing-label marker, i.e. nothing has been annotated yet.
y = np.full(shape=y_true.shape, fill_value=MISSING_LABEL)

In [24]:
# Collect the distinct class labels once and hand them to the wrapped
# logistic-regression classifier; then show them.
classes = np.unique(y_true)
clf = SklearnClassifier(LogisticRegression(), classes=classes)
print(classes)

[0 1]


In [25]:
# Instantiate the query strategy with its default missing label and random state.
qs = Badge()

In [26]:
# Query on the first five samples with candidates=None; query() prints the
# unlabeled samples and their indices (it does not return anything yet).
qs.query(X[:5], y[:5], clf, candidates=None, batch_size=1)

[[-0.76605469  0.18332468]
 [-0.92038325 -0.0723168 ]
 [-0.98658509 -0.28692   ]
 [ 1.70910242 -1.10453952]
 [ 1.9876467   1.77624479]]
[0 1 2 3 4]


In [30]:
# Reveal the true label of sample 1. NOTE: this mutates the shared `y` in
# place, so re-running earlier query cells afterwards gives different output.
y[1] = y_true[1]

# Run the same query twice: first with every unlabeled sample as a candidate
# (None), then restricted to an explicit list of candidate indices.
for cand_spec in (None, [1, 2, 3, 4]):
    qs.query(X[:5], y[:5], clf, candidates=cand_spec, batch_size=1)

[[-0.76605469  0.18332468]
 [-0.98658509 -0.28692   ]
 [ 1.70910242 -1.10453952]
 [ 1.9876467   1.77624479]]
[0 2 3 4]
[[-0.98658509 -0.28692   ]
 [ 1.70910242 -1.10453952]
 [ 1.9876467   1.77624479]]
[2 3 4]


In [27]:
# For each of the first five samples, find the closest of the two reference
# points X[6:8] and the distance to it.
argmin, min_distance = pairwise_distances_argmin_min(X=X[:5], Y=X[6:8])
print(argmin.shape)
print(min_distance)

# Squared nearest-reference distances and their total.
d2 = min_distance ** 2
print(d2)
d2_sum = d2.sum()
print(d2_sum)

# Full distance matrix for comparison: the row-wise minimum equals min_distance.
distance = pairwise_distances(X=X[:5], Y=X[6:8])
print(distance)

# Normalise the squared distances so they sum to one.
probas = d2 / d2_sum
print(probas)


(5,)
[0.70612023 0.40751186 0.19634942 2.91822117 3.81380748]
[ 0.49860577  0.16606592  0.03855309  8.51601479 14.54512747]
23.764367048350934
[[0.70612023 2.76409189]
 [0.40751186 2.49464132]
 [0.19634942 2.27637396]
 [2.91822117 3.16462351]
 [3.81380748 5.32399199]]
[0.02098124 0.00698802 0.00162231 0.35835227 0.61205617]
