# Load Data

In [1]:
from sklearn import datasets

# Fetch the Iris dataset that ships with scikit-learn. The returned object is
# dict-like, so iterating over it yields its field names.
iris = datasets.load_iris()
list(iris)

['data',
 'target',
 'frame',
 'target_names',
 'DESCR',
 'feature_names',
 'filename']

In [2]:
# Peek at the first row of the feature matrix (one value per feature).
iris.data[0]

array([5.1, 3.5, 1.4, 0.2])

In [3]:
import numpy as np

# Feature matrix: one row per flower, one column per measurement.
X = iris["data"]

# Binary target for a one-vs-rest problem on class index 2.
# The builtin `int` is used instead of the `np.int` alias: that alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24, where it raises
# AttributeError. The resulting integer dtype is the same.
y = (iris["target"] == 2).astype(int)  # 1 if Iris virginica, else 0

In [4]:
# Two independent copies of the labels: y_true stays untouched as ground truth,
# while y_experiment is the working copy that later cells modify in place.
y_true, y_experiment = y.copy(), y.copy()

In [5]:
# Simulate a semi-supervised setting: draw one uniform number per sample from a
# seeded generator and hide the label (set it to -1) wherever the draw is below
# 0.3, i.e. for roughly 30% of the samples. Re-creating the generator with the
# same seed each time keeps the mask identical across re-runs of this cell.
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(y_experiment)) < 0.3
y_experiment[random_unlabeled_points] = -1

In [6]:
# (n_samples, n_features) of the feature matrix.
np.shape(X)

(150, 4)

# RFoT

In [9]:
from RFoT import RFoT

# Hyperparameters for this RFoT run, gathered in one mapping so they are easy
# to scan and tweak. Their exact semantics are defined by the RFoT package and
# are not visible in this notebook -- NOTE(review): confirm against RFoT docs.
rfot_params = dict(
    bin_scale=1,
    max_dimensions=3,
    component_purity_tol=0.99,
    min_rank=11,
    max_rank=21,
    n_estimators=50,
    bin_entry=True,
    clustering="ms",
    max_depth=2,
    n_jobs=10,  # presumably the number of parallel workers -- TODO confirm
)
model = RFoT(**rfot_params)

# Features and partially hidden labels go in together: entries of y_experiment
# equal to -1 are the samples whose labels were masked earlier. The evaluation
# cells below treat a -1 in y_pred as "no prediction made" (an abstention).
y_pred = model.predict(X, y_experiment)

100%|███████████████████████████████████████████████████████████████████| 50/50 [00:09<00:00,  5.29it/s]
100%|███████████████████████████████████████████████████████████████████| 50/50 [00:09<00:00,  5.47it/s]


# Look at the results

In [10]:
from sklearn.metrics import f1_score

# Positions (into the full dataset) of the samples whose labels were hidden.
unknown_indices = np.flatnonzero(y_experiment == -1)
# Positions *within unknown_indices* where the model committed to a label;
# a -1 in y_pred marks an abstention.
did_predict_indices = np.flatnonzero(y_pred[unknown_indices] != -1)
# Count abstentions among the hidden-label samples only. The previous version
# counted -1 over all of y_pred while the percentage below divides by the
# number of hidden-label samples, so numerator and denominator could refer to
# different populations.
abstaining_count = len(unknown_indices) - len(did_predict_indices)
# Score only the hidden-label samples that actually received a prediction.
f1 = f1_score(
    y_true[unknown_indices][did_predict_indices],
    y_pred[unknown_indices][did_predict_indices],
    average="weighted",
)

print("------------------------")
print("Num. of Abstaining", abstaining_count)
print("Percent Abstaining", (abstaining_count / len(unknown_indices)) * 100, "%")
print("F1=", f1)

------------------------
Num. of Abstaining 3
Percent Abstaining 5.88235294117647 %
F1= 0.9181790683605566


In [11]:
from sklearn.metrics import classification_report

# Dataset positions of the hidden-label samples that received a prediction:
# did_predict_indices indexes into unknown_indices, so compose the two once.
scored_indices = unknown_indices[did_predict_indices]
y_true_hat = y_true[scored_indices]
y_pred_hat = y_pred[scored_indices]

# Per-class precision / recall / F1 on the scored samples only.
print(classification_report(y_true_hat, y_pred_hat))

              precision    recall  f1-score   support

           0       1.00      0.87      0.93        31
           1       0.81      1.00      0.89        17

    accuracy                           0.92        48
   macro avg       0.90      0.94      0.91        48
weighted avg       0.93      0.92      0.92        48

