# Load Data

In [1]:
from sklearn import datasets

# Load the Iris dataset that ships with scikit-learn and list the fields
# available on the returned mapping (iterating it yields its keys).
iris = datasets.load_iris()
list(iris)

['data',
 'target',
 'frame',
 'target_names',
 'DESCR',
 'feature_names',
 'filename',
 'data_module']

In [2]:
# Peek at the first row of the feature array to see what one sample looks like.
iris["data"][0]

array([5.1, 3.5, 1.4, 0.2])

In [3]:
import numpy as np

# Feature array, taken as-is from the dataset.
X = iris["data"]
# Class labels cast to integers. The builtin `int` is used because the
# `np.int` alias was deprecated in NumPy 1.20 and removed in NumPy 1.24,
# where `np.int` raises AttributeError.
y = iris["target"].astype(int)

In [4]:
# Dimensions of the feature array X.
X.shape

(150, 4)

In [5]:
# Report how many distinct label values occur in y.
print("Number of unique classes:", np.unique(y).size)

Number of unique classes: 3


In [6]:
from sklearn.utils import shuffle
# Shuffle X and y in one call so samples and labels stay aligned;
# random_state=0 pins the permutation for reproducibility.
# NOTE: X and y are rebound to the shuffled result, so re-running this cell
# without first re-running the cell that defines them permutes the already
# shuffled arrays a second time and changes the sample order.
X, y = shuffle(X, y, random_state=0)

In [7]:
# Two independent copies of the labels: `y_true` stays untouched as the
# ground truth, `y_experiment` is the working copy that may be modified.
y_true = y.copy()
y_experiment = y.copy()

In [8]:
# Hide a random subset of labels: draw one value per sample from a seeded
# generator and mark the sample as unlabeled (-1) when the draw is below 0.4.
rng = np.random.RandomState(42)
random_unlabeled_points = rng.rand(len(y_experiment)) < 0.4
y_experiment[random_unlabeled_points] = -1

# RFoT

In [9]:
from RFoT import RFoT

# Build the RFoT model with the hyperparameters used for this experiment.
# NOTE(review): n_jobs=50 is hard-coded; presumably this is the number of
# parallel workers and may exceed the cores available on the machine
# running the notebook — confirm it is intended.
model = RFoT(
    bin_scale=1,
    bin_entry=True,
    max_dimensions=3,
    max_depth=2,
    min_rank=2,
    max_rank=10,
    component_purity_tol=1.0,
    clustering="ms",
    n_estimators=50,
    n_jobs=50,
)

# Predict from the features together with the partially hidden label vector
# (entries equal to -1 are the ones that were masked out).
y_pred = model.predict(X, y_experiment)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 48/48 [00:00<00:00, 54.91it/s]
100%|██████████| 48/48 [00:00<00:00, 49.03it/s]


# Look at the results

In [10]:
from sklearn.metrics import f1_score

# Positions whose label was hidden (-1) before prediction.
unknown_indices = np.flatnonzero(y_experiment == -1)
# Positions *within* `unknown_indices` where the prediction is not -1,
# i.e. where the model committed to a class instead of abstaining.
did_predict_indices = np.flatnonzero(y_pred[unknown_indices] != -1)
# Count abstentions among the hidden-label samples only. The percentage below
# is taken relative to `len(unknown_indices)`, so the numerator has to come
# from the same subset rather than from the whole of `y_pred`.
abstaining_count = len(unknown_indices) - len(did_predict_indices)
# Weighted F1 over the hidden-label samples that received a prediction.
f1 = f1_score(
    y_true[unknown_indices][did_predict_indices],
    y_pred[unknown_indices][did_predict_indices],
    average="weighted",
)

print("------------------------")
print("Num. of Abstaining", abstaining_count)
print("Percent Abstaining", (abstaining_count / len(unknown_indices)) * 100, "%")
print("F1=", f1)

------------------------
Num. of Abstaining 4
Percent Abstaining 5.797101449275362 %
F1= 0.9219398487691172


In [11]:
from sklearn.metrics import classification_report

# Restrict both label vectors to the hidden-label samples that actually
# received a prediction, then print the per-class report for that subset.
y_true_hat = y_true[unknown_indices][did_predict_indices]
y_pred_hat = y_pred[unknown_indices][did_predict_indices]
report = classification_report(y_true_hat, y_pred_hat)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        28
           1       1.00      0.74      0.85        19
           2       0.78      1.00      0.88        18

    accuracy                           0.92        65
   macro avg       0.93      0.91      0.91        65
weighted avg       0.94      0.92      0.92        65

