In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np

In [7]:
def calculate_uncertainty_Entropy(X, model):
    """Return the Shannon entropy (in bits) of the predicted class distribution of each sample.

    Parameters
    ----------
    X : array-like
        Samples, passed unchanged to ``model.predict_proba``.
    model : object
        Any estimator exposing ``predict_proba(X)`` that returns one row of
        class probabilities per sample.

    Returns
    -------
    numpy.ndarray
        1-D array holding one entropy value per row of the predicted
        probabilities. Larger values mean a flatter (more uncertain)
        predicted distribution.
    """
    # Work on the estimator's output without modifying it in place.
    predicted_probs = np.asarray(model.predict_proba(X), dtype=float)
    # Apply the convention 0 * log2(0) = 0: zero entries are swapped for 1.0
    # inside the logarithm only, so they contribute exactly nothing to the sum
    # instead of a small artificial amount.
    safe_probs = np.where(predicted_probs > 0, predicted_probs, 1.0)
    entropy = -np.sum(predicted_probs * np.log2(safe_probs), axis=1)
    # A NaN can only come from NaN probabilities; such rows are reported as 0.
    entropy[np.isnan(entropy)] = 0
    # Adding 0.0 normalizes a possible negative zero to plain 0.0.
    return entropy + 0.0

# Function to calculate the uncertainty score for a data point
def calculate_uncertainty(x, models, eps=0.00001):
    """Score one sample from the spread of class probabilities across several models.

    Each model is asked for its class probabilities on ``x``; the value in
    column 1 of ``predict_proba`` is collected from every model. The lowest
    and highest of those values form an interval, and the score is::

        -max(lower / (1 - lower + eps), (1 - upper) / (upper + eps))

    The score is always <= 0. It is close to 0 when neither odds ratio is
    large and strongly negative when all models agree on a confident
    prediction.

    Parameters
    ----------
    x : array-like
        A single sample; it is reshaped to one row before prediction.
    models : iterable
        Fitted estimators exposing ``predict_proba``. The output must have at
        least two columns.
    eps : float, optional
        Small constant added to both denominators to avoid division by zero
        when a probability is exactly 0 or 1. Defaults to 0.00001.

    Returns
    -------
    float
        The uncertainty score of the sample.

    Raises
    ------
    ValueError
        If ``models`` is empty.
    """
    # The one-row view is the same for every model, so build it once.
    sample = np.asarray(x).reshape(1, -1)
    class_probabilities = [model.predict_proba(sample)[0, 1] for model in models]
    if not class_probabilities:
        raise ValueError("calculate_uncertainty requires at least one model")

    # Bounds of the probability interval spanned by the models.
    lower_probability = min(class_probabilities)
    upper_probability = max(class_probabilities)

    # Smallest odds in favour of column 1 versus smallest odds against it;
    # the larger of the two, negated, is the score.
    uncertainty_score = -max(
        lower_probability / (1 - lower_probability + eps),
        (1 - upper_probability) / (upper_probability + eps),
    )
    return uncertainty_score


In [3]:
# Load Iris and keep only the first two feature columns
iris = load_iris()
X, y = iris.data[:, :2], iris.target
# Reduce to a two-class problem: keep only the samples labelled 0 or 1
class_indices = np.flatnonzero((y == 0) | (y == 1))
X, y = X[class_indices], y[class_indices]

In [5]:
# Hold out 10 samples as the labeled seed set; the remainder forms the unlabeled pool
X_labeled, X_pool, y_labeled, y_pool = train_test_split(
    X,
    y,
    train_size=10,
    random_state=42,
)
# Show the labeled seed set, one row per sample: the two features followed by the label
print(np.column_stack((X_labeled, y_labeled)))

[[6.1 3.  1. ]
 [6.4 2.9 1. ]
 [6.7 3.1 1. ]
 [5.8 2.7 1. ]
 [5.4 3.4 0. ]
 [5.  2.  1. ]
 [6.1 2.8 1. ]
 [5.8 4.  0. ]
 [5.8 2.6 1. ]
 [6.4 3.2 1. ]]


In [8]:
# Build and train three forests on the labeled data; they differ only in their random seed
model1 = RandomForestClassifier(random_state=42).fit(X_labeled, y_labeled)
model2 = RandomForestClassifier(random_state=43).fit(X_labeled, y_labeled)
model3 = RandomForestClassifier(random_state=44).fit(X_labeled, y_labeled)
# Echo the last fitted estimator as the cell's output
model3

RandomForestClassifier(random_state=44)

In [15]:
# Score every pool sample with the credal measure, using all three forests
models = [model1, model2, model3]
uncertainty_scores = np.array([calculate_uncertainty(x, models) for x in X_pool])

# Score the same pool with the entropy measure, using the first forest only
uncertainty_entropy = calculate_uncertainty_Entropy(X_pool, model1)

# Pool indices of the 10 highest scores under each measure (in ascending score order)
top_indices_credal = np.argsort(uncertainty_scores)[-10:]
top_indices_entropy = np.argsort(uncertainty_entropy)[-10:]

print("Entropy", top_indices_entropy)
print("Credal", top_indices_credal)

Entropy [56 52 50 42 33 30 26 22  5 44]
Credal [56 50  9 42 33 30 26 22  5 44]


In [16]:
# Format one report line per pool sample (numbered from 1), then print them in order
report_lines = [
    f"Data point {i+1}: Uncertainty score = {uncertainty:.4f}"
    for i, uncertainty in enumerate(uncertainty_scores)
]
for report_line in report_lines:
    print(report_line)

Data point 1: Uncertainty score = -98.9011
Data point 2: Uncertainty score = -2.4482
Data point 3: Uncertainty score = -4.2629
Data point 4: Uncertainty score = -3.5453
Data point 5: Uncertainty score = -2.3333
Data point 6: Uncertainty score = -1.5000
Data point 7: Uncertainty score = -1.7777
Data point 8: Uncertainty score = -2.4482
Data point 9: Uncertainty score = -5.6663
Data point 10: Uncertainty score = -1.5000
Data point 11: Uncertainty score = -3.5453
Data point 12: Uncertainty score = -2.5713
Data point 13: Uncertainty score = -98.9011
Data point 14: Uncertainty score = -8.9991
Data point 15: Uncertainty score = -2.4482
Data point 16: Uncertainty score = -1.7777
Data point 17: Uncertainty score = -100000.0000
Data point 18: Uncertainty score = -100000.0000
Data point 19: Uncertainty score = -3.5453
Data point 20: Uncertainty score = -2.7036
Data point 21: Uncertainty score = -11.4986
Data point 22: Uncertainty score = -1.8571
Data point 23: Uncertainty score = -1.5000
Data po