# Active Learning with Featuristic

This notebook demonstrates how to use Featuristic in an active learning setup, where:

1. We start with a very small labeled training set
2. Train an initial model
3. Use the model to identify the most uncertain examples from the unlabeled pool
4. Request human labeling for those uncertain examples
5. Add the newly labeled examples to the training set
6. Retrain the model and observe the improvement in accuracy

This approach minimizes human labeling effort by focusing it on the most informative examples.

In [None]:
from featuristic import FeaturisticClassifier
from featuristic import PromptFeatureDefinition, PromptFeatureConfiguration, extract_features
from featuristic import Distribution

from sklearn.model_selection import train_test_split
import json
import numpy as np
import os
from typing import List
import pandas as pd

## Setup API Keys

Configure your Azure OpenAI API key and endpoint.

In [None]:
# Read the Azure OpenAI credentials from the environment (they are never hard-coded here)
AOAI_API_KEY = os.environ.get("AOAI_API_KEY")
AOAI_API_ENDPOINT = os.environ.get("AOAI_API_ENDPOINT")

# Warn, without stopping the notebook, when either value is missing or empty
if not (AOAI_API_KEY and AOAI_API_ENDPOINT):
    print("Warning: Azure OpenAI credentials not found. Please set the environment variables.")

## Data Loading

Load and prepare our dataset.

In [None]:
def load_data(path) -> List[str]:
    """Load the ``text`` field of every record in a JSON Lines file.

    Each non-blank line is parsed as one JSON document. Records without a
    ``"text"`` key are skipped. The text is stripped of surrounding whitespace
    and every double newline is collapsed into a single newline.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        The cleaned texts, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    texts: List[str] = []
    # JSON is UTF-8 by specification; do not depend on the platform's default encoding.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at the end of the file)
                continue
            record = json.loads(line)  # parse each line exactly once
            if "text" in record:
                texts.append(record["text"].strip().replace("\n\n", "\n"))
    return texts

In [None]:
# Load both corpora first, then build one label per text
russia_ukraine = load_data("data/russia_ukraine_2025.jsonl")
uk_us_relationship = load_data("data/uk_us_relationship.jsonl")

# Label convention: 1.0 for the Russia-Ukraine texts, 0.0 for the UK-US relationship texts
ones = np.ones(len(russia_ukraine))
zeros = np.zeros(len(uk_us_relationship))

# Texts and labels are stacked in the same order (Russia-Ukraine first), so X[k] pairs with y[k]
X = np.array([*russia_ukraine, *uk_us_relationship])  # array rather than list, for easier masking
y = np.concatenate((ones, zeros))

## Define Feature Configuration and Features

Define the features we'll use for classification.

In [None]:
# Connection settings passed to each prompt-based feature definition
config = PromptFeatureConfiguration(
    aoai_api_endpoint=AOAI_API_ENDPOINT,
    aoai_api_key=AOAI_API_KEY,
)

# Helper function for text proportion features
def as_propotion_of_text(x, text):
    """Express a count as a fraction of the number of words in ``text``.

    Words are the whitespace-separated tokens of ``text``.

    Args:
        x: The raw count to normalise.
        text: The text the count was taken from.

    Returns:
        ``x`` divided by the word count, or ``0.0`` when ``text`` has no words
        (empty or whitespace-only), where the ratio is otherwise undefined.
    """
    word_count = len(text.split())
    if word_count == 0:
        # Avoid ZeroDivisionError on empty texts
        return 0.0
    return x / word_count

In [None]:
# Define features for classification
def _presence_feature(name, prompt):
    """Build a yes/no feature: the LLM returns a bool, modelled as Bernoulli."""
    return PromptFeatureDefinition(
        name=name,
        prompt=prompt,
        llm_return_type=bool,
        distribution=Distribution.BERNOULLI,
        config=config,
    )


def _mention_rate_feature(name, prompt):
    """Build a count feature: the LLM returns an int, post-processed by
    ``as_propotion_of_text`` and modelled as Gaussian."""
    return PromptFeatureDefinition(
        name=name,
        prompt=prompt,
        llm_return_type=int,
        feature_post_callback=as_propotion_of_text,
        distribution=Distribution.GAUSSIAN,
        config=config,
    )


# Boolean "is this topic present?" features.
# NOTE(review): the prompts are kept verbatim (including the "casualities" spelling);
# changing them would change what the LLM is asked, so any features already cached
# in data/features.csv would no longer match.
mention_of_war = _presence_feature(
    "mention_of_war",
    "Whether or not the notion of war is mentioned",
)
mention_of_casualties = _presence_feature(
    "mention_of_casualties",
    "Whether or not the notion of casualities are mentioned",
)
mentions_weapons = _presence_feature(
    "mentions_weapons",
    "Whether or not the notion of weapons are mentioned",
)

# Reference counts, normalised by the post-callback
mentions_US = _mention_rate_feature(
    "mentions_US",
    "A count of references to the United States",
)
mentions_Russia = _mention_rate_feature(
    "mentions_Russia",
    "A count of references to Russians, Russia, or a place in Russia",
)
mentions_Ukraine = _mention_rate_feature(
    "mentions_Ukraine",
    "A count of references to Ukrainians, Ukraine, or any place in Ukraine",
)

# The order here is also the order of the classifier's `distributions` list
feature_definitions = [
    mention_of_war,
    mention_of_casualties,
    mentions_weapons,
    mentions_US,
    mentions_Russia,
    mentions_Ukraine,
]

In [None]:
filepath = "data/features.csv"

if os.path.exists(filepath):
    # Load features from CSV if it exists
    X_features_df = pd.read_csv(filepath)
else:
    X_features_df: pd.DataFrame = await extract_features(
        X.tolist(),
        feature_definitions)
    
    # Save features to CSV for later use
    X_features_df.to_csv(filepath, index=False)

In [None]:
# Split into training/test datasets. Positions (not the data itself) are split so that
# texts, labels and feature rows can all be selected with the same indices.
idx = np.arange(len(X))
idx_train, idx_test = train_test_split(
    idx,
    stratify=y,
    shuffle=True,
    random_state=2,
    test_size=0.7,  # 70% is held out for testing; the remaining 30% is the training pool
)

data_train, X_test = X[idx_train], X[idx_test]
y_train, y_test = y[idx_train], y[idx_test]
features_train_df, features_test_df = X_features_df.iloc[idx_train], X_features_df.iloc[idx_test]

# Start with just 2 examples from each class.
# The bracketed numbers are fixed positions within each class's training examples.
class_0_idx = np.flatnonzero(y_train == 0)[[6, 15]]
class_1_idx = np.flatnonzero(y_train == 1)[[8, 1]]
initial_idx = np.concatenate((class_0_idx, class_1_idx))

# True where a training example counts as labelled; everything else is the unlabelled pool
is_labelled_mask = np.zeros(len(y_train), dtype=bool)
is_labelled_mask[initial_idx] = True


## Initial Model Training and Evaluation

Train a model on our very small initial training set and evaluate its performance.

In [None]:
# Helper function to evaluate the model
async def evaluate_model(classifier, features, y_true):
    """Score ``classifier`` on a labelled feature set.

    Declared ``async`` so call sites can ``await`` it; the body awaits nothing.

    Args:
        classifier: Object exposing ``predict(features)``.
        features: Feature table handed to ``predict`` unchanged.
        y_true: Reference labels, compared element-wise with the predictions.

    Returns:
        ``(accuracy, predictions, features)`` where ``accuracy`` is the number
        of matching predictions divided by ``len(y_true)``.
    """
    predicted = classifier.predict(features)
    n_correct = np.sum(predicted == y_true)
    return n_correct / len(y_true), predicted, features

In [None]:
# See maximum accuracy with all training data

featuristic_classifier = FeaturisticClassifier(
    distributions=[d.distribution for d in feature_definitions]
)

featuristic_classifier.fit(
    features_train_df,
    y_train,
)
accuracy, predictions, features = await evaluate_model(
    featuristic_classifier,
    features_test_df,
    y_test
)
print(f"Accuracy with all training data: {accuracy:.2f}")

In [None]:
# Replace the classifier fitted on all training data with a new, unfitted one;
# one distribution per feature, in feature_definitions order.
featuristic_classifier = FeaturisticClassifier(
    distributions=[definition.distribution for definition in feature_definitions],
)

# Initial training and evaluation
async def initial_training():
    """Fit the notebook-level classifier on the seed labels and report test accuracy.

    Reads the globals ``featuristic_classifier``, ``features_train_df``,
    ``y_train``, ``is_labelled_mask``, ``features_test_df`` and ``y_test``.
    Only the rows selected by ``is_labelled_mask`` are used for fitting.

    Returns:
        Accuracy of the fitted classifier on the test split.
    """
    print("Training initial classifier...")
    seed_features = features_train_df.iloc[is_labelled_mask]
    seed_labels = y_train[is_labelled_mask]
    featuristic_classifier.fit(features=seed_features, Y=seed_labels)

    print("Evaluating on test set...")
    evaluation = await evaluate_model(featuristic_classifier, features_test_df, y_test)
    test_accuracy = evaluation[0]  # the predictions and features are not needed here

    print(f"Initial model accuracy on test set with 2 examples per class: {test_accuracy:.2f}")

    return test_accuracy

# Run initial training
initial_accuracy = await initial_training()

# Store accuracy history for tracking progress
accuracy_history = [initial_accuracy]

## Active Learning Loop

Now we'll implement the active learning loop with human-in-the-loop labeling. In each iteration:

1. Look up the pre-computed features of the still-unlabeled examples
2. Rank examples by uncertainty
3. Present the most uncertain example for human labeling
4. Add the newly labeled example to the training set
5. Retrain the model
6. Evaluate the new model

In [None]:
# Active learning iterations with human-in-the-loop labeling
from time import sleep


async def active_learning_iteration(
    i,
    data_train: List[str],
    features_train_df: pd.DataFrame,
    y_train,
    is_labelled_mask: np.ndarray,
    featuristic_classifier: FeaturisticClassifier,
    auto_label: bool = False,
):
    """Run one uncertainty-sampling step: query a label, retrain and re-evaluate.

    The passed-in classifier ranks the unlabelled training examples. The
    top-ranked example is labelled, either from ``y_train`` (``auto_label``) or
    interactively via ``input()``. A new classifier is then fitted on every
    labelled example and scored on the test split.

    Besides its arguments, this reads the notebook globals
    ``feature_definitions``, ``features_test_df``, ``y_test`` and
    ``evaluate_model``.

    Args:
        i: Iteration number; used only in the printed messages.
        data_train: Training texts, indexed by position.
        features_train_df: Feature rows aligned by position with ``data_train``.
        y_train: Training labels. **Modified in place**: the queried entry is
            overwritten with the label that was provided.
        is_labelled_mask: Boolean mask over the training examples. **Modified
            in place**: the queried entry is set to ``True``.
        featuristic_classifier: Fitted classifier used only to rank the
            unlabelled examples; it is not modified.
        auto_label: When true, use the existing value in ``y_train`` instead of
            prompting.

    Returns:
        ``(is_labelled_mask, y_train, acc, featuristic_classifier)``: the two
        mutated inputs, the test accuracy, and the newly fitted classifier.

    Raises:
        ValueError: If every training example is already labelled.
    """
    print(f"\n=== Active Learning Iteration {i} ===")

    # Positions, within the training arrays, of the examples that still lack a label
    unlabelled_positions = np.flatnonzero(~is_labelled_mask)
    if unlabelled_positions.size == 0:
        raise ValueError("No unlabelled examples remain in the pool; reduce the number of iterations.")

    # Rank examples by uncertainty.
    # NOTE(review): assumes rank_by_uncertainty returns row positions within the frame
    # it was given, most uncertain first - confirm against the Featuristic API.
    uncertainty_ranks = featuristic_classifier.rank_by_uncertainty(features_train_df.iloc[~is_labelled_mask])
    most_uncertain_idx = uncertainty_ranks[0]

    # Translate the position within the unlabelled subset back to a training position
    # once, before the mask changes, and reuse it everywhere below.
    target_idx = unlabelled_positions[most_uncertain_idx]
    most_uncertain_text = data_train[target_idx]
    expected_label = int(y_train[target_idx])

    # Display the text for human labeling
    print("\nPlease label this example:")
    print(f"\n{most_uncertain_text.strip()}...\n")

    if auto_label:
        # Use the ground-truth label rather than asking a human
        print("Auto-labeling enabled. Using ground-truth label.")
        human_label = expected_label
    else:
        # Get human label through input
        sleep(0.5)  # presumably lets the printed text appear before the input prompt
        label_valid = False
        while not label_valid:
            human_input = input(f"Is this about Russia-Ukraine (1) or UK-US relationship (0)? Type 1 or 0 (Expected: {expected_label}): ")
            try:
                human_label = int(human_input)
                if human_label in [0, 1]:
                    label_valid = True
                else:
                    print("Please enter either 0 or 1.")
            except ValueError:
                print("Invalid input. Please enter a number (0 or 1).")

    print(f"You labeled this as: {'Russia-Ukraine' if human_label == 1 else 'UK-US Relationship'}")

    # Update the label in case the user provided a different one
    y_train[target_idx] = human_label
    # Update the mask to include the newly labeled example
    is_labelled_mask[target_idx] = True

    # Retrain from scratch on every labelled example, including the new one
    print("Retraining classifier...")
    featuristic_classifier = FeaturisticClassifier(
        distributions=[d.distribution for d in feature_definitions]
    )
    featuristic_classifier.fit(features=features_train_df.iloc[is_labelled_mask], Y=y_train[is_labelled_mask])

    # Evaluate the model
    acc, _, _ = await evaluate_model(featuristic_classifier, features_test_df, y_test)
    print(f"Model accuracy after iteration {i}: {acc:.2f}")

    return is_labelled_mask, y_train, acc, featuristic_classifier

In [None]:
# Perform active learning for a set number of iterations
num_iterations = 50  # Keeping it small for the example
auto_label = True  # Set to True for automatic labeling

# Take copies to enable re-running
is_labelled_mask_temp = is_labelled_mask.copy()
y_train_temp = y_train.copy()


for i in range(1, num_iterations + 1):
    is_labelled_mask_temp, y_train_temp, acc, featuristic_classifier = await active_learning_iteration(
            i, 
            features_train_df=features_train_df, 
            is_labelled_mask=is_labelled_mask_temp,
            data_train=data_train,
            y_train=y_train_temp,
            featuristic_classifier=featuristic_classifier,
            auto_label=auto_label
        )
    
    accuracy_history.append(acc)

In [None]:
# Print the accuracy recorded at each step (entry 0 is the initial model)
history_lines = [
    f"Iteration {step}: {step_accuracy:.2f}"
    for step, step_accuracy in enumerate(accuracy_history)
]
print("\nFinal accuracy history:", *history_lines, sep="\n")

In [None]:
async def random_selection_iteration(
    i,
    data_train: List[str],
    features_train_df: pd.DataFrame,
    y_train,
    is_labelled_mask: np.ndarray,
    featuristic_classifier: FeaturisticClassifier,
    auto_label: bool = False,
    rng=None,
):
    """Run one random-sampling step: label a random example, retrain and re-evaluate.

    An unlabelled training example is picked uniformly at random and labelled,
    either from ``y_train`` (``auto_label``) or interactively via ``input()``.
    A new classifier is then fitted on every labelled example and scored on the
    test split.

    Besides its arguments, this reads the notebook globals
    ``feature_definitions``, ``features_test_df``, ``y_test`` and
    ``evaluate_model``.

    Args:
        i: Iteration number; used only in the printed messages.
        data_train: Training texts, indexed by position.
        features_train_df: Feature rows aligned by position with ``data_train``.
        y_train: Training labels. **Modified in place**: the chosen entry is
            overwritten with the label that was provided.
        is_labelled_mask: Boolean mask over the training examples. **Modified
            in place**: the chosen entry is set to ``True``.
        featuristic_classifier: Accepted for signature parity with
            ``active_learning_iteration``; its value is never read because
            selection is random.
        auto_label: When true, use the existing value in ``y_train`` instead of
            prompting.
        rng: Optional ``numpy.random.Generator`` used for the draw. When
            ``None`` (the default), NumPy's global random state is used, so
            ``np.random.seed`` controls reproducibility.

    Returns:
        ``(is_labelled_mask, y_train, acc, featuristic_classifier)``: the two
        mutated inputs, the test accuracy, and the newly fitted classifier.

    Raises:
        ValueError: If every training example is already labelled.
    """
    print(f"\n=== Random Selection Iteration {i} ===")

    # Positions, within the training arrays, of the examples that still lack a label
    unlabelled_positions = np.flatnonzero(~is_labelled_mask)
    if unlabelled_positions.size == 0:
        raise ValueError("No unlabelled examples remain in the pool; reduce the number of iterations.")

    # Get a random example from the unlabelled data
    if rng is None:
        random_idx = np.random.choice(unlabelled_positions, 1)[0]
    else:
        random_idx = rng.choice(unlabelled_positions)
    random_text = data_train[random_idx]
    expected_label = int(y_train[random_idx])

    # Display the text for human labeling
    print("\nPlease label this example:")
    print(f"\n{random_text.strip()}...\n")

    if auto_label:
        # Use the ground-truth label rather than asking a human
        print("Auto-labeling enabled. Using ground-truth label.")
        human_label = expected_label
    else:
        # Get human label through input
        sleep(0.5)  # presumably lets the printed text appear before the input prompt
        label_valid = False
        while not label_valid:
            human_input = input(f"Is this about Russia-Ukraine (1) or UK-US relationship (0)? Type 1 or 0 (Expected: {expected_label}): ")
            try:
                human_label = int(human_input)
                if human_label in [0, 1]:
                    label_valid = True
                else:
                    print("Please enter either 0 or 1.")
            except ValueError:
                print("Invalid input. Please enter a number (0 or 1).")

    print(f"You labeled this as: {'Russia-Ukraine' if human_label == 1 else 'UK-US Relationship'}")

    # Update the label in case the user provided a different one
    y_train[random_idx] = human_label
    # Update the mask to include the newly labeled example
    is_labelled_mask[random_idx] = True

    # Retrain from scratch on every labelled example, including the new one
    print("Retraining classifier...")
    featuristic_classifier = FeaturisticClassifier(
        distributions=[d.distribution for d in feature_definitions]
    )
    featuristic_classifier.fit(features=features_train_df.iloc[is_labelled_mask], Y=y_train[is_labelled_mask])

    # Evaluate the model
    acc, _, _ = await evaluate_model(featuristic_classifier, features_test_df, y_test)
    print(f"Model accuracy after iteration {i}: {acc:.2f}")

    return is_labelled_mask, y_train, acc, featuristic_classifier

In [None]:
initial_accuracy = await initial_training()
accuracy_history = [initial_accuracy]

# Perform active learning for a set number of iterations
num_iterations = 50  # Keeping it small for the example
auto_label = True  # Set to True for automatic labeling

# Take copies to enable re-running
is_labelled_mask_temp = is_labelled_mask.copy()
y_train_temp = y_train.copy()


for i in range(1, num_iterations + 1):
    is_labelled_mask_temp, y_train_temp, acc, featuristic_classifier = await random_selection_iteration(
            i, 
            features_train_df=features_train_df, 
            is_labelled_mask=is_labelled_mask_temp,
            data_train=data_train,
            y_train=y_train_temp,
            featuristic_classifier=featuristic_classifier,
            auto_label=auto_label
        )
    
    accuracy_history.append(acc)

In [None]:
# Print the accuracy recorded at each step of the most recent run (entry 0 is the initial model)
print("\nFinal accuracy history:")
for step, step_accuracy in enumerate(accuracy_history):
    print(f"Iteration {step}: {step_accuracy:.2f}")