In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from modAL.models import ActiveLearner
from modAL.uncertainty import uncertainty_sampling

ModuleNotFoundError: No module named 'modAL'

In [None]:
# 1. Generate synthetic data.
# Keep the ground-truth labels: the seed set, the simulated oracle and the
# final evaluation all need real targets for the experiment to be meaningful.
X, y = make_classification(n_samples=10000, n_features=20, n_classes=2)

# Hold out a test set the learner never sees, so the final score measures
# generalization rather than agreement with noise or with training samples.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y
)

# Initial labeled data (5 positive, 5 negative).
# Indices are drawn per class from the true labels so that each seed sample
# is paired with its own label instead of an arbitrary one.
pos_idx = np.random.choice(np.flatnonzero(y_train == 1), size=5, replace=False)
neg_idx = np.random.choice(np.flatnonzero(y_train == 0), size=5, replace=False)
initial_idx = np.concatenate([pos_idx, neg_idx])
X_labeled = X_train[initial_idx]
y_labeled = y_train[initial_idx]

# Unlabeled pool. y_pool is kept aligned with X_pool row-for-row and is only
# consulted by the simulated oracle below.
X_pool = np.delete(X_train, initial_idx, axis=0)
y_pool = np.delete(y_train, initial_idx, axis=0)

# 2. Initialize Active Learner
learner = ActiveLearner(
    estimator=SVC(probability=True),
    query_strategy=uncertainty_sampling,
    X_training=X_labeled,
    y_training=y_labeled
)

# 3. Active Learning Loop
for _ in range(20):  # Query 20 times
    # Select the most uncertain sample
    query_idx, _ = learner.query(X_pool)

    # Simulate oracle labeling by revealing the true label of the queried
    # sample. Replace with real (human) labeling in production.
    true_label = np.atleast_1d(y_pool[query_idx])

    # Teach the model
    learner.teach(X_pool[query_idx].reshape(1, -1), true_label)

    # Remove the labeled sample from the pool, keeping X_pool and y_pool
    # aligned so later oracle lookups stay correct.
    X_pool = np.delete(X_pool, query_idx, axis=0)
    y_pool = np.delete(y_pool, query_idx, axis=0)

# 4. Evaluate the model on the held-out test set with its true labels
print("Final model accuracy:", learner.score(X_test, y_test))
