# In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import make_classification

# -------------------------
# 1️⃣ Generate Dataset
# -------------------------
# Synthetic binary-classification data: 10,000 samples with 20 features each.
# NOTE(review): no random_state is passed, so each run draws a different
# dataset -- confirm whether reproducibility is wanted here.
X, y = make_classification(n_samples=10000, n_features=20, n_classes=2)
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

# Seed the labeled set with the first 5 positive and first 5 negative samples
pos_idx = (y == 1).nonzero(as_tuple=True)[0][:5]
neg_idx = (y == 0).nonzero(as_tuple=True)[0][:5]
labeled_idx = torch.cat((pos_idx, neg_idx))

X_labeled = X[labeled_idx]
y_labeled = y[labeled_idx]

# Unlabeled pool (remaining samples). A boolean mask yields the same ascending
# int64 indices as testing `i not in labeled_idx` for every i, without running
# one tensor comparison per sample from Python.
is_unlabeled = torch.ones(len(X), dtype=torch.bool)
is_unlabeled[labeled_idx] = False
unlabeled_idx = is_unlabeled.nonzero(as_tuple=True)[0]
X_unlabeled = X[unlabeled_idx]

# -------------------------
# 2️⃣ Define Neural Network
# -------------------------
class SimpleNN(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_dim: Number of features in each input sample.
        hidden_dim: Width of the hidden layer. Defaults to 128.
        num_classes: Number of output logits. Defaults to 2.
    """

    def __init__(self, input_dim, hidden_dim=128, num_classes=2):
        super().__init__()
        # Attribute names are kept as fc1/relu/fc2 so state_dict keys are stable.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for ``x``."""
        return self.fc2(self.relu(self.fc1(x)))

# Build the classifier together with its loss function and optimiser
model = SimpleNN(20)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# -------------------------
# 3️⃣ Training Function
# -------------------------
def train_model(X_train, y_train, epochs=10):
    """Fit the module-level ``model`` on the given samples.

    Runs ``epochs`` passes over the data in shuffled mini-batches of 8,
    using the module-level ``criterion`` and ``optimizer``.

    Args:
        X_train: Feature tensor, one row per sample.
        y_train: Target tensor aligned with ``X_train``.
        epochs: Number of full passes over the data.
    """
    model.train()
    batches = DataLoader(TensorDataset(X_train, y_train), batch_size=8, shuffle=True)

    for _ in range(epochs):
        for features, targets in batches:
            optimizer.zero_grad()
            batch_loss = criterion(model(features), targets)
            batch_loss.backward()
            optimizer.step()

# Warm-up: fit once on the initial labeled samples before querying the pool
train_model(X_train=X_labeled, y_train=y_labeled)

# -------------------------
# 4️⃣ Active Learning Loop
# -------------------------
def select_most_uncertain_samples(X_pool, k=10):
    """Return the positions of the ``k`` least-confident samples in ``X_pool``.

    The module-level ``model`` is put in eval mode and run without gradient
    tracking. A sample's uncertainty is ``1 - max softmax probability``
    (least-confidence score).

    Args:
        X_pool: Feature tensor of candidate samples, one row per sample.
        k: Maximum number of positions to return.

    Returns:
        Tensor of row positions into ``X_pool``, most uncertain first.
    """
    model.eval()
    with torch.no_grad():
        class_probs = torch.softmax(model(X_pool), dim=1)
        top_prob = class_probs.max(dim=1).values
        ranking = (1 - top_prob).argsort(descending=True)
    return ranking[:k]

for i in range(10):  # 10 active learning iterations
    print(f"🔄 Iteration {i+1}")

    # Select most uncertain samples (positions within the current pool)
    query_idx = select_most_uncertain_samples(X_unlabeled, k=5)
    X_query = X_unlabeled[query_idx]

    # Simulate Oracle (real-world: human annotator).
    # unlabeled_idx maps each pool position to its row in the full dataset,
    # so this lookup is only correct while it stays aligned with X_unlabeled.
    y_query = y[unlabeled_idx[query_idx]]  # True labels

    # Add new labeled data
    X_labeled = torch.cat((X_labeled, X_query))
    y_labeled = torch.cat((y_labeled, y_query))

    # Remove newly labeled samples from the unlabeled pool. One boolean mask
    # filters both tensors so unlabeled_idx keeps holding dataset indices
    # (rebuilding it from range(len(...)) would replace them with pool
    # positions and make later oracle lookups return the wrong labels).
    keep = torch.ones(len(X_unlabeled), dtype=torch.bool)
    keep[query_idx] = False
    X_unlabeled = X_unlabeled[keep]
    unlabeled_idx = unlabeled_idx[keep]

    # Retrain Model
    train_model(X_labeled, y_labeled)

print("✅ Active Learning Completed!")


# Output:
# 🔄 Iteration 1
# 🔄 Iteration 2
# 🔄 Iteration 3
# 🔄 Iteration 4
# 🔄 Iteration 5
# 🔄 Iteration 6
# 🔄 Iteration 7
# 🔄 Iteration 8
# 🔄 Iteration 9
# 🔄 Iteration 10
# ✅ Active Learning Completed!
