In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np


class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)  # 10 classes for Fashion-MNIST

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def prepare_data():
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    train_dataset = datasets.FashionMNIST(root='./data', train=True, transform=transform, download=True)
    test_dataset = datasets.FashionMNIST(root='./data', train=False, transform=transform, download=True)
    return train_dataset, test_dataset


def split_dataset(dataset, initial_labeled_size):
    indices = list(range(len(dataset)))
    labeled_indices = np.random.choice(indices, size=initial_labeled_size, replace=False)
    unlabeled_indices = [i for i in indices if i not in labeled_indices]
    return Subset(dataset, labeled_indices), Subset(dataset, unlabeled_indices)

def train_model(model, data_loader, optimizer, criterion, epochs=10):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(data_loader):.4f}")


def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy

if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


    train_dataset, test_dataset = prepare_data()
    initial_labeled_size = 5000  # Choose an appropriate initial labeled size
    labeled_set, unlabeled_set = split_dataset(train_dataset, initial_labeled_size)

    labeled_loader = DataLoader(labeled_set, batch_size=64, shuffle=True)
    unlabeled_loader = DataLoader(unlabeled_set, batch_size=64, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    print("Training on initial labeled dataset...")
    train_model(model, labeled_loader, optimizer, criterion, epochs=10)

    accuracy = evaluate_model(model, test_loader)
    print(f"Initial Test Accuracy: {accuracy:.2f}%")

    num_iterations = 5
    num_samples = 1000

    for iteration in range(num_iterations):
        print(f"\nActive Learning Iteration {iteration + 1}")
        
        train_model(model, labeled_loader, optimizer, criterion, epochs=5)
        
        accuracy = evaluate_model(model, test_loader)
        print(f"Iteration {iteration + 1} Test Accuracy: {accuracy:.2f}%")


Training on initial labeled dataset...
Epoch 1/10, Loss: 0.9059
Epoch 2/10, Loss: 0.5100
Epoch 3/10, Loss: 0.4330
Epoch 4/10, Loss: 0.3943
Epoch 5/10, Loss: 0.3459
Epoch 6/10, Loss: 0.3124
Epoch 7/10, Loss: 0.2916
Epoch 8/10, Loss: 0.2711
Epoch 9/10, Loss: 0.2371
Epoch 10/10, Loss: 0.1944
Initial Test Accuracy: 86.06%

Active Learning Iteration 1
Epoch 1/5, Loss: 0.1763
Epoch 2/5, Loss: 0.1627
Epoch 3/5, Loss: 0.1394
Epoch 4/5, Loss: 0.1268
Epoch 5/5, Loss: 0.1108
Iteration 1 Test Accuracy: 86.18%

Active Learning Iteration 2
Epoch 1/5, Loss: 0.0901
Epoch 2/5, Loss: 0.0771
Epoch 3/5, Loss: 0.0797
Epoch 4/5, Loss: 0.0487
Epoch 5/5, Loss: 0.0626
Iteration 2 Test Accuracy: 87.49%

Active Learning Iteration 3
Epoch 1/5, Loss: 0.0313
Epoch 2/5, Loss: 0.0436
Epoch 3/5, Loss: 0.0287
Epoch 4/5, Loss: 0.0254
Epoch 5/5, Loss: 0.0125
Iteration 3 Test Accuracy: 87.31%

Active Learning Iteration 4
Epoch 1/5, Loss: 0.0122
Epoch 2/5, Loss: 0.0153
Epoch 3/5, Loss: 0.0082
Epoch 4/5, Loss: 0.0073
Epoch 