## Data Preparation
We'll start by importing the necessary libraries and preparing the CIFAR-10 dataset.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from torch.utils.data import DataLoader, Subset
import numpy as np

In [2]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Preprocessing pipeline applied to every CIFAR-10 image, in this order:
# resize to 224x224, convert to a tensor, then normalise each of the three
# channels with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [3]:
# Load both CIFAR-10 splits from ./data (download=True is passed for each):
# the training split first, then the test split. Both share the same transform.
train_dataset, test_dataset = (
    datasets.CIFAR10(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

print(f'Original training dataset size: {len(train_dataset)}')

Files already downloaded and verified
Files already downloaded and verified
Original training dataset size: 50000


In [4]:
# Create a very small subset of the CIFAR-10 dataset.
# The indices are drawn without replacement from a dedicated, seeded generator
# so that the same images are selected on every "Restart & Run All"; without a
# seed the subset (and therefore every metric computed later) changes per run.
SUBSET_SIZE = 200   # number of training images kept for quick experiments
SUBSET_SEED = 42    # fixed seed -> reproducible subset
subset_rng = np.random.default_rng(SUBSET_SEED)
subset_indices = subset_rng.choice(len(train_dataset), size=SUBSET_SIZE, replace=False)
small_train_dataset = Subset(train_dataset, subset_indices)

print(f'Smaller training dataset size: {len(small_train_dataset)}')

Smaller training dataset size: 200


In [5]:
# Function to split dataset into shards and slices
def split_dataset(dataset, num_shards, num_slices):
    """Partition ``dataset`` into contiguous shards and each shard into contiguous slices.

    Every sample ends up in exactly one slice. When a length is not evenly
    divisible, the leftover samples are handed out one each to the leading
    shards (or slices) instead of being silently dropped. For evenly divisible
    inputs the result is the same partition as plain floor division.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        num_shards: Number of shards to create (must be >= 1).
        num_slices: Number of slices per shard (must be >= 1).

    Returns:
        A list with ``num_shards`` entries; each entry is a list of
        ``num_slices`` ``Subset`` objects (a slice may be empty when a shard
        holds fewer samples than ``num_slices``).

    Raises:
        ValueError: If ``num_shards`` or ``num_slices`` is smaller than 1.
    """
    if num_shards < 1 or num_slices < 1:
        raise ValueError(
            f'num_shards and num_slices must be >= 1, got {num_shards} and {num_slices}'
        )

    def _balanced_ranges(total, parts):
        # Split range(total) into `parts` contiguous ranges whose sizes differ by at most one.
        base, extra = divmod(total, parts)
        ranges = []
        start = 0
        for part in range(parts):
            stop = start + base + (1 if part < extra else 0)
            ranges.append(range(start, stop))
            start = stop
        return ranges

    slices = []
    for i, shard_range in enumerate(_balanced_ranges(len(dataset), num_shards)):
        shard = Subset(dataset, shard_range)
        print(f'Shard {i+1} size: {len(shard)}')
        # Slice indices are relative to the shard, not to the original dataset.
        shard_slices = [
            Subset(shard, slice_range)
            for slice_range in _balanced_ranges(len(shard), num_slices)
        ]
        for k, shard_slice in enumerate(shard_slices):
            print(f'Shard {i+1}, Slice {k+1} size: {len(shard_slice)}')
        slices.append(shard_slices)
    return slices

In [6]:
# Example split for small dataset: two shards, each cut into two slices
num_shards, num_slices = 2, 2
slices = split_dataset(small_train_dataset, num_shards=num_shards, num_slices=num_slices)

Shard 1 size: 100
Shard 1, Slice 1 size: 50
Shard 1, Slice 2 size: 50
Shard 2 size: 100
Shard 2, Slice 1 size: 50
Shard 2, Slice 2 size: 50


## Model Training
Next, we'll define a function that trains one model per shard, feeding that shard's slices to the model one after another.

In [7]:
def train_model_on_slices(slices, epochs_per_slice):
    """Train one ResNet-18 classifier per shard and return the trained models.

    For each shard a new ``models.resnet18(pretrained=True)`` is created, its
    final layer is replaced by a two-layer head with 10 outputs, and the model
    is moved to the module-level ``device``. The same model and Adam optimizer
    are then reused across all slices of that shard, visiting the slices in
    order and running ``epochs_per_slice`` passes over each one.

    Args:
        slices: Iterable of shards, each shard being an iterable of datasets
            (as produced by ``split_dataset``).
        epochs_per_slice: Number of passes over each slice.

    Returns:
        A list with one trained model per shard, in shard order.
    """
    loss_fn = nn.CrossEntropyLoss()

    def build_classifier():
        # ResNet-18 backbone with its final layer swapped for a small MLP head.
        net = models.resnet18(pretrained=True)
        in_dim = net.fc.in_features
        net.fc = nn.Sequential(
            nn.Linear(in_dim, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10)  # CIFAR-10 has 10 classes
        )
        return net.to(device)

    def fit_one_shard(shard_slices):
        # One model and one optimizer per shard; both persist across the shard's slices.
        net = build_classifier()
        opt = optim.Adam(net.parameters(), lr=0.001)
        for data_slice in shard_slices:
            loader = DataLoader(data_slice, batch_size=64, shuffle=True)
            net.train()
            for _ in range(epochs_per_slice):
                for batch_images, batch_labels in loader:
                    batch_images = batch_images.to(device)
                    batch_labels = batch_labels.to(device)
                    opt.zero_grad()
                    batch_loss = loss_fn(net(batch_images), batch_labels)
                    batch_loss.backward()
                    opt.step()
        return net

    return [fit_one_shard(shard_slices) for shard_slices in slices]


## Model Aggregation
We'll aggregate the trained models by averaging their raw outputs on each batch and taking the highest-scoring class as the ensemble prediction.

In [8]:
def aggregate_models(trained_models, dataloader):
    """Predict with an ensemble by averaging the models' outputs per batch.

    Each model is switched to evaluation mode for the duration of the call.
    The models arrive from training in train mode, where the ``Dropout`` layer
    in the classifier head randomly zeroes activations, which would make the
    predictions noisy and non-deterministic. Each model's previous
    train/eval mode is restored before returning.

    Args:
        trained_models: Iterable of models that accept a batch of images and
            return per-class scores.
        dataloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        ``(all_predictions, all_labels)``: two flat lists holding, for every
        sample seen, the arg-max class of the averaged outputs and the
        ground-truth label.
    """
    trained_models = list(trained_models)
    all_predictions = []
    all_labels = []

    # Remember each model's mode so the caller gets the models back unchanged.
    previous_modes = [model.training for model in trained_models]
    for model in trained_models:
        model.eval()

    try:
        with torch.no_grad():
            for images, labels in dataloader:
                images = images.to(device)
                # Outputs are moved to the CPU so they can be averaged and converted to NumPy.
                outputs = [model(images).cpu() for model in trained_models]
                avg_output = sum(outputs) / len(outputs)
                _, predictions = torch.max(avg_output, 1)
                all_predictions.extend(predictions.numpy())
                all_labels.extend(labels.numpy())
    finally:
        for model, was_training in zip(trained_models, previous_modes):
            model.train(was_training)

    return all_predictions, all_labels


## Evaluation

In [None]:
def evaluate_model(predictions, labels, probabilities=None, num_classes=10):
    """Compute macro F1, accuracy, macro precision, macro recall and one-vs-rest AUROC.

    The multiclass form of ``roc_auc_score`` needs a score matrix of shape
    ``(n_samples, n_classes)`` whose rows sum to 1; passing the 1-D vector of
    hard class predictions raises an error. This function therefore uses
    ``probabilities`` when supplied and otherwise one-hot encodes
    ``predictions`` into a valid score matrix.

    Args:
        predictions: Sequence of predicted integer class indices.
        labels: Sequence of ground-truth integer class indices.
        probabilities: Optional array-like of shape ``(n_samples, n_classes)``
            with per-class probabilities (rows summing to 1). Supplying it
            gives a proper ranking-based AUROC.
        num_classes: Number of classes used for the one-hot fallback
            (defaults to 10, the number of CIFAR-10 classes).

    Returns:
        Tuple ``(f1, accuracy, precision, recall, auroc)``.

    Note:
        With the one-hot fallback the AUROC is computed from hard decisions
        only, so it carries no ranking information beyond the predicted class.
        Pass ``probabilities`` for a meaningful AUROC.
    """
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)

    f1 = f1_score(labels, predictions, average='macro')
    accuracy = accuracy_score(labels, predictions)
    precision = precision_score(labels, predictions, average='macro')
    recall = recall_score(labels, predictions, average='macro')

    if probabilities is not None:
        scores = np.asarray(probabilities, dtype=float)
    else:
        # One-hot rows sum to 1, which satisfies roc_auc_score's multiclass check.
        scores = np.eye(num_classes)[predictions]

    # `labels=` pins the column order of `scores` to class indices 0..n_classes-1.
    auroc = roc_auc_score(
        labels,
        scores,
        multi_class='ovr',
        labels=np.arange(scores.shape[1]),
    )
    return f1, accuracy, precision, recall, auroc

# Training and evaluation process for small dataset
epochs_per_slice = 1  # Number of epochs to train per slice for quick testing

# Re-create the shard/slice partition so this cell does not rely on an earlier one.
slices = split_dataset(small_train_dataset, num_shards, num_slices)

print('Training one model per shard...')
trained_models = train_model_on_slices(slices, epochs_per_slice)

# shuffle=False keeps the test samples in dataset order.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print(f'Aggregating predictions over {len(test_dataset)} test images...')
predictions, labels = aggregate_models(trained_models, test_loader)

print('Computing metrics...')
metrics = evaluate_model(predictions, labels)
f1, accuracy, precision, recall, auroc = metrics

# Display results
print(f'Configuration (S={num_shards}, R={num_slices}):')
print(f'F1 Score: {f1}')
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'AUROC: {auroc}')
print('----------------------------')


Shard 1 size: 100
Shard 1, Slice 1 size: 50
Shard 1, Slice 2 size: 50
Shard 2 size: 100
Shard 2, Slice 1 size: 50
Shard 2, Slice 2 size: 50


