# Install torch, torchvision and matplotlib

In [None]:
!pip install torch torchvision matplotlib




# Download and load the CIFAR-100 dataset

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

# Enable cuDNN's benchmark mode for convolution algorithm selection
torch.backends.cudnn.benchmark = True

# Preprocessing pipeline: convert each image to a tensor, then normalise all
# three colour channels with mean 0.5 and standard deviation 0.5
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

# CIFAR-100 training and test splits, stored under ./data (download enabled)
train_dataset, test_dataset = (
    torchvision.datasets.CIFAR100(
        root='./data', train=is_train, download=True, transform=transform
    )
    for is_train in (True, False)
)

# Mini-batch loaders: training batches are shuffled, test batches are not
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=64, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=64, shuffle=False
)

# Report how many samples each split contains
print(
    f"Train dataset size: {len(train_dataset)}",
    f"Test dataset size: {len(test_dataset)}",
    sep="\n",
)


Downloading https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz to ./data/cifar-100-python.tar.gz


100%|██████████| 169M/169M [00:03<00:00, 47.2MB/s]


Extracting ./data/cifar-100-python.tar.gz to ./data
Files already downloaded and verified
Train dataset size: 50000
Test dataset size: 10000


# Centralized baseline: LeNet-5 on CIFAR-100 trained with a simulated Local SGD loop

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import classification_report

# Mini-batch size for training. It is defined at module level so the training
# code further down in this cell can read it as a global.
batch_size = 64  # Batch size for training

# Define the LeNet-5 model
class LeNet5(nn.Module):
    """LeNet-5 style convolutional network for 3-channel images.

    The network applies two 5x5 convolutions, each followed by ReLU and a
    2x2 max pooling, and then three fully connected layers. The first fully
    connected layer expects 16 * 5 * 5 flattened features, which is what the
    convolutional stack produces for 32x32 inputs.

    Args:
        num_classes: Number of output scores produced by the last layer.
            Defaults to 100, so ``LeNet5()`` behaves exactly as before.
    """

    def __init__(self, num_classes=100):
        super().__init__()
        # Convolutional feature extractor
        self.conv1 = nn.Conv2d(3, 6, 5)  # 3 input channels (RGB) -> 6 channels, 5x5 kernel
        self.conv2 = nn.Conv2d(6, 16, 5)  # 6 channels -> 16 channels, 5x5 kernel
        # Fully connected classifier head
        self.fc1 = nn.Linear(16 * 5 * 5, 120)  # Flattened conv features -> 120 units
        self.fc2 = nn.Linear(120, 84)  # 120 units -> 84 units
        self.fc3 = nn.Linear(84, num_classes)  # 84 units -> one score per class

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images.

        Args:
            x: Image batch with 3 channels; 32x32 spatial size matches the
                input width of ``fc1``.

        Returns:
            Tensor with one row per sample and ``num_classes`` columns. No
            softmax is applied.
        """
        x = F.relu(self.conv1(x))  # conv1 + ReLU
        x = F.max_pool2d(x, 2)  # 2x2 max pooling
        x = F.relu(self.conv2(x))  # conv2 + ReLU
        x = F.max_pool2d(x, 2)  # 2x2 max pooling
        x = torch.flatten(x, 1)  # Keep the batch dimension, flatten the rest
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # Final layer: raw scores, no activation
        return x


# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Device: {device}")

# Local SGD Simulation function
def local_sgd_simulation(model, train_loader, num_workers=4, local_steps=5, epochs=2):
    """Simulate synchronous Local SGD with several workers on a single device.

    Each epoch is one communication round: the training set is randomly split
    into ``num_workers`` partitions, every simulated worker starts from the
    same copy of the global model and trains on its own partition for
    ``local_steps`` full passes, and the global parameters are then replaced
    by the plain average of the workers' parameters.

    Args:
        model: ``nn.Module`` to train. It is moved to the module-level
            ``device`` and its parameters are updated in place.
        train_loader: DataLoader whose ``dataset`` is partitioned among the
            workers and whose ``batch_size`` is reused for the per-worker
            loaders.
        num_workers: Number of simulated workers (dataset partitions).
        local_steps: Number of passes over its partition that each worker
            performs before the weights are averaged.
        epochs: Number of communication rounds.

    Returns:
        The trained global model (the same object as ``model``).

    Raises:
        ValueError: If ``num_workers`` is smaller than 1 or larger than the
            number of training samples.
    """
    import copy  # Local import: only needed to clone the global model

    dataset = train_loader.dataset
    dataset_size = len(dataset)
    if not 1 <= num_workers <= dataset_size:
        raise ValueError(
            f"num_workers must be between 1 and the dataset size ({dataset_size}), got {num_workers}"
        )

    # Move the model to the configured device
    model_global = model.to(device)
    criterion = nn.CrossEntropyLoss()  # Loss function for classification

    # Reuse the caller's batch size; a loader without automatic batching
    # reports None, in which case the notebook-level `batch_size` is used.
    local_batch_size = train_loader.batch_size
    if local_batch_size is None:
        local_batch_size = batch_size

    # Spread the remainder over the first partitions so the sizes always sum
    # to the dataset size (random_split rejects any other total).
    base_size, remainder = divmod(dataset_size, num_workers)
    partition_sizes = [base_size + (1 if i < remainder else 0) for i in range(num_workers)]

    for epoch in range(epochs):
        epoch_loss = 0.0
        num_batches = 0

        # Draw a fresh random partition of the dataset for this round
        data_partitions = torch.utils.data.random_split(dataset, partition_sizes)

        # Running sum of the workers' parameters. The global model is only
        # overwritten after every worker has finished, so all workers start
        # the round from identical weights.
        param_sums = [torch.zeros_like(p.detach()) for p in model_global.parameters()]

        for worker_id, partition in enumerate(data_partitions):
            print(f"Worker {worker_id + 1}/{num_workers} processing...")

            # Clone the global model so the worker has the same architecture
            # and the round's starting weights, whatever model class was passed.
            model_local = copy.deepcopy(model_global)
            optimizer = optim.SGD(model_local.parameters(), lr=0.01, momentum=0.9)
            # The scheduler is re-created per worker and stepped once per local
            # pass, so the rate is halved only after 10 passes within a round.
            scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

            # Create data loader for the local partition
            local_loader = torch.utils.data.DataLoader(
                partition,
                batch_size=local_batch_size,
                shuffle=True,
                pin_memory=True,
                num_workers=4,
            )
            model_local.train()  # Set the model to training mode

            # Training loop for this worker
            for _ in range(local_steps):
                for inputs, labels in local_loader:
                    inputs, labels = inputs.to(device), labels.to(device)
                    optimizer.zero_grad()

                    outputs = model_local(inputs)
                    loss = criterion(outputs, labels)

                    loss.backward()
                    optimizer.step()

                    epoch_loss += loss.item()
                    num_batches += 1

                scheduler.step()

            # Add this worker's trained parameters to the running sum
            with torch.no_grad():
                for param_sum, param_local in zip(param_sums, model_local.parameters()):
                    param_sum += param_local

        # Synchronise: global parameters become the mean of the local models.
        # Only parameters are averaged; buffers (if the model has any) keep
        # the global model's values.
        with torch.no_grad():
            for param_global, param_sum in zip(model_global.parameters(), param_sums):
                param_global.copy_(param_sum / num_workers)

        # Report the mean batch loss over the whole round (NaN if no batch ran)
        mean_loss = epoch_loss / num_batches if num_batches else float('nan')
        print('Loss/train', mean_loss, epoch)
        print(f"Epoch {epoch + 1}/{epochs} completed.")

    return model_global  # Return the globally trained model


# Training and testing
model = LeNet5()  # Fresh, untrained network
trained_model = local_sgd_simulation(
    model, train_loader, num_workers=4, local_steps=3, epochs=2
)  # 4 workers, 3 local passes each, 2 rounds

# Put the trained network into evaluation mode before scoring it
trained_model.eval()
correct = 0
total = 0
all_labels = []
all_preds = []

# Walk through the test set with gradient tracking disabled
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = trained_model(inputs)  # Raw class scores for the batch
        predicted = outputs.max(1).indices  # Index of the highest score per sample
        all_labels.extend(labels.cpu().numpy())  # Keep the true labels
        all_preds.extend(predicted.cpu().numpy())  # Keep the predicted labels
        total += labels.shape[0]
        correct += int((predicted == labels).sum())

# Print test accuracy
print(f"Test Accuracy: {100. * correct / total:.2f}%")



Device: cuda
Worker 1/4 processing...




Worker 2/4 processing...
Worker 3/4 processing...
Worker 4/4 processing...
Loss/train 3.08390736579895 0
Epoch 1/2 completed.
Worker 1/4 processing...
Worker 2/4 processing...
Worker 3/4 processing...
Worker 4/4 processing...
Loss/train 3.319532871246338 1
Epoch 2/2 completed.
Test Accuracy: 18.62%
