In [71]:
# Import Global Dependencies
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import torch
import torch.nn.functional as F

# Import Helper Libaries
import matplotlib.pyplot as plt
from torchinfo import summary
import numpy as np
import random
import time

filepath = "C:/Users/aidan_000/Desktop/UNCC/Github/Fed-Learning/data" 

<h2>CIFAR-10 CNN Model Architecture</h2>

<t>*The second experiment focuses on training a Convolutional Neural Network CNN on the CIFAR-10 dataset. The CNN
architecture encompasses two convolutional layers with max pooling, two fully connected layers, and a softmax output
layer*</t>

In [72]:
class CNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=5, padding=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, padding=2)
        self.fc1 = nn.Linear(64*8*8, 1024)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(x.size(0), -1)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.softmax(x, dim=1)
        return x

<h2>Federated Learning Algorithms</h2>

<h3>Global Aggregator with Selected Clients' Quantized Vectors</h3>

In [73]:
def global_aggregate(global_model, client_models):
    global_dict = global_model.state_dict()
    quantized_updates = []
    for i, client_model in enumerate(client_models):
        # Quantize the difference between the client's final local model and the global model
        quantized_update = Q(client_model.state_dict() - global_dict)
        quantized_updates.append(quantized_update)
    
    # Aggregate the quantized updates and update the global model
    for k in global_dict.keys():
        global_dict[k] = global_dict[k] + (1 / len(client_models)) * torch.stack(
            [update[k] for update in quantized_updates], 0
        ).mean(0)
    
    global_model.load_state_dict(global_dict)
    
    # Update the client models with the new global model
    for model in client_models:
        model.load_state_dict(global_model.state_dict())

<h3>Global Model Evaluation</h3>

In [74]:
def model_evaluation(global_model, validation_loader):
    global_model.eval()
    loss = 0
    correct = 0
    with torch.no_grad():
        for inputs, labels in validation_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            output = global_model(inputs)
            loss += nn.CrossEntropyLoss()(output, labels).item()  # Using the criterion
            _, predicted = torch.max(output.data, 1)
            correct += (predicted == labels).sum().item()

    loss /= len(validation_loader.dataset)
    accuracy = correct / len(validation_loader.dataset)

    return loss, accuracy

<h3>Client Update</h3>

In [75]:
def client_update(client, optimizer, training_loader, epochs):
    client.train()
    for epoch in range(epochs):
        for batch_idx, (inputs, labels) in enumerate(training_loader):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            output = client(inputs)
            loss = F.cross_entropy(output, labels)
            loss.backward()
            optimizer.step()
    return loss.item()

<h3>Training using Random Scheduling</h3>

1. Periodic averaging: Let the participating nodes conduct a number of local updates and synchronize through the parameter server periodically. To be more specific, once nodes pull an updated model from the server, they update the model locally by running iterations of the SGD method and then send proper information to the server for updating the aggregate model.

2. Partial node participation: Each round of the training algorithm the parameter server sends its current model X<sub>k</sub> to all the _r_ nodes in subset _S<sub>k</sub>_, which are distributed uniformly at random among the total _n_ nodes

3. Quantized message-passing: each node _i_ ∈ _S<sub>k</sub>_ obtains the model **x**<sup>(_i_)</sup><sub>_k,τ_</sub> after running τ local iterations of an optimization method (SGD) on the most recent model **x**<sub>_k_</sub> that it has receieved from the server. Then each node _i_ applies a quantizer operator _Q(·)_ on the difference between the recieved model and its updated model, i.e., **x**<sup>(_i_)</sup><sub>_k,τ_</sub>  -  **x**<sub>_k_</sub> and then uploads the quantized vector _Q(**x**<sup>(i)</sup><sub>k,τ</sub>  -  **x**<sub>k</sub>)_ to the parameter server. Once these quantized vectors are sent to the server, it decodes the quantized signals and combines them to come up with a new model **x**<sub>_k_+1</sub>

In [76]:
def QLP_i(x, s):
    """Low-precision quantizer for the i-th element of vector x"""
    norm_x = torch.norm(x)
    sign_x_i = torch.sign(x)
    l = random.randint(0, s)
    
    # Generate the random variable ξ_i(x, s)
    prob = abs(x) / (norm_x * s) - l
    xi = (l+1)/s if random.random() < prob else l/s
    
    return norm_x * sign_x_i * xi

In [77]:
def global_aggregate(global_model, client_models, quantization_level):
    global_dict = global_model.state_dict()
    quantized_updates = []
    for i, client_model in enumerate(client_models):
        # Compute the difference between the client's final local model and the global model
        model_diff = {k: client_model.state_dict()[k] - global_dict[k] for k in global_dict.keys()}
        
        # Quantize the difference using the low-precision quantizer
        quantized_update = {k: QLP_i(model_diff[k], quantization_level) for k in model_diff.keys()}
        quantized_updates.append(quantized_update)
    
    # Aggregate the quantized updates and update the global model
    for k in global_dict.keys():
        global_dict[k] = global_dict[k] + (1 / len(client_models)) * torch.stack(
            [update[k] for update in quantized_updates], 0
        ).mean(0)
    
    global_model.load_state_dict(global_dict)
    
    # Update the client models with the new global model
    for model in client_models:
        model.load_state_dict(global_model.state_dict())

In [78]:
def RNG_training(model_type, global_model, lr, total_clients, clients_per_round, total_rounds, local_epochs, training_loaders, validation_loader, quantization_level):
    clients = [model_type().to(device) for _ in range(total_clients)]

    for models in clients:
        models.load_state_dict(global_model.state_dict())
    
    opt = [optim.SGD(models.parameters(), lr=lr) for models in clients]

    average_losses, valid_losses, valid_accuracies = [], [], []

    global_start_time = time.time()
    for round in range(total_rounds):
        start_time = time.time()
        clients_idx = np.random.permutation(total_clients)[:clients_per_round]

        client_losses = 0
        selected_models = []
        
        for i in range(clients_per_round):
            clients[clients_idx[i]].load_state_dict(global_model.state_dict())
            client_losses += client_update(clients[clients_idx[i]], opt[clients_idx[i]], training_loaders[clients_idx[i]], local_epochs)
            selected_models.append(clients[clients_idx[i]])
        
        global_aggregate(global_model, selected_models, quantization_level)
    
        avg_loss = client_losses / clients_per_round
        valid_loss , valid_accuracy = model_evaluation(global_model, validation_loader)

        average_losses.append(avg_loss)
        valid_losses.append(valid_loss)
        valid_accuracies.append(valid_accuracy)
    
        end_time = time.time()
        round_time = end_time - start_time
    
        if (round % 10) == 0:
            rounds_end_time = time.time()
            rounds_time = rounds_end_time - global_start_time
            print('Round {:3d}, Time (secs) {:.2f}: Average loss {:.4f}, Validation Loss {:.4f}, Validation Accuracy {:.4f}'.format(round + 1, rounds_time, avg_loss, valid_loss, valid_accuracy))
    return average_losses, valid_losses, valid_accuracies

<h3>Training using Age-based (AoU)</h3>


In [79]:
def ABS_training(model_type, global_model, lr, total_clients, clients_per_round, total_rounds, local_epochs, training_loaders, validation_loader,):
    clients = [model_type().to(device) for _ in range(total_clients)]
    
    for models in clients:
        models.load_state_dict(global_model.state_dict())
    
    opt = [optim.SGD(models.parameters(), lr=lr) for models in clients]
    clients_age = np.zeros(total_clients)
    
    average_losses, valid_losses, valid_accuracies = [], [], []

    global_start_time = time.time()
    for round in range(total_rounds):
        start_time = time.time()

        # Age-based scheduling: select the clients with the highest age
        old_clients_idx = np.argsort(clients_age)[-clients_per_round:]

        clients_age += 1
        clients_age[old_clients_idx] = 0  # Reset the age of the selected clients

        client_losses = 0      
        selected_models = []
        
        for i in range(clients_per_round):
            clients[old_clients_idx[i]].load_state_dict(global_model.state_dict())
            client_losses += client_update(clients[old_clients_idx[i]], opt[old_clients_idx[i]], training_loaders[old_clients_idx[i]], local_epochs)
            selected_models.append(clients[old_clients_idx[i]])
        

        global_aggregate(global_model, selected_models)
    
        avg_loss = client_losses / clients_per_round
        valid_loss , valid_accuracy = model_evaluation(global_model, validation_loader)

        average_losses.append(avg_loss)
        valid_losses.append(valid_loss)
        valid_accuracies.append(valid_accuracy)
    
        end_time = time.time()
        round_time = end_time - start_time
    
        if (round % 10) == 0:
            rounds_end_time = time.time()
            rounds_time = rounds_end_time - global_start_time
            print('Round {:3d}, Time (secs) {:.2f}: Average loss {:.4f}, Validation Loss {:.4f}, Validation Accuracy {:.4f}'.format(round + 1, rounds_time, avg_loss, valid_loss, valid_accuracy))
    return average_losses, valid_losses, valid_accuracies

<h2>Hyperparameters for Training Experience</h2>

In [80]:
# Device configuration: use CUDA if available, otherwise use CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Learning configuration
lr = 0.15
total_rounds = 100  # Total number of training rounds, Denoted as T

# Client configuration
total_clients = 100  # Total number of clients
clients_per_round = int(0.2*total_clients)  # Number of clients selected per round, Denoted as r

# Local training configuration
local_batchsize = 10  # Batch size for local training
local_epochs = 10  # Denoted as tau (little T)

quantization_level = 1 # Denoted as s

<h2>IID Data Preparation for the CIFAR-10 Dataset</h2>
<t>*The IID data is shuffled and then divided up across 100 clients each receiving 600 examples.*  

*Exclusively use independent and identically distributed i.i.d. distributions for CIFAR-10 due to the absence of a natural
data user partition*</t>

In [81]:
# Load CIFAR-10 dataset
CIFARtransform = transforms.Compose([
    transforms.ToTensor(),  # Convert PIL Image to tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) 
])

dataset = datasets.CIFAR10(filepath, train=True, download=True, transform=CIFARtransform)

CIFAR10_dataset = torch.utils.data.random_split(dataset, [len(dataset) // total_clients for _ in range(total_clients)])
CIFAR10_training = [torch.utils.data.DataLoader(x, batch_size=local_batchsize, shuffle=True) for x in CIFAR10_dataset]

CIFAR10_validation = torch.utils.data.DataLoader(datasets.CIFAR10(filepath, train=False, transform=CIFARtransform), batch_size=local_batchsize, shuffle=True)

Files already downloaded and verified


<h2>CNN Model Training with CIFAR-10 IID</h2>

<t>Training is done using *'Random'* Scheduling</t>

In [82]:
CNN_FedPAQ = CNN().to(device)
# CNN_FedAvg = CNN().to(device)

print("=== Training: Model - FedPAQ, Schedule - Random, Data Distribution - IID CIFAR-10 ===")
FedPAQ_losses, FedPAQ_eval_losses, FedPAQ_accuracies = RNG_training(CNN, CNN_FedPAQ, lr, total_clients, clients_per_round, total_rounds, local_epochs, CIFAR10_training, CIFAR10_validation, quantization_level)

# Save Final Models
torch.save(CNN_FedPAQ.state_dict(), '.\\Models\\CNN_FedPAQ.pth')

=== Training: Model - FedPAQ, Schedule - Random, Data Distribution - IID CIFAR-10 ===


RuntimeError: Boolean value of Tensor with more than one value is ambiguous

<h4>CNN Model Training/Inferencing Experience Comparison for CIFAR-10 IID</h4>

In [None]:
print(f'=================================== Final CNN Model Accuracies per Schedule ====================================')
print(f'Random Scheduled FedPAQ CIFAR10 IID Model Accuracy: {FedPAQ_accuracies[-1]}')
print(f'================================================================================================================')

epochs_range = range(1, total_rounds + 1)

# Plot Global Training Loss
plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, FedPAQ_eval_losses, color='red', linestyle="dashed", label="Random Schedule")
plt.xlabel('Communication Rounds')
plt.ylabel('Train Loss')
plt.legend(loc='upper right')  
plt.title('CNN CIFAR-10 IID Loss Curve')

# Plot Global Validation Accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs_range, FedPAQ_accuracies, color='red', linestyle="dashed", label="Random Schedule")
plt.xlabel('Communication Rounds')
plt.ylabel('Test Accuracy')
plt.legend(loc='lower right') 
plt.title('CNN CIFAR-10 IID Accuracy Curve')

plt.tight_layout()
plt.savefig('.\\Plots\\cifar10_iid_FedPAQ_results.png')
plt.show()