In [8]:
%%time
%reload_ext autoreload
%autoreload
%autoreload 2
%config Completer.use_jedi = False

import os
import json
import warnings
import pprint
import sys
import numpy as np
import pandas as pd

import torch

MAIN_PATH = os.getcwd().split("notebooks")[0]
sys.path.insert(0, MAIN_PATH)

CPU times: total: 109 ms
Wall time: 110 ms


In [9]:
# Run a Federated Learning experiment
from data_loader.cifar10 import Cifar10DatasetManager
from server.base_server import BaseServer
from client.base_client import BaseClient
from experiments.base_experiment import BaseExperiment
from gradients.noise import GaussianNoiseGenerator, NoNoiseGenerator,StaircaseNoiseGenerator
from metrics.classification import multiclass_accuracy
from models.cifar_model import SimpleCifarCNN, EfficientCifarCNN,ResNet

In [10]:
class DemoCifar10Experiment(BaseExperiment):
    def __init__(self, 
                 client_num: int = 2, 
                 lr: float = 0.01, 
                 noise_generator=None,
                 max_norm: float = 200,
                 sampling_rate: float = 0.05):
        if noise_generator is None:
            noise_generator = NoNoiseGenerator()
        self.noise_generator = noise_generator
        self.lr = lr
        self.max_norm = max_norm
        self.sampling_rate = sampling_rate
        self.client_num = client_num
        self._init_server_clients(client_num, self.lr)
        self._init_data(client_num)

    def _init_server_clients(self, client_num, lr):
        model = ResNet
        self.clients = [BaseClient(model(lr=lr, max_norm=self.max_norm), 
                                   client_id=idx, 
                                   noise_generator=self.noise_generator)
                        for idx in range(client_num)]
        self.server = BaseServer(model(lr=lr, max_norm=self.max_norm))

    def _init_data(self, client_num):
        data_manager = Cifar10DatasetManager(n_parties=client_num, 
                                             sampling_lot_rate=self.sampling_rate)
        self.client_train_datas = data_manager.train_loaders
        self.valid_datas = data_manager.validation_loader
        self.test_data = data_manager.test_loader

    def evaluate_model(self, data):
        total_correct = 0
        total_sample_num = 0
        with torch.no_grad():
            for _, (inputs, target) in enumerate(data):
                predict_labels = self.server.predict(inputs)
                correct, sample_num = multiclass_accuracy(y_pred=predict_labels, 
                                                          y_true=target)
                total_correct += correct
                total_sample_num += sample_num
                
        return total_correct / total_sample_num
        
    def get_validation_result(self):
        return self.evaluate_model(self.valid_datas)
    
    def get_test_result(self):
        return self.evaluate_model(self.test_data)
    
    def aggeragate(self):
        self.server.aggeragate_model(self.clients)
    
    def run(self, epochs: int, client_epochs: int):
        self._init_data(self.client_num)
        for client in self.clients:
            client.set_training_mode(for_gradient=False)

        for epoch in range(epochs):
            print(self.get_validation_result())
            for client, client_train_data in self.shuffled_data(to_shuffle=False):
                client.train(client_train_data, client_epochs=client_epochs)

            self.aggeragate()

            self.distribute_model()


In [11]:
EXPERIMENT = DemoCifar10Experiment(client_num=1,
                                lr = 0.001, 
                                max_norm=1000,
                                sampling_rate=0.05,
                                noise_generator=NoNoiseGenerator())
EXPERIMENT.run(500, 10)

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
0.0978
0.1025
0.1025
0.1025
0.1025
0.1025
0.1025
0.1025
0.1025
0.1025
0.1025
0.1025
0.0984
0.1023
0.1025
0.1025
0.1025
0.1025
0.1025
0.1025
0.0998
0.1001
0.1025
0.0985
0.0998
0.0998
0.0998
0.1065
0.1168
0.0999
0.0998
0.1035
0.0998
0.0998
0.0998
0.0998
0.0998
0.0997


KeyboardInterrupt: 

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms


In [7]:
# Data preprocessing
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Load dataset
batch_size = 32
# data_manager = Cifar10DatasetManager(n_parties=1, 
#                                              sampling_lot_rate=0.01)
# trainloader = data_manager.train_loaders
# testset = data_manager.validation_loader
# testloader = data_manager.test_loader
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False)


NameError: name 'transforms' is not defined

In [20]:
trainloader

<torch.utils.data.dataloader.DataLoader at 0x1aa898b1640>

In [13]:
trainloader

[<generator object Cifar10DatasetManager.create_sampling_dataloader at 0x000001A9FAC96350>]

In [17]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                         download=True, transform=transform)
trainloader = Cifar10DatasetManager(trainset, batch_size=batch_size,
                                           shuffle=True, num_workers=2)

Files already downloaded and verified


TypeError: __init__() got an unexpected keyword argument 'shuffle'

In [12]:
# Initialize model and optimizer
learning_rate = 0.001
model = ResNet(lr=learning_rate)
model = model.to(model.device)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(model.device), labels.to(model.device)

        model.optimizer.zero_grad()

        outputs = model(inputs)
        loss = model.loss_fn(outputs, labels).mean()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), model.max_norm)
        model.optimizer.step()

        running_loss += loss.item()
        
        if i % 200 == 199:  # Print average loss every 200 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 200:.3f}")
            running_loss = 0.0

# Save the trained model
torch.save(model.state_dict(), "cifar10_resnet.pth")


ValueError: too many values to unpack (expected 2)

In [35]:
# Initialize model and optimizer
learning_rate = 0.001
model = ResNet(lr=learning_rate)
model = model.to(model.device)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(model.device), labels.to(model.device)

        model.optimizer.zero_grad()

        outputs = model(inputs)
        loss = model.loss_fn(outputs, labels).mean()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), model.max_norm)
        model.optimizer.step()

        running_loss += loss.item()
        
        if i % 200 == 199:  # Print average loss every 200 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 200:.3f}")
            running_loss = 0.0

# Save the trained model
torch.save(model.state_dict(), "cifar10_resnet.pth")


[1, 200] loss: 2.264
[1, 400] loss: 1.911
[1, 600] loss: 1.715
[1, 800] loss: 1.600
[1, 1000] loss: 1.479
[1, 1200] loss: 1.384
[1, 1400] loss: 1.312
[2, 200] loss: 1.174
[2, 400] loss: 1.127
[2, 600] loss: 1.084
[2, 800] loss: 1.038
[2, 1000] loss: 1.004
[2, 1200] loss: 0.949
[2, 1400] loss: 0.920
[3, 200] loss: 0.813
[3, 400] loss: 0.823
[3, 600] loss: 0.793
[3, 800] loss: 0.753
[3, 1000] loss: 0.723
[3, 1200] loss: 0.681
[3, 1400] loss: 0.669
[4, 200] loss: 0.594
[4, 400] loss: 0.559
[4, 600] loss: 0.577
[4, 800] loss: 0.548
[4, 1000] loss: 0.546
[4, 1200] loss: 0.544
[4, 1400] loss: 0.535
[5, 200] loss: 0.433
[5, 400] loss: 0.438
[5, 600] loss: 0.426
[5, 800] loss: 0.431
[5, 1000] loss: 0.448
[5, 1200] loss: 0.446
[5, 1400] loss: 0.412
[6, 200] loss: 0.328
[6, 400] loss: 0.316
[6, 600] loss: 0.349
[6, 800] loss: 0.344
[6, 1000] loss: 0.343
[6, 1200] loss: 0.360
[6, 1400] loss: 0.310
[7, 200] loss: 0.228
[7, 400] loss: 0.246
[7, 600] loss: 0.255
[7, 800] loss: 0.256
[7, 1000] loss: 

In [36]:
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(model.device), labels.to(model.device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")


Accuracy of the network on the 10000 test images: 86.54%
