# Deploy communication compression scheme in FedLab

This tutorial provides comprehensive examples of implementing a communication-efficient scheme in FedLab.

We take the baseline gradient compression algorithms as examples (top-k for gradient sparsification and QSGD for gradient quantization).

## Compress example

In [1]:
import sys
sys.path.append("../")

from fedlab.contrib.compressor.quantization import QSGDCompressor
from fedlab.contrib.compressor.topk import TopkCompressor
import torch

# Top-k compressor built with compress_ratio=0.05; reused by the top-k demo cell below.
tpk_compressor = TopkCompressor(compress_ratio=0.05) # top 5% gradient
# QSGD compressor built with n_bit=8; reused by the QSGD demo cells below.
qsgd_compressor = QSGDCompressor(n_bit=8)


In [2]:
# top-k demo: push a random 100-element tensor through the top-k compressor
tensor = torch.randn(100)
shape = tensor.size()  # kept so that decompress can be handed the original shape
print("To be compressed tensor:", tensor)

# compress: the result unpacks into the kept values and their indices
values, indices = tpk_compressor.compress(tensor)
print("Compressed results top-k values:", values)
print("Compressed results top-k indices:", indices)

# decompress: takes the values, the indices and the original shape
decompressed = tpk_compressor.decompress(values, indices, shape)
print("Decompressed results:", decompressed)

To be compressed tensor: tensor([-0.0117, -0.2613,  0.9804, -1.1710,  0.8875, -0.3904, -0.2542, -1.9668,
        -0.9684, -0.2456, -0.6323, -0.4404,  0.5047,  0.9951, -0.0873, -0.1383,
         0.1960, -1.0819, -0.3392, -1.6281,  0.7332, -0.1525,  0.2921,  0.1089,
        -1.9116,  0.0485,  0.8300,  1.2147,  0.6108,  0.4922,  0.1932,  0.7526,
        -0.8702, -0.0484, -1.8132,  1.1468,  0.9768,  0.5359, -1.1338,  0.2096,
        -0.8231, -1.3328, -0.8701, -0.4968,  2.0271,  0.8540, -0.2519,  0.9694,
         0.6756,  0.0466, -1.5590,  0.1060,  1.0665, -0.0566, -1.7187, -0.3393,
        -0.5582,  0.9904, -1.6938, -0.3719, -0.7633, -0.6630,  0.4305, -0.2383,
        -0.3844, -0.8845, -0.6226, -0.8108, -0.6467, -0.8886,  0.6957,  0.1746,
         0.7250,  0.0993, -0.4310,  0.0506,  0.1916,  0.3660,  0.7346,  0.0725,
        -2.2989,  0.1391,  0.2980,  0.0408,  0.7876,  1.9719, -0.2313,  2.3823,
         0.4999, -0.2233,  0.5530,  1.0528, -0.6531, -0.1192,  0.0406,  0.1687,
         0.5074

In [3]:
# qsgd demo: draw a fresh random 100-element tensor and compress it with QSGD
tensor = torch.randn(100)
shape = tensor.size()
print("To be compressed tensor:", tensor)

# compress: the result unpacks into three parts (norm, signs, values);
# these names are read again by the decompress cell that follows
norm, signs, values = qsgd_compressor.compress(tensor)
print("Compressed results QSGD norm:", norm)
print("Compressed results QSGD signs:", signs)
print("Compressed results QSGD values:", values)


To be compressed tensor: tensor([-1.2208,  1.4187, -0.5420,  0.4409, -0.7793, -0.1413, -0.9905, -0.7609,
         1.5335,  1.3902,  2.1191, -0.1703,  0.2943, -1.7366, -1.9368, -1.0637,
        -1.8469,  0.3083,  1.4614, -1.0390, -0.3341, -0.1067, -0.0539,  0.2535,
         2.2873,  0.3141,  0.5668, -1.0361, -0.2052,  0.6109, -0.7858,  0.1472,
         0.3375, -2.0928, -0.7161,  0.0547, -0.5214,  0.5887, -0.0325,  0.4731,
        -1.7919, -0.2484, -0.1335, -1.0788,  1.0368,  0.9461, -0.0935, -1.9321,
        -0.9211, -0.9592,  2.4520,  0.6194, -0.6674,  0.0290,  0.0573,  0.3466,
        -0.3336, -1.0915, -1.1358, -0.0730,  0.8704,  0.1320,  1.5872, -0.9954,
        -0.0316, -0.0083,  0.2476,  0.4635,  0.7978,  0.9841, -1.4457,  0.6230,
         0.2561,  0.1120,  0.5665,  0.9917, -0.7548,  0.6985,  0.1262,  0.9677,
        -0.3080, -0.0502,  0.4727, -0.6012,  2.1698, -1.3320,  0.4133,  1.0892,
        -0.5171, -2.3523, -1.3173,  1.2618,  0.2381, -0.4757,  2.0841,  0.0161,
         1.7087

In [4]:
# decompress: hand the three parts produced by the QSGD compress cell back to
# the compressor as a single list, then print the result
decompressed = qsgd_compressor.decompress([norm, signs, values])
print("Decompressed results:", decompressed)

Decompressed results: tensor([-1.2164,  1.4175, -0.5459,  0.4406, -0.7854, -0.1437, -0.9865, -0.7662,
         1.5325,  1.3984,  2.1167, -0.1724,  0.2969, -1.7336, -1.9347, -1.0727,
        -1.8485,  0.3065,  1.4654, -1.0440, -0.3352, -0.1054, -0.0575,  0.2490,
         2.2796,  0.3161,  0.5747, -1.0344, -0.2011,  0.6130, -0.7854,  0.1532,
         0.3352, -2.0976, -0.7183,  0.0575, -0.5172,  0.5843, -0.0287,  0.4693,
        -1.7911, -0.2490, -0.1341, -1.0727,  1.0344,  0.9482, -0.0862, -1.9347,
        -0.9195, -0.9578,  2.4520,  0.6226, -0.6705,  0.0287,  0.0575,  0.3448,
        -0.3352, -1.0919, -1.1398, -0.0670,  0.8716,  0.1245,  1.5899, -0.9961,
        -0.0287, -0.0096,  0.2490,  0.4693,  0.8045,  0.9865, -1.4463,  0.6226,
         0.2586,  0.1054,  0.5651,  0.9961, -0.7567,  0.6992,  0.1245,  0.9674,
        -0.3065, -0.0479,  0.4693, -0.6034,  2.1742, -1.3313,  0.4119,  1.0919,
        -0.5172, -2.3466, -1.3122,  1.2547,  0.2394, -0.4789,  2.0784,  0.0192,
         1.7145,  

## Use compressor in federated learning

For example, on the client side we could compress the tensors that are to be uploaded and send the compressed results to the server. The server could then decompress the tensors following the agreed compression scheme.

In this Jupyter notebook, we take the standalone scenario as an example.

In [5]:
from fedlab.contrib.algorithm.basic_client import SGDSerialClientTrainer, SGDClientTrainer
from fedlab.contrib.algorithm.basic_server import SyncServerHandler

class CompressSerialClientTrainer(SGDSerialClientTrainer):
    """Serial SGD client trainer whose uplink package is compressed before upload."""

    def setup_compressor(self, compressor):
        """Store the compressor used by :attr:`uplink_package`.

        Args:
            compressor: Object exposing a ``compress`` method.
        """
        self.compressor = compressor

    @property
    def uplink_package(self):
        """Return the parent's uplink package with each entry compressed.

        For every entry of the parent package, only its first element is
        compressed; the result is wrapped in a one-element list.
        (Presumably that first element is the client's model parameters --
        confirm against ``SGDSerialClientTrainer``.)
        """
        raw_package = super().uplink_package
        return [[self.compressor.compress(entry[0])] for entry in raw_package]

class CompressServerHandler(SyncServerHandler):
    """Synchronous server handler that decompresses client uploads before loading them."""

    def setup_compressor(self, compressor, type):
        """Store the compressor and the name of the compression scheme.

        Args:
            compressor: Object exposing a ``decompress`` method.
            type: Scheme name; :meth:`load` understands ``"topk"`` and ``"qsgd"``.
        """
        self.compressor = compressor
        # The parameter name shadows the builtin ``type``; it is kept so that
        # existing keyword callers continue to work.
        self.type = type

    def load(self, payload) -> bool:
        """Decompress ``payload[0]`` and pass it on to ``SyncServerHandler.load``.

        Args:
            payload: Sequence whose first element is the compressed content:
                a ``(values, indices)`` pair for ``"topk"`` or a three-element
                sequence for ``"qsgd"``.

        Returns:
            The value returned by the parent ``load``.

        Raises:
            ValueError: If the configured scheme is neither ``"topk"`` nor
                ``"qsgd"``.
        """
        if self.type == "topk":
            values, indices = payload[0]
            # Top-k decompression needs the target shape; the shape of the
            # current global parameters is used.
            decompressed_payload = self.compressor.decompress(
                values, indices, self.model_parameters.shape)
        elif self.type == "qsgd":
            norm, signs, values = payload[0]
            decompressed_payload = self.compressor.decompress((norm, signs, values))
        else:
            # Fail with a clear message instead of hitting an unbound local below.
            raise ValueError(
                f"Unsupported compression type {self.type!r}; "
                "expected 'topk' or 'qsgd'.")

        return super().load([decompressed_payload])

In [6]:
# Main: this part follows the pipeline in pipeline_tutorial.ipynb,
# but replaces the handler and trainer with the ones defined above for communication compression.

# configuration
import os
from opcode import cmp_op
from munch import Munch
from fedlab.models.mlp import MLP

model = MLP(784, 10)
# Create a Munch *instance*. Binding the bare class would turn every setting
# below into a class attribute shared by all Munch objects in the process.
args = Munch()

args.total_client = 100
args.alpha = 0.5
args.seed = 42
# Request preprocessing only when this previously generated file is absent.
args.preprocess = not os.path.exists("../datasets/mnist/fedmnist/train/data2.pkl")
args.cuda = torch.cuda.is_available()
args.cmp_op = "qsgd"  # one of "topk", "qsgd"

args.k = 0.1  # compress ratio, used when cmp_op == "topk"
args.bit = 8  # number of bits, used when cmp_op == "qsgd"

# Exactly one branch must define `compressor`; an unknown scheme is reported
# here rather than surfacing later as a NameError.
if args.cmp_op == "topk":
    compressor = TopkCompressor(compress_ratio=args.k)
elif args.cmp_op == "qsgd":
    compressor = QSGDCompressor(n_bit=args.bit)
else:
    raise ValueError(
        f"Unsupported compression scheme {args.cmp_op!r}; expected 'topk' or 'qsgd'.")

from torchvision import transforms
from fedlab.contrib.dataset.partitioned_mnist import PartitionedMNIST

# Build the partitioned MNIST dataset from the settings stored in `args`.
fed_mnist = PartitionedMNIST(
    root="../datasets/mnist/",
    path="../datasets/mnist/fedmnist/",
    num_clients=args.total_client,
    partition="noniid-labeldir",
    dir_alpha=args.alpha,
    seed=args.seed,
    preprocess=args.preprocess,
    download=True,
    verbose=True,
    transform=transforms.Compose(
        [transforms.ToPILImage(), transforms.ToTensor()]
    ),
)

# Dataset of client 0.
dataset = fed_mnist.get_dataset(0)
# Data loader of client 0 with batch size 128.
dataloader = fed_mnist.get_dataloader(0, batch_size=128)


In [7]:
# client
from fedlab.contrib.algorithm.basic_client import SGDSerialClientTrainer, SGDClientTrainer

# Hyper-parameters for local training
args.epochs = 5
args.batch_size = 128
args.lr = 0.1

# Serial trainer with compression support.
# Alternative single trainer: SGDClientTrainer(model, cuda=True)
trainer = CompressSerialClientTrainer(
    model,
    args.total_client,
    cuda=args.cuda,
)

# Attach the dataset, the optimisation settings and the compressor.
trainer.setup_dataset(fed_mnist)
trainer.setup_optim(args.epochs, args.batch_size, args.lr)
trainer.setup_compressor(compressor)

# server
from fedlab.contrib.algorithm.basic_server import SyncServerHandler

# Global (server-side) settings
args.com_round = 10
args.sample_ratio = 0.1

# Server handler with decompression support.
handler = CompressServerHandler(
    model=model,
    global_round=args.com_round,
    num_clients=args.total_client,
    sample_ratio=args.sample_ratio,
    cuda=args.cuda,
)
# The handler is given the same compressor object as the trainer, plus the scheme name.
handler.setup_compressor(compressor, args.cmp_op)

In [8]:
from fedlab.utils.functional import evaluate
from fedlab.core.standalone import StandalonePipeline

from torch import nn
from torch.utils.data import DataLoader
import torchvision

class EvalPipeline(StandalonePipeline):
    """Standalone pipeline that evaluates the handler's model after every round."""

    def __init__(self, handler, trainer, test_loader):
        """Keep the test loader next to the handler and trainer.

        Args:
            handler: Server handler passed to ``StandalonePipeline``.
            trainer: Client trainer passed to ``StandalonePipeline``.
            test_loader: Data loader given to ``evaluate`` after each round.
        """
        super().__init__(handler, trainer)
        self.test_loader = test_loader

    def main(self):
        """Run rounds until ``handler.if_stop`` is no longer ``False``."""
        while self.handler.if_stop is False:
            # Server: pick this round's clients, then read the downlink package.
            client_ids = self.handler.sample_clients()
            downlink = self.handler.downlink_package

            # Clients: local processing on the sampled clients.
            self.trainer.local_process(downlink, client_ids)

            # Server: load each client's upload, one at a time.
            for client_upload in self.trainer.uplink_package:
                self.handler.load(client_upload)

            # Evaluate the handler's model on the test loader and report.
            criterion = nn.CrossEntropyLoss()
            loss, acc = evaluate(self.handler.model, criterion, self.test_loader)
            print(f"Centralized Evaluation round {self.handler.round}: loss {loss:.4f}, test accuracy {acc:.4f}")


# MNIST split selected with train=False, used for the evaluation in EvalPipeline.
test_data = torchvision.datasets.MNIST(
    root="../datasets/mnist/",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)
test_loader = DataLoader(test_data, batch_size=1024)

# Assemble the pipeline from the handler and trainer configured above, then run it.
standalone_eval = EvalPipeline(
    handler=handler,
    trainer=trainer,
    test_loader=test_loader,
)
standalone_eval.main()

>>> Local training: 100%|██████████| 10/10 [00:04<00:00,  2.28it/s]


loss 21.6554, test accuracy 0.2149


>>> Local training: 100%|██████████| 10/10 [00:03<00:00,  2.64it/s]


loss 17.1997, test accuracy 0.5176


>>> Local training: 100%|██████████| 10/10 [00:03<00:00,  2.73it/s]


loss 11.8354, test accuracy 0.6876


>>> Local training: 100%|██████████| 10/10 [00:03<00:00,  2.94it/s]


loss 9.7127, test accuracy 0.6818


>>> Local training: 100%|██████████| 10/10 [00:03<00:00,  2.66it/s]


loss 9.9816, test accuracy 0.6350


>>> Local training: 100%|██████████| 10/10 [00:03<00:00,  2.69it/s]


loss 7.2361, test accuracy 0.7495


>>> Local training: 100%|██████████| 10/10 [00:04<00:00,  2.40it/s]


loss 6.7841, test accuracy 0.7664


>>> Local training: 100%|██████████| 10/10 [00:03<00:00,  2.70it/s]


loss 6.7317, test accuracy 0.7627


>>> Local training: 100%|██████████| 10/10 [00:03<00:00,  2.60it/s]


loss 4.8795, test accuracy 0.8402


>>> Local training: 100%|██████████| 10/10 [00:04<00:00,  2.40it/s]


loss 4.5749, test accuracy 0.8712
