In [103]:
import os
# Show the kernel's working directory: the relative paths used later in the
# notebook ('./datasets', "modelo_mlp.pth") are resolved against it.
os.getcwd()

'/Users/Mara/Documents/Mestrado/Pesquisa/param_matrix_mult'

In [142]:
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from tqdm.notebook import tqdm
import numpy as np
import flwr as fl
from torch.utils.data import DataLoader, random_split
from collections import OrderedDict
from typing import List, Tuple
from flwr.common import Metrics

Importando dados

In [96]:
# Download (when missing) and load MNIST; ToTensor converts each image to a tensor.
mnist_train = datasets.MNIST(root='./datasets', train=True, transform=transforms.ToTensor(), download=True)
mnist_test = datasets.MNIST(root='./datasets', train=False, transform=transforms.ToTensor(), download=True)

# Small subsets (first 1000 samples) used by the federated experiments.
# Only the needed samples are indexed: list(dataset)[0:1000] would first
# decode and transform every image of the dataset just to throw most away.
mnist_train_slice = [mnist_train[i] for i in range(min(1000, len(mnist_train)))]
mnist_test_slice = [mnist_test[i] for i in range(min(1000, len(mnist_test)))]

# Loaders over the full datasets, used by the centralized experiment.
train_loader = DataLoader(mnist_train, batch_size=100, shuffle=True)
test_loader = DataLoader(mnist_test, batch_size=100, shuffle=False)

Classe do modelo

In [97]:
class MNIST_MLP(nn.Module):
    """Two-layer perceptron: 784 inputs -> 500 hidden units -> 10 outputs."""

    def __init__(self):
        super().__init__()
        # The attribute names are also the state_dict key prefixes
        # ("lin1.*", "linout.*") stored in saved checkpoints.
        self.lin1 = nn.Linear(784, 500)
        self.linout = nn.Linear(500, 10)

    def forward(self, x):
        """Return the output-layer scores for a batch of flattened inputs."""
        hidden = self.lin1(x)
        # Rectification written as an element-wise maximum against zeros.
        rectified = torch.max(torch.zeros_like(hidden), hidden)
        scores = self.linout(rectified)
        return scores

#### Aprendizado Centralizado

Treinando o modelo centralizado

In [98]:
## Training
# Instantiate the model
model = MNIST_MLP()

# Loss and optimizer
# NOTE(review): lr=1.1 is unusually large for SGD; kept because the recorded
# accuracy below was obtained with it.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1.1)

model.train()  # make the training mode explicit
for epoch in range(1):
    # Iterate over the training mini-batches
    for images, labels in tqdm(train_loader):
        # Zero out the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass on flattened 28x28 images.
        # Call the module itself rather than .forward() so that nn.Module's
        # call machinery (hooks) is not bypassed.
        x = images.view(-1, 28*28)
        y = model(x)
        loss = criterion(y, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

## Testing
model.eval()  # make the evaluation mode explicit
correct = 0
total = len(mnist_test)

with torch.no_grad():
    # Iterate over the test mini-batches
    for images, labels in tqdm(test_loader):
        # Forward pass
        x = images.view(-1, 28*28)
        y = model(x)
        predictions = torch.argmax(y, dim=1)
        correct += torch.sum((predictions==labels).float())

print('Test accuracy: {}'.format(correct/total))

  0%|          | 0/600 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

Test accuracy: 0.9624999761581421


Salvando o modelo

In [100]:
 file = "modelo_mlp.pth"
# torch.save(model.state_dict(), file)

Carregando modelo

In [144]:
# Reference model: loaded from the checkpoint and left unmodified; its
# parameter norms are used later to rescale the modified copy.
net = MNIST_MLP()
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs and avoids executing arbitrary pickled code.
net.load_state_dict(torch.load(file, weights_only=True))

<All keys matched successfully>

Carregando o mesmo modelo novamente (net2) para alterar os pesos; o modelo original (net) é mantido para preservar a escala dos pesos.

In [427]:
# Working copy: re-running this cell restores net2 to the checkpoint weights
# before one of the weight-aggregation experiments below is applied to it.
net2 = MNIST_MLP()
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs and avoids executing arbitrary pickled code.
net2.load_state_dict(torch.load(file, weights_only=True))

<All keys matched successfully>

In [104]:
# Switch net2 to evaluation mode; as the cell's last expression, the value
# returned by eval() is what gets displayed below.
net2.eval()

MNIST_MLP(
  (lin1): Linear(in_features=784, out_features=500, bias=True)
  (linout): Linear(in_features=500, out_features=10, bias=True)
)

Agregando os pesos pelos parâmetros (colunas) e pelos samples (linhas) através da mediana/média.
Depois, multiplicando as agregações para reconstruir o formato da matriz.

In [175]:
# Replace every 2-D weight matrix of net2 by the product of its per-row and
# per-column medians (same shape as the original matrix); biases are skipped.
with torch.no_grad():
    for p in net2.parameters():
        if p.dim() != 2:
            continue
        column_medians = p.median(dim=0).values  # one value per column
        row_medians = p.median(dim=1).values     # one value per row
        # Broadcasting (rows, 1) * (1, cols) gives entry [i, j] = row_i * col_j.
        p.data = row_medians.unsqueeze(1) * column_medians.unsqueeze(0)

Mantendo a escala dos parâmetros originais (em muitos casos não muda nada, pois os parâmetros são invariantes à escala).

In [431]:
# Give each 2-D weight of net2 the same norm as the matching weight of the
# unmodified reference model `net`; 1-D parameters (biases) are skipped.
with torch.no_grad():
    for reference, rebuilt in zip(net.parameters(), net2.parameters()):
        if rebuilt.dim() != 2:
            continue
        scale = reference.norm() / rebuilt.norm()
        rebuilt.mul_(scale)

Tentativa de manter somente media/mediana dos pesos pelos features

In [166]:
# Overwrite each 2-D weight of net2 so that every row is filled with that
# row's median (all columns of a row become identical).
with torch.no_grad():
    for p in net2.parameters():
        if p.dim() != 2:
            continue
        n_cols = p.size(1)
        row_medians = p.median(dim=1).values
        # (rows,) -> (rows, 1) -> (rows, n_cols)
        p.data = row_medians.unsqueeze(1).repeat(1, n_cols)

Tentativa de manter somente media/mediana dos features pelos pesos

In [183]:
# Overwrite each 2-D weight of net2 so that every column is filled with that
# column's median (all rows become identical).
with torch.no_grad():
    for p in net2.parameters():
        if p.dim() != 2:
            continue
        n_rows = p.size(0)
        column_medians = p.median(dim=0).values
        # (cols,) -> (1, cols) -> (n_rows, cols)
        p.data = column_medians.unsqueeze(0).repeat(n_rows, 1)

In [421]:
# List the shape of each parameter tensor of net2 (weights and biases).
for param in net2.parameters():
    print(param.shape)

torch.Size([500, 784])
torch.Size([500])
torch.Size([10, 500])
torch.Size([10])


Testando com medias por grupos

Criando função para juntar n elementos e computar a mediana dos grupos completos (e a média do grupo restante, quando houver)

In [328]:
def reduce_matrix(arr, n):
    """Collapse each row of a 2-D matrix into per-group summaries.

    Columns are split, left to right, into consecutive groups of ``n``. Every
    complete group is replaced by its median. If the number of columns is not
    a multiple of ``n``, the leftover columns form one extra group that is
    replaced by its mean.

    NOTE(review): complete groups use the median while the leftover group uses
    the mean; this mix is kept as found -- confirm it is intended.

    Args:
        arr: 2-D numpy array, torch tensor (may require grad) or nested sequence.
        n: group size, a positive integer.

    Returns:
        (reduced, rest): ``reduced`` is a numpy array with one column per group
        and ``rest`` is the number of leftover columns (0 when the columns
        divide evenly). Both are the inputs expected by ``resize_matrix``.

    Raises:
        ValueError: if ``arr`` is not 2-D or ``n`` is smaller than 1.
    """
    if isinstance(arr, torch.Tensor):
        # detach() lets parameters that require grad be converted to numpy.
        arr = arr.detach().cpu().numpy()
    else:
        arr = np.asarray(arr)

    if arr.ndim != 2:
        raise ValueError(f"reduce_matrix expects a 2-D matrix, got {arr.ndim} dimension(s)")
    if n < 1:
        raise ValueError(f"group size n must be >= 1, got {n}")

    n_rows, n_cols = arr.shape
    # Number of complete groups per row and number of leftover columns.
    full_groups_count, rest = divmod(n_cols, n)
    split_at = full_groups_count * n

    # View the complete groups as (rows, groups, n) and reduce the last axis.
    grouped = arr[:, :split_at].reshape(n_rows, full_groups_count, n)
    mean_arr = np.median(grouped, axis=2)

    if rest != 0:
        # Leftover columns become one extra group, summarised by its mean.
        remaining_means = np.mean(arr[:, split_at:], axis=1, keepdims=True)
        mean_arr = np.concatenate((mean_arr, remaining_means), axis=1)

    return mean_arr, rest

In [385]:
def resize_matrix(arr, n, r):
    """Expand a matrix of per-group values back to the original column count.

    Args:
        arr: 2-D numpy array with one column per group.
        n: number of columns each complete group expands to.
        r: number of columns the last group expands to; 0 means every column
           of ``arr`` is a complete group.

    Returns:
        numpy array with the same number of rows as ``arr`` in which each
        group value is repeated along the columns.
    """
    if r == 0:
        # Every column is a complete group: repeat each one n times in place.
        return np.repeat(arr, n, axis=1)

    # All columns but the last are complete groups ...
    expanded_groups = np.repeat(arr[:, :-1], n, axis=1)
    # ... and the last column stands for the r leftover columns.
    expanded_rest = np.repeat(arr[:, -1:], r, axis=1)
    return np.concatenate((expanded_groups, expanded_rest), axis=1)

In [428]:
# Replace each 2-D weight of net2 by a piecewise-constant version: columns are
# grouped n at a time, summarised by reduce_matrix and expanded back to the
# original width by resize_matrix.
for p in net2.parameters():
    if p.dim() == 2:
        # Group size: 0.3% of the number of columns, rounded to an integer.
        n = round(p.size(1)*0.006/2)
        print(p.size(0))
        print(n)
        # n == 0 is not a valid group size and n == 1 would leave the matrix
        # unchanged. The previous test (`n != 0 or n != 1`) was always true,
        # so narrow layers (n rounding to 0) reached reduce_matrix and failed.
        if n > 1:
            medias, resto = reduce_matrix(p, n)
            p.data = torch.from_numpy(resize_matrix(medias, n, resto))

500
2
10
2


In [380]:
# Scratch check of the reduce -> resize round trip on the demo tensor `t`
# (NOTE: `t` is defined in a cell further down the notebook, so that cell has
# to be run first).
#
# This cell used to carry an inline copy of reduce_matrix/resize_matrix. That
# copy always treated the last group as a remainder, so whenever the column
# count was a multiple of n it dropped a real group from the result. Calling
# the helpers avoids the duplication and handles the no-remainder case.
n = 4
mean_arr, rest = reduce_matrix(t, n)
matrix = resize_matrix(mean_arr, n, rest)

Testando novos parametros no caso centralizado

In [432]:
# Accuracy of the modified network (net2) on the full MNIST test set.
correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten every 28x28 image into a 784-vector and run the network
        x = images.view(-1, 784)
        y = net2.forward(x)
        # Predicted class = index of the largest output score
        predictions = y.argmax(dim=1)
        hits = (predictions == labels).float()
        correct += hits.sum()
print(correct / len(mnist_test))

tensor(0.9458)


#### Aprendizado Federado

Setando onde vao ser realizadas as computacoes

In [269]:
# Device string handed to .to(...) for the model and for every batch in the
# federated train/test helpers.
DEVICE = "cpu"

Separando os dados por cliente

In [294]:
NUM_CLIENTS = 10
BATCH_SIZE = 32

# Split the 1000-sample training slice into NUM_CLIENTS equally sized,
# disjoint partitions (fixed seed so the split is reproducible).
# The result is deliberately NOT called `datasets`: that name is the
# torchvision module imported at the top, and rebinding it makes the data
# loading cell fail when it is re-run.
partition_size = len(mnist_train_slice) // NUM_CLIENTS
partition_lengths = [partition_size] * NUM_CLIENTS
client_datasets = random_split(mnist_train_slice, partition_lengths, torch.Generator().manual_seed(42))

# Create train/val for each partition and wrap it into DataLoader
trainloaders = []
valloaders = []
for ds in client_datasets:
    len_val = len(ds) // 10  # 10% of each partition is held out for validation
    len_train = len(ds) - len_val
    ds_train, ds_val = random_split(ds, [len_train, len_val], torch.Generator().manual_seed(42))
    trainloaders.append(DataLoader(ds_train, batch_size=BATCH_SIZE, shuffle=True))
    valloaders.append(DataLoader(ds_val, batch_size=BATCH_SIZE))

# Shared test loader over the 1000-sample test slice
testloader = DataLoader(mnist_test_slice, batch_size=BATCH_SIZE)

Define funcao de treino e teste

In [285]:
def train(net, trainloader, epochs: int, verbose=False):
    """Train the network on the training set.

    Args:
        net: model to optimise in place; it receives flattened 28x28 inputs.
        trainloader: DataLoader yielding (images, labels) batches.
        epochs: number of full passes over ``trainloader``.
        verbose: when True, print loss and accuracy after every epoch.

    Returns:
        None. ``net`` is updated in place with Adam (default settings) and
        cross-entropy loss.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters())
    net.train()
    for epoch in range(epochs):
        correct, total, epoch_loss = 0, 0, 0.0
        for images, labels in trainloader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            images = images.view(-1, 28*28)
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Metrics
            # .item() stores a plain float; adding the loss tensor itself
            # would keep every batch's autograd graph alive for the epoch.
            epoch_loss += loss.item()
            total += labels.size(0)
            correct += (torch.max(outputs.data, 1)[1] == labels).sum().item()
        # Sum of per-batch losses divided by the dataset size (same
        # normalisation as test()).
        epoch_loss /= len(trainloader.dataset)
        epoch_acc = correct / total
        # Report inside the loop: one line per epoch, and no reference to
        # undefined names when epochs == 0.
        if verbose:
            print(f"Epoch {epoch+1}: train loss {epoch_loss}, accuracy {epoch_acc}")

def test(net, testloader):
    """Evaluate the network on the entire test set.

    Returns a ``(loss, accuracy)`` pair: the sum of the per-batch
    cross-entropy losses divided by the dataset size, and the fraction of
    correctly classified samples.
    """
    loss_fn = torch.nn.CrossEntropyLoss()
    n_correct = 0
    n_seen = 0
    loss_sum = 0.0
    net.eval()
    with torch.no_grad():
        for batch_images, batch_labels in testloader:
            batch_images = batch_images.to(DEVICE)
            batch_labels = batch_labels.to(DEVICE)
            # Flatten the images before the forward pass
            logits = net(batch_images.view(-1, 28*28))
            loss_sum += loss_fn(logits, batch_labels).item()
            # Index of the largest score along the class dimension
            predicted = torch.max(logits.data, 1)[1]
            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()
    return loss_sum / len(testloader.dataset), n_correct / n_seen

Criando classe do cliente

In [279]:
def get_parameters(net) -> List[np.ndarray]:
    """Return every state_dict entry of ``net`` as a numpy array, in state_dict order."""
    arrays = []
    for tensor in net.state_dict().values():
        arrays.append(tensor.cpu().numpy())
    return arrays

def set_parameters(net, parameters: List[np.ndarray]):
    """Load a list of numpy arrays into ``net``.

    Args:
        net: model whose state_dict is overwritten.
        parameters: arrays in the same order as ``net.state_dict()`` (the
            format produced by ``get_parameters``).

    The arrays are paired with the state_dict keys by position and loaded with
    ``strict=True``, so missing entries raise inside ``load_state_dict``.
    """
    params_dict = zip(net.state_dict().keys(), parameters)
    # torch.tensor keeps each array's dtype and accepts 0-d arrays, so integer
    # or scalar buffers (e.g. BatchNorm's batch counter) round-trip as well;
    # the legacy torch.Tensor(v) constructor always builds a float32 tensor.
    state_dict = OrderedDict({k: torch.tensor(v) for k, v in params_dict})
    net.load_state_dict(state_dict, strict=True)
    
class FlowerClient(fl.client.NumPyClient):
    """Flower client that trains and evaluates one model on one data partition."""

    def __init__(self, net, trainloader, valloader):
        self.net = net
        self.trainloader = trainloader
        self.valloader = valloader

    def get_parameters(self, config):
        """Return the current local model parameters as numpy arrays."""
        return get_parameters(self.net)

    def fit(self, parameters, config):
        """Load the received parameters, train for one epoch, return the update.

        Returns the updated parameters, the number of training examples and an
        empty metrics dict.
        """
        set_parameters(self.net, parameters)
        train(self.net, self.trainloader, epochs=1)
        # Report the number of examples, not len(loader) (the number of
        # batches): this count is the client's weight during aggregation.
        return get_parameters(self.net), len(self.trainloader.dataset), {}

    def evaluate(self, parameters, config):
        """Load the received parameters and evaluate them on the local validation set.

        Returns the loss, the number of validation examples and the accuracy.
        """
        set_parameters(self.net, parameters)
        loss, accuracy = test(self.net, self.valloader)
        # Same convention as fit(): examples, not batches.
        return float(loss), len(self.valloader.dataset), {"accuracy": float(accuracy)}

Cria funcao para gerar instancias do FlowerClient

In [280]:
def client_fn(cid: str) -> FlowerClient:
    """Create a Flower client representing a single organization.

    ``cid`` is converted to an integer and used to pick this client's own
    train/validation loaders, so every client works on its own partition.
    """
    # Fresh model placed on the configured device
    model = MNIST_MLP().to(DEVICE)

    # Loaders belonging to this client only
    client_index = int(cid)
    return FlowerClient(model, trainloaders[client_index], valloaders[client_index])

Funcao para mostrar acuracia

In [289]:
def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    """Aggregate client accuracies into one example-weighted average.

    ``metrics`` holds one ``(num_examples, client_metrics)`` pair per client;
    each ``client_metrics`` must contain an "accuracy" entry.
    """
    # Each accuracy counts proportionally to the examples it was measured on
    weighted_sum = sum(count * client_metrics["accuracy"] for count, client_metrics in metrics)
    total_examples = sum(count for count, _ in metrics)

    # Custom aggregated metric (weighted average)
    return {"accuracy": weighted_sum / total_examples}

Define a estratégia de agregação e treinamento

In [290]:
# FedAvg configuration. The client counts are derived from NUM_CLIENTS rather
# than repeated as literals: with hard-coded values, lowering NUM_CLIENTS
# would leave the server waiting for clients that never exist.
strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0, # Sample 100% of available clients for training
    fraction_evaluate=0.5, # Sample 50% of available clients for evaluation
    min_fit_clients=NUM_CLIENTS, # Never sample fewer than all clients for training
    min_evaluate_clients=max(1, NUM_CLIENTS // 2), # Never sample fewer than half of the clients for evaluation
    min_available_clients=NUM_CLIENTS, # Wait until all clients are available
    evaluate_metrics_aggregation_fn=weighted_average,
)

Treinamento

In [295]:
# Run the federated simulation: NUM_CLIENTS virtual clients created by
# client_fn, 10 server rounds, aggregated with the `strategy` defined above.
# The call is the cell's last expression, so its return value is displayed.
fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=10),
    strategy=strategy,
)

[92mINFO [0m:      Starting Flower simulation, config: num_rounds=10, no round_timeout
2024-04-27 16:29:39,490	INFO worker.py:1621 -- Started a local Ray instance.
[92mINFO [0m:      Flower VCE: Ray initialized with resources: {'memory': 2725711872.0, 'node:127.0.0.1': 1.0, 'CPU': 4.0, 'object_store_memory': 1362855936.0, 'node:__internal_head__': 1.0}
[92mINFO [0m:      Optimize your simulation with Flower VCE: https://flower.ai/docs/framework/how-to-run-simulations.html
[92mINFO [0m:      No `client_resources` specified. Using minimal resources for clients.
[92mINFO [0m:      Flower VCE: Resources for each Virtual Client: {'num_cpus': 1, 'num_gpus': 0.0}
[92mINFO [0m:      Flower VCE: Creating VirtualClientEngineActorPool with 4 actors
[92mINFO [0m:      [INIT]
[92mINFO [0m:      Requesting initial parameters from one random client
[2m[36m(pid=27538)[0m 2024-04-27 16:29:47.642325: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimi

History (loss, distributed):
('\tround 1: 0.21507327556610112\n'
 '\tround 2: 0.19945120334625244\n'
 '\tround 3: 0.17114811420440673\n'
 '\tround 4: 0.15345122337341308\n'
 '\tround 5: 0.12797161579132083\n'
 '\tround 6: 0.11112957596778869\n'
 '\tround 7: 0.10416423082351685\n'
 '\tround 8: 0.09715561628341673\n'
 '\tround 9: 0.09623378753662108\n'
 '\tround 10: 0.06966585874557493\n')History (metrics, distributed, evaluate):
{'accuracy': [(1, 0.54),
              (2, 0.5),
              (3, 0.72),
              (4, 0.6),
              (5, 0.7),
              (6, 0.76),
              (7, 0.6799999999999999),
              (8, 0.74),
              (9, 0.76),
              (10, 0.8400000000000001)]}

In [162]:
# Small 1-D array used in the next cells to compare np.tile and np.repeat.
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [119]:
# np.tile stacks 10 whole copies of `a` as rows: shape (10, 5).
b = np.tile(a, (10, 1))
b

array([[0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4]])

In [164]:
# np.repeat repeats each element 10 times; after the reshape, row i holds
# only a[i]: shape (5, 10).
c = np.repeat(a, 10).reshape(-1,10)
c

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]])

In [234]:
# 3x6 demo tensor used to try out the group-reduction helpers by hand.
t = torch.Tensor([[0,1,2,3,12,13],
                  [4,5,6,7,14,15],
                  [8,9,10,11,16,17]]
                )

In [None]:
def reduz(tensor, n, agg):
    """Unfinished draft of a reduction over ``tensor`` selected by ``agg``.

    The draft only started an ``agg == "feature"`` branch that iterated over
    ``np.transpose(tensor)`` and had no loop body, which is a syntax error.
    Until it is written, the function fails explicitly when called.

    Raises:
        NotImplementedError: always.
    """
    # TODO: implement the "feature" aggregation (a loop over np.transpose(tensor)).
    raise NotImplementedError("reduz is an unfinished draft")

In [231]:
def mean_of_elements(arr, n):
    """Average each row of a 2-D matrix over consecutive groups of ``n`` columns.

    Columns are split, left to right, into groups of ``n``. If the number of
    columns is not a multiple of ``n``, the leftover columns form one last,
    shorter group.

    Args:
        arr: 2-D array-like (numpy array, nested sequence, or anything
            ``np.asarray`` accepts).
        n: group size, a positive integer.

    Returns:
        numpy array with the same number of rows as ``arr`` and one column per
        group, holding the group means.

    Raises:
        ValueError: if ``arr`` is not 2-D or ``n`` is smaller than 1.
    """
    # np.asarray returns ndarrays unchanged, so no type test is needed (the
    # previous test compared a type object with a string and was always true).
    arr = np.asarray(arr)
    if arr.ndim != 2:
        raise ValueError(f"mean_of_elements expects a 2-D matrix, got {arr.ndim} dimension(s)")
    if n < 1:
        raise ValueError(f"group size n must be >= 1, got {n}")

    n_rows, n_cols = arr.shape
    # Number of complete groups per row and number of leftover columns.
    full_groups_count, rest = divmod(n_cols, n)
    split_at = full_groups_count * n

    # View the complete groups as (rows, groups, n) and average the last axis.
    grouped = arr[:, :split_at].reshape(n_rows, full_groups_count, n)
    mean_arr = np.mean(grouped, axis=2)

    if rest != 0:
        # Leftover columns are averaged as one extra, shorter group.
        remaining_means = np.mean(arr[:, split_at:], axis=1, keepdims=True)
        mean_arr = np.concatenate((mean_arr, remaining_means), axis=1)

    return mean_arr

In [None]:
# Example array with a row that can't be evenly divided by n
array = np.array([[0, 1, 2, 3, 4], [4, 5, 6, 7, 8]])

# Call the function with n=2
result = mean_of_elements(array, 2)

print(result)

In [241]:
# Group means of the demo tensor `t` in blocks of 3 columns.
# NOTE(review): this rebinds `n`, which other cells use as an integer group size.
n =mean_of_elements(t,3)

In [242]:
n

array([[ 1.      ,  9.333333],
       [ 5.      , 12.      ],
       [ 9.      , 14.666667]], dtype=float32)

In [249]:
# Expand every group mean back to 3 columns: 2 groups x 3 = 6 columns per row.
n.repeat(3).reshape(-1,3*2)

array([[ 1.      ,  1.      ,  1.      ,  9.333333,  9.333333,  9.333333],
       [ 5.      ,  5.      ,  5.      , 12.      , 12.      , 12.      ],
       [ 9.      ,  9.      ,  9.      , 14.666667, 14.666667, 14.666667]],
      dtype=float32)

In [312]:
array

array([[0, 1, 2, 3, 4],
       [4, 5, 6, 7, 8]])

In [326]:
# Prototype of the resize step: repeat all columns but the last 3 times and
# the last column 2 times, then join both parts column-wise.
np.concatenate((array[:,:-1].repeat(3).reshape(2,-1),array[:,-1].repeat(2).reshape(2,-1)), axis = 1)

array([[0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4],
       [4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8]])

In [321]:
# Last column of `array`, each value repeated twice, one row per original row.
array[:,-1].repeat(2).reshape(2,-1)

array([[4, 4],
       [8, 8]])