In [1]:
!pip install sklearn

Collecting sklearn
  Downloading sklearn-0.0.tar.gz (1.1 kB)
Collecting scikit-learn
  Downloading scikit_learn-0.24.0-cp36-cp36m-manylinux2010_x86_64.whl (22.2 MB)
[K     |████████████████████████████████| 22.2 MB 1.7 MB/s eta 0:00:01
[?25hCollecting scipy>=0.19.1
  Downloading scipy-1.5.4-cp36-cp36m-manylinux1_x86_64.whl (25.9 MB)
[K     |████████████████████████████████| 25.9 MB 3.7 MB/s eta 0:00:01     |███████████████████             | 15.4 MB 2.8 MB/s eta 0:00:04
[?25hCollecting joblib>=0.11
  Downloading joblib-1.0.0-py3-none-any.whl (302 kB)
[K     |████████████████████████████████| 302 kB 5.2 MB/s eta 0:00:01
[?25hCollecting threadpoolctl>=2.0.0
  Downloading threadpoolctl-2.1.0-py3-none-any.whl (12 kB)
Building wheels for collected packages: sklearn
  Building wheel for sklearn (setup.py) ... [?25ldone
[?25h  Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1316 sha256=3def35c2efa3b853cb30df1a2f273ea2a62a09254ba816818e7e3c3754d19ff5
  Stored i

In [2]:
import numpy as np
import datetime
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist

from sklearn.preprocessing import LabelBinarizer

# Coleta dos Dados

In [3]:
# Fetch the Fashion-MNIST train/test splits (downloaded on first use).
train_split, test_split = fashion_mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


In [4]:
# Shapes of the image and label arrays for both splits.
tuple(split.shape for split in (X_train, y_train, X_test, y_test))

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

## Normalização das imagens

In [5]:
# Divide the pixel values of both splits by 255.0.
X_train, X_test = X_train / 255.0, X_test / 255.0

## Remodelagem (reshaping) da base de dados

In [6]:
# 60,000 training images, each 28 pixels high and 28 pixels wide
X_train.shape

(60000, 28, 28)

In [7]:
# Flatten every 28x28 image matrix into a single 784-element vector.
flat_dim = 28 * 28
X_train = np.reshape(X_train, (-1, flat_dim))
X_test = np.reshape(X_test, (-1, flat_dim))
X_train.shape, X_test.shape

((60000, 784), (10000, 784))

In [8]:
# One-hot encode the integer class labels.
lb = LabelBinarizer()
y_train = lb.fit_transform(y_train)
# Reuse the class-to-column mapping learned from the training labels instead of
# re-fitting on the test labels, so both splits are guaranteed to share one encoding.
y_test = lb.transform(y_test)

In [9]:
# Hyperparameters shared by the global model and every client model.
lr = 0.01  # initial SGD learning rate
comms_round = 100  # number of global communication (aggregation) rounds
loss = 'categorical_crossentropy'
metrics = ['accuracy']
# `learning_rate` is the supported keyword; `lr` is only kept by Keras as a deprecated alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr, decay=lr / comms_round, momentum=0.9)

# Criação de Clientes

In [10]:
# Number of simulated clients the training data is partitioned across.
n_clients = 20

1. Cria um vetor com os índices dos pontos de dados e embaralha seus índices
2. Distribui os índices dos pontos de dados entre os clientes e então embaralha-os
3. Gera valores a partir de uma distribuição uniforme (entre 0 e 1)
4. Passa por metade dos índices de clientes, removendo uma porcentagem dos dados de cada um desses clientes e passando para a outra metade dos clientes

In [11]:
def generate_indexes(X, n_clients):
    """Randomly partition row indices of ``X`` into unevenly sized client shards.

    A random-length prefix of the row indices (at least 80% of the rows, always
    fewer than all of them) is shuffled and split into ``n_clients`` nearly
    equal parts. The clients are then paired at random and, for each pair, a
    uniformly drawn fraction of one client's indices is moved to the other.

    Args:
        X: array-like exposing ``shape[0]`` as the number of rows.
        n_clients: number of shards to produce.

    Returns:
        A list of ``n_clients`` 1-D integer arrays holding disjoint row indices.
    """
    total_rows = X.shape[0]
    n_used = np.random.randint(int(total_rows * 0.8), total_rows)
    pool = np.arange(n_used)

    np.random.shuffle(pool)
    partitions = np.array_split(pool, n_clients)

    # Random client order: the first half receives data, the second half donates it.
    order = np.arange(n_clients)
    np.random.shuffle(order)

    n_pairs = n_clients // 2
    fractions = np.random.random_sample((n_pairs,))
    for receiver, donor, fraction in zip(order[:n_pairs], order[n_pairs:], fractions):
        donor_rows = partitions[donor]
        cut = int(donor_rows.shape[0] * fraction)

        partitions[receiver] = np.concatenate((partitions[receiver], donor_rows[:cut]))
        partitions[donor] = donor_rows[cut:]

    return partitions

In [12]:
def create_clients(X, y, n_clients):
    """Split the features and labels into one shard per client.

    Args:
        X: feature array, indexable by an integer index array along axis 0.
        y: label array aligned row-by-row with ``X``.
        n_clients: number of client shards to create.

    Returns:
        ``(X_slices, y_slices)``: two 1-D object arrays of length ``n_clients``
        whose i-th entries are the feature and label arrays of client ``i``.
    """
    indexes = generate_indexes(X, n_clients)

    # Fill pre-allocated 1-D object arrays element by element. Building them with
    # np.array([...], dtype='object') would collapse into an N-D object array
    # whenever every shard has the same length (e.g. n_clients == 1).
    X_slices = np.empty(n_clients, dtype='object')
    y_slices = np.empty(n_clients, dtype='object')
    for i in range(n_clients):
        X_slices[i] = X[indexes[i]]
        y_slices[i] = y[indexes[i]]
    return X_slices, y_slices

In [13]:
X_slices, y_slices = create_clients(X_train, y_train, n_clients)

In [14]:
X_slices.shape, y_slices.shape

((20,), (20,))

# Construção do Modelo

In [15]:
class MLP:
    """Factory for the fully connected classifier used by the global and client models."""

    def build(self):
        """Create a fresh, uncompiled Sequential network.

        Architecture: 784 inputs -> Dense(256, relu) -> Dropout(0.2)
        -> Dense(128, relu) -> Dropout(0.2) -> Dense(10, softmax).

        Returns:
            A new ``tf.keras.models.Sequential`` instance; compiling it is
            left to the caller.
        """
        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Dense(units=256, activation='relu', input_shape=(784, )))
        model.add(tf.keras.layers.Dropout(0.2))
        # Only the first layer declares input_shape: this layer consumes the
        # 256-unit output above, so a (784,) input shape here would be misleading.
        model.add(tf.keras.layers.Dense(units=128, activation='relu'))
        model.add(tf.keras.layers.Dropout(0.2))
        model.add(tf.keras.layers.Dense(units=10, activation='softmax'))
        return model

In [16]:
def NN_update(local_data, global_data, model, X_test, y_test):
    """Scale a client's weights and test metrics by its share of the data.

    Args:
        local_data: number of samples held by this client.
        global_data: total number of samples across all clients.
        model: trained client model exposing ``get_weights()`` and
            ``evaluate(X, y, verbose=0)``; ``evaluate`` must return exactly
            two values (loss, accuracy).
        X_test: test features passed to ``model.evaluate``.
        y_test: test labels passed to ``model.evaluate``.

    Returns:
        ``(weights, test_loss, test_accuracy)`` where ``weights`` is a 1-D
        object array holding each weight tensor multiplied by
        ``local_data / global_data``, and the loss and accuracy are scaled by
        the same factor.
    """
    share = local_data / global_data

    # Fill a pre-allocated object array tensor by tensor. Passing the ragged
    # list straight to np.array(..., dtype='object') raises when tensors share
    # a leading dimension (e.g. a (n, n) kernel followed by its (n,) bias).
    layer_weights = model.get_weights()
    weights = np.empty(len(layer_weights), dtype='object')
    for k, layer in enumerate(layer_weights):
        weights[k] = share * layer

    test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

    return weights, share * test_loss, share * test_accuracy

In [17]:
mlp = MLP()

# Global model: its weights are sent to every client at the start of each round
# and replaced by the data-weighted average of the client weights at the end.
global_model = mlp.build()
global_model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

for j in range(comms_round):

    global_weights = global_model.get_weights()
    # Per-layer accumulators for the weighted sum of the client weights.
    new_global_weights = [np.zeros(weight.shape) for weight in global_weights]

    # The training data is re-partitioned among the clients on every round.
    X_slices, y_slices = create_clients(X_train, y_train, n_clients)
    global_data = sum(client_X.shape[0] for client_X in X_slices)

    avg_local_loss = 0.0
    avg_local_acc = 0.0

    for i in range(n_clients):
        # NOTE(review): the single module-level `optimizer` instance is shared by
        # the global model and every client model; presumably its state carries
        # over between clients - confirm this is intended.
        client_model = mlp.build()
        client_model.compile(loss=loss,
                             optimizer=optimizer,
                             metrics=metrics)

        # Start from the current global weights, then train on the client's shard.
        client_model.set_weights(global_weights)
        client_model.fit(X_slices[i], y_slices[i], epochs=2, verbose=0)

        weights, test_loss, test_accuracy = NN_update(X_slices[i].shape[0], global_data,
                                                      client_model, X_test, y_test)

        # Sum layer by layer. `list += ndarray` would hand the operation to NumPy,
        # which first converts the ragged list of layers into an array without
        # dtype=object - an error on recent NumPy releases.
        new_global_weights = [total + layer for total, layer in zip(new_global_weights, weights)]
        avg_local_loss += test_loss
        avg_local_acc += test_accuracy

    global_model.set_weights(new_global_weights)

    test_loss, test_accuracy = global_model.evaluate(X_test, y_test)
    print("Round {}, Loss: {:.3f}, Accuracy: {:.3f}".format(j, test_loss, test_accuracy))
    print("(AVG Local) Round {}, Loss: {:.3f}, Accuracy: {:.3f}".format(j, avg_local_loss, avg_local_acc))



Round 0, Loss: 0.630, Accuracy: 0.779
(AVG Local) Round 0, Loss: 0.684, Accuracy: 0.751
Round 1, Loss: 0.541, Accuracy: 0.804
(AVG Local) Round 1, Loss: 0.586, Accuracy: 0.786
Round 2, Loss: 0.496, Accuracy: 0.822
(AVG Local) Round 2, Loss: 0.529, Accuracy: 0.809
Round 3, Loss: 0.476, Accuracy: 0.830
(AVG Local) Round 3, Loss: 0.513, Accuracy: 0.816
Round 4, Loss: 0.460, Accuracy: 0.835
(AVG Local) Round 4, Loss: 0.485, Accuracy: 0.825
Round 5, Loss: 0.450, Accuracy: 0.839
(AVG Local) Round 5, Loss: 0.472, Accuracy: 0.831
Round 6, Loss: 0.440, Accuracy: 0.841
(AVG Local) Round 6, Loss: 0.459, Accuracy: 0.834
Round 7, Loss: 0.433, Accuracy: 0.844
(AVG Local) Round 7, Loss: 0.448, Accuracy: 0.839
Round 8, Loss: 0.428, Accuracy: 0.845
(AVG Local) Round 8, Loss: 0.445, Accuracy: 0.839
Round 9, Loss: 0.424, Accuracy: 0.847
(AVG Local) Round 9, Loss: 0.440, Accuracy: 0.841
Round 10, Loss: 0.419, Accuracy: 0.850
(AVG Local) Round 10, Loss: 0.432, Accuracy: 0.845
Round 11, Loss: 0.416, Accurac

Round 46, Loss: 0.376, Accuracy: 0.865
(AVG Local) Round 46, Loss: 0.380, Accuracy: 0.864
Round 47, Loss: 0.376, Accuracy: 0.865
(AVG Local) Round 47, Loss: 0.380, Accuracy: 0.863
Round 48, Loss: 0.375, Accuracy: 0.866
(AVG Local) Round 48, Loss: 0.380, Accuracy: 0.864
Round 49, Loss: 0.375, Accuracy: 0.866
(AVG Local) Round 49, Loss: 0.379, Accuracy: 0.864
Round 50, Loss: 0.374, Accuracy: 0.865
(AVG Local) Round 50, Loss: 0.378, Accuracy: 0.864
Round 51, Loss: 0.374, Accuracy: 0.865
(AVG Local) Round 51, Loss: 0.377, Accuracy: 0.864
Round 52, Loss: 0.373, Accuracy: 0.866
(AVG Local) Round 52, Loss: 0.377, Accuracy: 0.865
Round 53, Loss: 0.373, Accuracy: 0.866
(AVG Local) Round 53, Loss: 0.377, Accuracy: 0.864
Round 54, Loss: 0.373, Accuracy: 0.866
(AVG Local) Round 54, Loss: 0.376, Accuracy: 0.865
Round 55, Loss: 0.372, Accuracy: 0.866
(AVG Local) Round 55, Loss: 0.376, Accuracy: 0.865
Round 56, Loss: 0.372, Accuracy: 0.865
(AVG Local) Round 56, Loss: 0.376, Accuracy: 0.865
Round 57, 

Round 92, Loss: 0.362, Accuracy: 0.870
(AVG Local) Round 92, Loss: 0.365, Accuracy: 0.869
Round 93, Loss: 0.362, Accuracy: 0.869
(AVG Local) Round 93, Loss: 0.364, Accuracy: 0.869
Round 94, Loss: 0.362, Accuracy: 0.870
(AVG Local) Round 94, Loss: 0.364, Accuracy: 0.869
Round 95, Loss: 0.362, Accuracy: 0.870
(AVG Local) Round 95, Loss: 0.364, Accuracy: 0.869
Round 96, Loss: 0.362, Accuracy: 0.869
(AVG Local) Round 96, Loss: 0.364, Accuracy: 0.869
Round 97, Loss: 0.362, Accuracy: 0.870
(AVG Local) Round 97, Loss: 0.364, Accuracy: 0.869
Round 98, Loss: 0.361, Accuracy: 0.870
(AVG Local) Round 98, Loss: 0.363, Accuracy: 0.870
Round 99, Loss: 0.361, Accuracy: 0.870
(AVG Local) Round 99, Loss: 0.363, Accuracy: 0.869
