# MNIST

In this notebook, we manually add and preprocess our own dataset and create our own scenario configuration.


https://elitedatascience.com/keras-tutorial-deep-learning-in-python

https://medium.com/@mjbhobe/mnist-digits-classification-with-keras-ed6c2374bd0e

In [20]:
# imports
import matplotlib.pyplot as plt
from loguru import logger
import tensorflow as tf


from pkg import *
from pkg import scenario, main
from pkg.dataset import Dataset


from pathlib import Path


import numpy as np
 
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.datasets import mnist

# Create a custom scenario from the mandatory parameters

In [21]:
# Mandatory scenario settings: number of partners and the share of the
# dataset given to each of them (one fraction per partner).
scenario_params = dict(
    partners_count=3,
    amounts_per_partner=[0.2, 0.5, 0.3],
)

# Set values for optional parameters

In [22]:
# Optional settings, added to the same parameter dictionary in one call.
scenario_params.update(epoch_count=10, minibatch_count=3)

#### Every other parameter will be set to its default value

In [23]:
# Folder handed to the scenario as its experiment path. It is resolved
# against the current working directory rather than a machine-specific
# absolute path, so the notebook runs on any checkout and any OS.
experiment_path = Path.cwd() / "experiments" / "trash"
# Create the folder up front so later steps can rely on it existing.
experiment_path.mkdir(parents=True, exist_ok=True)

In [24]:
# Build the scenario from the parameter dictionary and the experiment folder.
current_scenario = scenario.Scenario(scenario_params, experiment_path)

2020-08-10 17:58:21.766 | DEBUG    | pkg.scenario:__init__:52 - Dataset selected: mnist
2020-08-10 17:58:21.766 | DEBUG    | pkg.scenario:__init__:87 - Computation use the full dataset for scenario #1
2020-08-10 17:58:21.833 | INFO     | pkg.scenario:__init__:279 - ### Description of data scenario configured:
2020-08-10 17:58:21.834 | INFO     | pkg.scenario:__init__:280 -    Number of partners defined: 3
2020-08-10 17:58:21.834 | INFO     | pkg.scenario:__init__:281 -    Data distribution scenario chosen: random
2020-08-10 17:58:21.835 | INFO     | pkg.scenario:__init__:282 -    Multi-partner learning approach: fedavg
2020-08-10 17:58:21.836 | INFO     | pkg.scenario:__init__:283 -    Weighting option: uniform
2020-08-10 17:58:21.836 | INFO     | pkg.scenario:__init__:284 -    Iterations parameters: 10 epochs > 3 mini-batches > 8 gradient updates per pass
2020-08-10 17:58:21.837 | INFO     | pkg.scenario:__init__:290 - ### Data loaded: mnist
2020-08-10 17:58:21.837 | INFO     | pkg.sc

# Create Data Set

In [25]:
# Per-sample image shape (height, width, channels) and number of classes.
input_shape = (28, 28, 1)
num_classes = 10

# Load the MNIST train/test arrays through Keras.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add the trailing channel axis, cast to float32, then divide by 255.
X_train = X_train.reshape(-1, *input_shape).astype('float32') / 255
X_test = X_test.reshape(-1, *input_shape).astype('float32') / 255

# Create Preprocessing function

In [26]:
def preprocess_dataset_labels(y):
    """One-hot encode integer class labels.

    Args:
        y: Array-like of integer class indices.

    Returns:
        The result of ``np_utils.to_categorical`` with ``num_classes``
        categories (one row per label).
    """
    # Use the notebook-level `num_classes` instead of a second hard-coded 10,
    # so the encoding width cannot drift from the model's output layer.
    return np_utils.to_categorical(y, num_classes)

# Create Model

In [27]:
def generate_new_model_for_dataset():
    """Build and compile a fresh convolutional classifier.

    The network stacks three (Conv2D -> MaxPooling2D) stages with 32, 64 and
    64 filters, followed by a 128-unit dense layer and a softmax output.
    It reads the notebook-level ``input_shape`` and ``num_classes``.

    Returns:
        A ``Sequential`` model compiled with the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    # Settings shared by every convolutional layer.
    conv_kwargs = dict(kernel_size=(3, 3), activation='relu', padding='same')

    layers = [
        # Convolutional feature extractor
        Conv2D(filters=32, input_shape=input_shape, **conv_kwargs),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, **conv_kwargs),
        MaxPooling2D(pool_size=(2, 2)),
        Conv2D(filters=64, **conv_kwargs),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        # Densely connected head
        Dense(128, activation='relu'),
        # Output layer: one probability per class
        Dense(num_classes, activation='softmax'),
    ]

    model = Sequential(layers)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Assign the dataset to the scenario

In [28]:
# Replace the scenario's dataset with one built from the arrays, constants
# and callables defined in the cells above. Arguments are passed positionally.
current_scenario.dataset = Dataset(
    "my_dataset",  # dataset name (readable afterwards as `dataset.name`)
    X_train,  # training images, reshaped and divided by 255 above
    X_test,  # test images, reshaped and divided by 255 above
    y_train,  # raw integer training labels as returned by mnist.load_data()
    y_test,  # raw integer test labels as returned by mnist.load_data()
    input_shape,  # (28, 28, 1)
    num_classes,  # 10
    preprocess_dataset_labels,  # label preprocessing callable defined above
    generate_new_model_for_dataset  # model factory callable defined above
)

In [29]:
# Check the name of the dataset now attached to the scenario
print(current_scenario.dataset.name)

my_dataset


# Split train and validation sets

In [30]:
# Split the scenario's dataset into train and validation parts
# (presumably updates `current_scenario.dataset` in place — confirm in pkg.dataset).
current_scenario.dataset.train_val_split()

# Legacy: reset the partners list

In [31]:
# NOTE(review): empties the scenario's partner list before the run; the
# section heading marks this as a legacy step — confirm whether
# main.run_scenario still requires it.
current_scenario.partners_list = []

# Run scenario

In [32]:
# Run the configured scenario. The captured logs below show the data being
# split among the partners, followed by the fedavg training rounds.
main.run_scenario(current_scenario)

2020-08-10 17:58:22.494 | INFO     | pkg.scenario:split_data:534 - ### Splitting data among partners:
2020-08-10 17:58:22.495 | INFO     | pkg.scenario:split_data:535 -    Simple split performed.
2020-08-10 17:58:22.496 | INFO     | pkg.scenario:split_data:536 -    Nb of samples split amongst partners: 38880
2020-08-10 17:58:22.496 | INFO     | pkg.scenario:split_data:538 -    Partner #0: 7776 samples with labels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
2020-08-10 17:58:22.497 | INFO     | pkg.scenario:split_data:538 -    Partner #1: 19440 samples with labels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
2020-08-10 17:58:22.497 | INFO     | pkg.scenario:split_data:538 -    Partner #2: 11664 samples with labels [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
2020-08-10 17:58:22.684 | DEBUG    | pkg.scenario:compute_batch_sizes:582 -    Compute batch sizes, partner #0: 324
2020-08-10 17:58:22.685 | DEBUG    | pkg.scenario:compute_batch_sizes:582 -    Compute batch sizes, partner #1: 810
2020-08-10 17:58:22.685 | DEBUG    | pkg.s

2020-08-10 17:58:44.812 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 01/09 > Minibatch 02/02 > Partner id #2 (2/2) > val_acc: 0.87
2020-08-10 17:58:44.814 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:303 - End of fedavg collaborative round.
2020-08-10 17:58:45.096 | INFO     | pkg.multi_partner_learning:compute_test_score:184 -    Model evaluation at the end of the epoch: ['0.420', '0.882']
2020-08-10 17:58:45.097 | DEBUG    | pkg.multi_partner_learning:compute_test_score:187 -       Checking if early stopping criteria are met:
2020-08-10 17:58:45.097 | DEBUG    | pkg.multi_partner_learning:compute_test_score:197 -          -> Early stopping criteria are not met, continuing with training.
2020-08-10 17:58:45.156 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:259 - Start new fedavg collaborative round ...
2020-08-10 17:58:45.158 | DEBUG    | pkg.multi_partner_learning:compute_collabor

2020-08-10 17:59:02.328 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 04/09 > Minibatch 00/02 > Partner id #0 (0/2) > val_acc: 0.95
2020-08-10 17:59:03.064 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 04/09 > Minibatch 00/02 > Partner id #1 (1/2) > val_acc: 0.95
2020-08-10 17:59:03.777 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 04/09 > Minibatch 00/02 > Partner id #2 (2/2) > val_acc: 0.95
2020-08-10 17:59:03.781 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:303 - End of fedavg collaborative round.
2020-08-10 17:59:03.781 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:259 - Start new fedavg collaborative round ...
2020-08-10 17:59:03.782 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:271 - (fedavg) Minibatch n°1 of epoch n°4, init aggregated model for each partner wi

2020-08-10 17:59:22.031 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 06/09 > Minibatch 01/02 > Partner id #1 (1/2) > val_acc: 0.97
2020-08-10 17:59:22.911 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 06/09 > Minibatch 01/02 > Partner id #2 (2/2) > val_acc: 0.97
2020-08-10 17:59:22.914 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:303 - End of fedavg collaborative round.
2020-08-10 17:59:22.914 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:259 - Start new fedavg collaborative round ...
2020-08-10 17:59:22.915 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:271 - (fedavg) Minibatch n°2 of epoch n°6, init aggregated model for each partner with models from previous round
2020-08-10 17:59:23.934 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 06/09 > Minibatch 02/02 > Partne

2020-08-10 17:59:41.608 | DEBUG    | pkg.multi_partner_learning:log_collaborative_round_partner_result:513 - Epoch 08/09 > Minibatch 02/02 > Partner id #2 (2/2) > val_acc: 0.98
2020-08-10 17:59:41.611 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:303 - End of fedavg collaborative round.
2020-08-10 17:59:41.909 | INFO     | pkg.multi_partner_learning:compute_test_score:184 -    Model evaluation at the end of the epoch: ['0.072', '0.980']
2020-08-10 17:59:41.910 | DEBUG    | pkg.multi_partner_learning:compute_test_score:187 -       Checking if early stopping criteria are met:
2020-08-10 17:59:41.911 | DEBUG    | pkg.multi_partner_learning:compute_test_score:197 -          -> Early stopping criteria are not met, continuing with training.
2020-08-10 17:59:41.970 | DEBUG    | pkg.multi_partner_learning:compute_collaborative_round_fedavg:259 - Start new fedavg collaborative round ...
2020-08-10 17:59:41.971 | DEBUG    | pkg.multi_partner_learning:compute_collabor

0

# Results

In [33]:
# Export the scenario results as a DataFrame and tag every row with
# constant `random_state` and `scenario_id` values.
df_results = current_scenario.to_dataframe().assign(random_state=1, scenario_id=1)
print(df_results.columns)

Index(['aggregation_weighting', 'dataset_fraction_per_partner', 'dataset_name',
       'epoch_count', 'final_relative_nb_samples',
       'gradient_updates_per_pass_count', 'is_early_stopping',
       'learning_computation_time_sec', 'minibatch_count',
       'mpl_nb_epochs_done', 'mpl_test_score',
       'multi_partner_learning_approach', 'nb_samples_used', 'partners_count',
       'samples_split_description', 'scenario_name', 'short_scenario_name',
       'test_data_samples_count', 'train_data_samples_count', 'random_state',
       'scenario_id'],
      dtype='object')


In [34]:
# Show the `mpl_test_score` column of the results frame.
print(df_results.mpl_test_score)

0    0.9815
Name: mpl_test_score, dtype: float64


In [35]:
# Show the scenario's `mpl` attribute (a MultiPartnerLearning object, per the output below).
print(current_scenario.mpl)

<pkg.multi_partner_learning.MultiPartnerLearning object at 0x000001DAAE92A408>


# Extract model 

In [36]:
# Retrieve the model from the multi-partner learning object
# (presumably the final trained model — confirm in pkg.multi_partner_learning).
model = current_scenario.mpl.get_model()

In [37]:
# Evaluate on the test images with one-hot encoded labels. Presumably returns
# [loss, accuracy], assuming the model was compiled as in
# generate_new_model_for_dataset — confirm.
model.evaluate(X_test, preprocess_dataset_labels(y_test))



[0.055614393090084195, 0.9815000295639038]

A test accuracy of about 98% seems like a good result for federated learning.