In [None]:
# default_exp model
%load_ext lab_black
# nb_black if running in jupyter
%load_ext autoreload
# automatically reload python modules if there are changes in the source files
%autoreload 2

In [None]:
# hide
from nbdev.showdoc import *

# Model

> In this notebook you create and test a Python class to hold your machine learning model.

***input***: toy dataset from data-notebook

***output***: TFF model for predicting customer paths 

***description:***

Model for simulating federated learning on predicting customer paths.

## Import relevant modules

In [None]:
# tensorflow_federated_nightly also brings in tf_nightly, which
# can cause a duplicate tensorboard install, leading to errors.
#!pip uninstall --yes tensorboard tb-nightly

#!pip install --quiet --upgrade tensorflow-federated-nightly
#!pip install --quiet --upgrade nest-asyncio
#!pip install --quiet --upgrade tb-nightly  # or tensorboard, but not both

In [None]:
# export
import numpy as np

import nest_asyncio

nest_asyncio.apply()

In [None]:
%load_ext tensorboard

In [None]:
import collections

import numpy as np
import tensorflow as tf
import tensorflow_federated as tff

# Short alias for the TFF helper used below to split a ClientData object
# into separate training and testing client sets.
train_test_client_split = tff.simulation.datasets.ClientData.train_test_client_split

# Smoke test: build and immediately run a trivial federated computation to
# check that TFF works in this kernel.
tff.federated_computation(lambda: "Hello, World!")()

b'Hello, World!'

In [None]:
from pyarrow import feather
import pandas as pd

In [None]:
from ml_federated_customer_path.data import create_tff_client_data_from_df

## Define notebook parameters

Remember, only simple assignments here!

In [None]:
# this cell is tagged with 'parameters'
# Seed handed to the random number generator in the next cell.
seed = 0
# Path of the feather file that is read into the dataframe.
data_filepath = "data/preprocessed_data/data.f"
# Factor applied when computing how many clients go to the test split.
test_split = 0.2
# Factor applied to the number of unique clients when sizing the test split;
# presumably the fraction of clients kept as toy data (confirm against the data notebook).
toy_size = 0.1

# Federated-simulation settings. Presumably: clients per round, local epochs,
# batch size and shuffle/prefetch buffer sizes (confirm where they are used).
NUM_CLIENTS = 10
NUM_EPOCHS = 5
BATCH_SIZE = 20
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

Make immediate derivations from the parameters:

In [None]:
# Seed every random number generator the notebook relies on: NumPy and also
# TensorFlow's global generator, so TF-side randomness is repeatable too.
np.random.seed(seed)
tf.random.set_seed(seed)

## Import toy data for testing

In [None]:
# Read the preprocessed feather file into a dataframe and display it
# (the rendered output shows the columns client_id, x and y).
df = feather.read_feather(data_filepath)
df

Unnamed: 0,client_id,x,y
0,0,"[1, 0, 0, 0, 0, 0, 0, 0]",8
1,1,"[1, 0, 0, 0, 0, 0, 0, 0]",6
2,1,"[0, 0, 0, 0, 0, 0, 1, 0]",6
3,1,"[0, 0, 0, 0, 0, 0, 1, 0]",1
4,1,"[0, 1, 0, 0, 0, 0, 0, 0]",2
...,...,...,...
83637,9999,"[0, 1, 0, 0, 0, 0, 0, 0]",1
83638,9999,"[0, 1, 0, 0, 0, 0, 0, 0]",4
83639,9999,"[0, 0, 0, 0, 1, 0, 0, 0]",3
83640,9999,"[0, 0, 0, 1, 0, 0, 0, 0]",1


Convert into tff ClientData (training + testing datasets):

In [None]:
# Sample the toy subset with the `toy_size` notebook parameter instead of a
# hard-coded 0.1, so changing the parameter changes the sample as well.
client_data = create_tff_client_data_from_df(df, sample_size=toy_size)

# Hold out `test_split` of the clients that were actually sampled; deriving the
# count from `client_data` keeps it consistent with the sample above.
num_test_clients = int(len(client_data.client_ids) * test_split)
train_data, test_data = train_test_client_split(client_data, num_test_clients)

In [None]:
len(train_data.client_ids)

800

In [None]:
len(test_data.client_ids)

200

Clear dataframe as no longer needed:

In [None]:
df = None

## Explain the math behind

## Explore with simple scripts before constructing the model class

In [None]:
train_data.element_type_structure

OrderedDict([('x', TensorSpec(shape=(None, 8), dtype=tf.uint8, name=None)),
             ('y', TensorSpec(shape=(None,), dtype=tf.int32, name=None))])

In [None]:
# Use the first training client as a representative example; the element_spec
# of its dataset is what the model's input specification is built from.
example_dataset = train_data.create_tf_dataset_for_client(train_data.client_ids[0])

# Pull a single element out of the dataset to look at concrete values.
example_element = next(iter(example_dataset))

# Display the feature array of that element (a cell shows only its last expression).
example_element["x"].numpy()

array([[1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)

## Define your ML model class

Remember that you can also create a base class and subclasses to utilize inheritance!

In [None]:
def create_keras_model(num_features=8, num_outputs=64):
    """Build the single-layer softmax classifier used for federated training.

    Args:
        num_features: Length of each input feature vector. Defaults to 8,
            the width of the `x` vectors in the toy data.
        num_outputs: Number of units in the dense layer, i.e. the length of
            the probability vector the model predicts. Defaults to 64.

    Returns:
        An uncompiled `tf.keras.models.Sequential` model consisting of one
        zero-initialised dense layer followed by a softmax layer.
    """
    return tf.keras.models.Sequential(
        [
            tf.keras.layers.Dense(
                num_outputs,
                kernel_initializer="zeros",
                input_shape=(num_features,),
            ),
            # Softmax is kept as a separate layer and turns the dense outputs
            # into a probability distribution.
            tf.keras.layers.Softmax(),
        ]
    )

In [None]:
def model_fn():
    """Create a fresh TFF learning model wrapping the Keras classifier.

    Returns:
        The result of `tff.learning.from_keras_model` for a newly built Keras
        model, using sparse categorical cross-entropy as the loss and sparse
        categorical accuracy as the metric.
    """
    # We _must_ create a new model here, and _not_ capture it from an external
    # scope. TFF will call this within different graph contexts.
    keras_model = create_keras_model()
    return tff.learning.from_keras_model(
        keras_model,
        # Pass the dataset's own element_spec (a mapping with the keys "x" and
        # "y") rather than re-packing it into an unnamed tuple, so the model's
        # input structure matches the structure of the client datasets.
        input_spec=example_dataset.element_spec,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )

In [None]:
# Build the federated averaging process from the model factory.
# Both optimizers are passed as zero-argument callables that return a new SGD
# optimizer: learning rate 0.02 for the clients and 1.0 for the server.
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=0.02),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0),
)

In [None]:
state = iterative_process.initialize()

Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


In [None]:
# `next` expects a sized collection with one tf.data.Dataset per participating
# client; passing the ClientData object itself raises a TypeError. Materialise
# the datasets of the first NUM_CLIENTS training clients for this round.
federated_train_data = [
    train_data.create_tf_dataset_for_client(client_id)
    for client_id in train_data.client_ids[:NUM_CLIENTS]
]

# Run one round of federated training and report its metrics.
state, metrics = iterative_process.next(state, federated_train_data)
print('round  1, metrics={}'.format(metrics))

TypeError: Expected collections.abc.Sized, found tensorflow_federated.python.simulation.datasets.client_data.ConcreteClientData.

## Unit test

Unit test your class or classes

In [None]:
# your code here

## Visualize model behaviour with toy data

In [None]:
## your code here

## Output of this notebook

The result of this notebook is a collection of methods ready for evaluation with the real data.

You should export classes and functions to `model.py` with `# nbdev_build_lib` (workflows will do this automatically).

## You can move on to the loss notebook!