In [1]:
from __future__ import absolute_import, division, print_function
import nest_asyncio

# Apply the nest_asyncio patch; the setup cell below notes that Jupyter
# requires a patch to asyncio.
nest_asyncio.apply()

import time
import numpy as np
import tensorflow as tf
import tensorflow_federated as tff
import pandas as pd
import matplotlib.pyplot as plt

from user import User
from average import Average
from graphing import *
from file_related import *
from inits import *
from tensorflow import keras
from sklearn.model_selection import train_test_split

import collections
import warnings

from six.moves import range
import six

# Seed value intended for reproducible runs.
# NOTE(review): the only uses visible in this notebook are inside the
# commented-out seeding calls of the next cell, so it currently has no effect.
SEED = 0

In [2]:
# Reproducibility setup (currently DISABLED): every seeding call below is
# commented out, so SEED is never applied and results may differ between runs.
# NOTE(review): `tf.set_random_seed` is the TF1 spelling; with v2 behaviour
# enabled the equivalent is presumably `tf.random.set_seed` -- confirm before
# re-enabling.
# import os
# os.environ['PYTHONHASHSEED']=str(SEED)
# np.random.seed(SEED)
# import random
# random.seed(SEED)
# tf.set_random_seed(SEED)
# could need to force keras to not use parallelism, see documentation

# Load the autoreload extension and set it to mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

#@test {"skip": true}

# NOTE: If you are running a Jupyter notebook, and installing a locally built
# pip package, you may need to edit the following to point to the '.whl' file
# on your local filesystem.

# NOTE: The high-performance executor components used in this tutorial are not
# yet included in the released pip package; you may need to compile from source.

# NOTE: Jupyter requires a patch to asyncio.


# Silence every Python warning for the rest of the session.
warnings.simplefilter('ignore')

# Switch TensorFlow to its 2.x behaviour.
tf.compat.v1.enable_v2_behavior()

# On Python 3, make a freshly created local executor the TFF default.
# NOTE: older TFF releases do not ship the high-performance executor stack.
# If the call below fails, use `tff.framework.set_default_executor()` with no
# argument instead to fall back to the default reference runtime.
if six.PY3:
    tff.framework.set_default_executor(
        tff.framework.create_local_executor())

# Smoke test: evaluate a trivial federated computation and print its result.
print(tff.federated_computation(lambda: 'It works!')())

b'It works!'


In [3]:
from tensorflow_federated.python.simulation import hdf5_client_data
# https://github.com/tensorflow/federated/blob/master/tensorflow_federated/python/simulation/hdf5_client_data_test.py
# https://github.com/tensorflow/federated/blob/v0.11.0/tensorflow_federated/python/simulation/hdf5_client_data.py
# http://docs.h5py.org/en/stable/high/group.html#Group.create_dataset
# https://stackoverflow.com/questions/55434004/create-a-custom-federated-data-set-in-tensorflow-federated
# https://stackoverflow.com/questions/58965488/how-to-create-federated-dataset-from-a-csv-file

# Path of the HDF5 file that is passed to create_hdf5 and then opened as the
# client data source.
file = "dataset.hdf5"

# Read the all-users CSV, then hand it to create_hdf5 together with the HDF5
# path; the return value is kept as the number of clients.
# NOTE(review): read_file / create_hdf5 come from the star-imported local
# modules; presumably create_hdf5 writes `file`, and the meaning of the third
# argument (0) is not visible here -- confirm against their definitions.
df = read_file("../dataset/allUsers.lcl.csv")
NUM_CLIENTS = create_hdf5(df,file,0)

# Open the HDF5 file as a TFF client-data object; later cells use
# `train.client_ids` and `train.create_tf_dataset_for_client`.
train = hdf5_client_data.HDF5ClientData(file)


In [4]:
# Number of times preprocess() repeats each client's dataset.
NUM_EPOCHS = 16
# Batch size applied by preprocess().
BATCH_SIZE = 20
# Shuffle buffer size; only referenced by the commented-out shuffle variant in
# preprocess(), so it is unused by the active code.
SHUFFLE_BUFFER = 0

def preprocess(dataset):
    """Turn a raw client dataset into repeated, batched (x, y) examples.

    Each element's 'points' entry is flattened to 1-D and stored under 'x';
    its 'label' entry is reshaped to shape [1] and stored under 'y'. The
    dataset is repeated NUM_EPOCHS times before mapping and is batched with
    BATCH_SIZE afterwards. Shuffling is not applied.
    """

    def to_xy(sample):
        features = tf.reshape(sample['points'], [-1])
        target = tf.reshape(sample['label'], [1])
        return collections.OrderedDict([('x', features), ('y', target)])

    repeated = dataset.repeat(NUM_EPOCHS)
    return repeated.map(to_xy).batch(BATCH_SIZE)

def make_federated_data(client_data, client_ids):
    """Return a list with one preprocessed dataset per id in `client_ids`.

    The list follows the order of `client_ids`; each entry is the result of
    preprocess() applied to that client's dataset from `client_data`.
    """
    per_client = []
    for cid in client_ids:
        raw = client_data.create_tf_dataset_for_client(cid)
        per_client.append(preprocess(raw))
    return per_client

def model_fn():
    """Wrap the model returned by init_model() as a TFF learning model.

    The module-level `sample_batch` is looked up when this function is
    called, so that name must already be bound at call time (it is assigned
    in a later cell than this definition).
    """
    return tff.learning.from_compiled_keras_model(init_model(), sample_batch)

# Take the first NUM_CLIENTS client ids and build one preprocessed dataset
# for each of them.
sample_clients = train.client_ids[:NUM_CLIENTS]
federated_train_data = make_federated_data(train, sample_clients)

# Cell output: how many client datasets there are, plus the first one.
(len(federated_train_data), federated_train_data[0])

(14,
 <BatchDataset shapes: OrderedDict([(x, (None, 36)), (y, (None, 1))]), types: OrderedDict([(x, tf.float32), (y, tf.float32)])>)

In [5]:

# Inspect one raw element from a single client's dataset.
# NOTE(review): index 11 is hard-coded and requires at least 12 clients.
example_dataset = train.create_tf_dataset_for_client(
    train.client_ids[11])

# Use the builtin next() instead of the iterator's `.next()` method: the
# builtin is the standard iterator protocol and does not depend on the
# iterator object exposing a Python-2-style `.next` attribute.
example_element = next(iter(example_dataset))

print(example_element['points'].numpy())
print(example_element['label'].numpy())

# Take the first preprocessed batch and convert every tensor in it to a numpy
# array; model_fn() passes this `sample_batch` to
# tff.learning.from_compiled_keras_model.
preprocessed_example_dataset = preprocess(example_dataset)
sample_batch = tf.nest.map_structure(
    lambda x: x.numpy(), next(iter(preprocessed_example_dataset)))

[ 72.05118    12.736734  -71.69564    70.23323   -12.197811  -59.39264
  54.848625  -40.39899   -47.726048   26.460789   34.228977  -74.096954
   4.3407264  44.69395   -70.86358     0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.       ]
1.0


In [6]:
# Build the federated averaging process from model_fn, print the type
# signature of its `initialize` computation, then create the initial state.
iterative_process = tff.learning.build_federated_averaging_process(model_fn)
print(iterative_process.initialize.type_signature)
state = iterative_process.initialize()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


( -> <model=<trainable=<sequential/dense/kernel=float32[36,32],sequential/dense/bias=float32[32],sequential/dense_1/kernel=float32[32,6],sequential/dense_1/bias=float32[6]>,non_trainable=<>>,optimizer_state=<int64>,delta_aggregate_state=<>,model_broadcast_state=<>>@SERVER)


In [7]:
# Total number of federated training rounds (numbered 0 .. NUM_ROUNDS - 1).
NUM_ROUNDS = 64

# Every round, including round 0, goes through the same loop body instead of
# duplicating the first call outside the loop. `{:2d}` pads single-digit round
# numbers, so the printed lines are identical to the previous output format.
# The same client datasets are fed to every round.
for round_num in range(NUM_ROUNDS):
    state, metrics = iterative_process.next(state, federated_train_data)
    print('round {:2d}, metrics={}'.format(round_num, metrics))

round  0, metrics=<sparse_categorical_accuracy=0.8757650852203369,loss=0.8561046719551086>
round  1, metrics=<sparse_categorical_accuracy=0.9087525010108948,loss=0.35225701332092285>
round  2, metrics=<sparse_categorical_accuracy=0.931107223033905,loss=0.27405238151550293>
round  3, metrics=<sparse_categorical_accuracy=0.9360874891281128,loss=0.272421270608902>
round  4, metrics=<sparse_categorical_accuracy=0.9386147856712341,loss=0.24905142188072205>
round  5, metrics=<sparse_categorical_accuracy=0.9421496987342834,loss=0.25311222672462463>
round  6, metrics=<sparse_categorical_accuracy=0.9441064596176147,loss=0.22794567048549652>
round  7, metrics=<sparse_categorical_accuracy=0.9441776871681213,loss=0.23617136478424072>
round  8, metrics=<sparse_categorical_accuracy=0.947548508644104,loss=0.22174479067325592>
round  9, metrics=<sparse_categorical_accuracy=0.9474636912345886,loss=0.2141287624835968>
round 10, metrics=<sparse_categorical_accuracy=0.9430468678474426,loss=0.2213790267705

In [8]:
# Build the federated evaluation computation from the same model_fn that was
# used to build the training process.
evaluation = tff.learning.build_federated_evaluation(model_fn)

In [9]:
# Cell output: the evaluation computation's type signature as a string.
str(evaluation.type_signature)


'(<<trainable=<sequential/dense/kernel=float32[36,32],sequential/dense/bias=float32[32],sequential/dense_1/kernel=float32[32,6],sequential/dense_1/bias=float32[6]>,non_trainable=<>>@SERVER,{<x=float32[?,36],y=float32[?,1]>*}@CLIENTS> -> <sparse_categorical_accuracy=float32@SERVER,loss=float32@SERVER>)'

In [10]:
# Evaluate the model held in the final training state on the same client
# datasets that were used for training -- no held-out data is involved here.
# Note that federated_train_data was built by preprocess(), which repeats each
# client dataset NUM_EPOCHS times, so evaluation iterates the repeated data.
train_metrics = evaluation(state.model, federated_train_data)
# Cell output: the metrics rendered as a string.
str(train_metrics)


'<sparse_categorical_accuracy=0.7196143269538879,loss=1.8806164264678955>'