In [None]:
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_federated as tff
from sklearn.preprocessing import StandardScaler

In [None]:
# do this if you are running on local jupyter -> https://github.com/tensorflow/federated/issues/842
!pip install nest_asyncio
import nest_asyncio
nest_asyncio.apply()

In [None]:
# Fetch the Auto MPG data file; get_file hands back the local path of the download.
dataset_path = keras.utils.get_file(
    "auto-mpg.data",
    "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data",
)

# Column labels are supplied explicitly through `names` when parsing.
column_names = [
    'MPG', 'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year', 'Origin',
]

# Space-separated values: "?" is read as missing, and a tab starts a comment
# that runs to the end of the line.
raw_dataset = pd.read_csv(
    dataset_path,
    names=column_names,
    na_values="?",
    comment='\t',
    sep=" ",
    skipinitialspace=True,
)

# Work on a copy with incomplete rows removed; raw_dataset itself stays untouched.
df = raw_dataset.copy().dropna()

In [None]:
# Preview the first rows of the cleaned frame.
df.head()

In [None]:
import seaborn as sns
# Pairwise relationships between MPG and a few features, coloured by Origin.
sns.pairplot(
    df[["MPG", "Cylinders", "Displacement", "Weight", "Origin"]],
    hue='Origin',
    diag_kind="kde",
    plot_kws=dict(alpha=0.6, s=80, edgecolor='k'),
)

In [None]:
# Feature columns that get standardized.
to_be_normalized_columns = [
    'Cylinders', 'Displacement', 'Horsepower',
    'Weight', 'Acceleration', 'Model Year',
]

# Regression target; it is standardized together with the features below.
target_column = "MPG"

# A single scaler covers the features and the target (despite the `_x` suffix),
# and it is fitted on every row currently in df.
columns_to_scale = to_be_normalized_columns + [target_column]
standard_scaler_x = StandardScaler(with_mean=True, with_std=True)
scaled_values = standard_scaler_x.fit_transform(df[columns_to_scale])
df[columns_to_scale] = scaled_values

In [None]:
import collections
from sklearn.model_selection import train_test_split

# Input-pipeline settings read by preprocess().
SHUFFLE_BUFFER = 100   # buffer size given to Dataset.shuffle
PREFETCH_BUFFER = 10   # number of batches given to Dataset.prefetch
BATCH_SIZE = 20        # examples per batch
NUM_EPOCHS = 5         # how many times a dataset is repeated

def preprocess(dataset, training=True):
    """Batch a client dataset and put each batch into an ordered (x, y) mapping.

    Args:
        dataset: Dataset whose elements are mappings with 'x' and 'y' entries.
        training: When True (the default) the data is repeated NUM_EPOCHS
            times and shuffled with a SHUFFLE_BUFFER-sized buffer before
            batching. Pass False for evaluation data, which only needs a
            single, unshuffled pass.

    Returns:
        The dataset batched into groups of BATCH_SIZE, every batch converted
        to an OrderedDict with keys 'x' and 'y', with PREFETCH_BUFFER batches
        prefetched.
    """

    def batch_format_fn(element):
        # Keep only 'x' and 'y', in that explicit order.
        return collections.OrderedDict(x=element['x'], y=element['y'])

    if training:
        dataset = dataset.repeat(NUM_EPOCHS).shuffle(SHUFFLE_BUFFER)

    return dataset.batch(BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)


# produce datasets for each origin
def make_federated_data(data=None, test_size=0.2, random_state=42):
    """Build one training and one test dataset per 'Origin' group.

    Rows sharing an 'Origin' value form one client. Each client's rows are
    split into train and test parts and wrapped as batched datasets.

    Args:
        data: DataFrame holding 'MPG', 'Origin' and the feature columns.
            Defaults to the notebook-level `df`.
        test_size: Share of each client's rows held out for testing.
        random_state: Seed forwarded to train_test_split.

    Returns:
        A tuple (train_datasets, test_datasets) of two equally long lists,
        with one entry per 'Origin' group in groupby order.
    """
    if data is None:
        data = df

    train_datasets = []
    test_datasets = []

    for _, client_frame in data.groupby('Origin'):
        # Select columns non-destructively so the grouped frame is never mutated.
        target = client_frame['MPG']
        features = client_frame.drop(columns=['MPG', 'Origin'])

        X_train, X_test, y_train, y_test = train_test_split(
            features.values,
            target.values,
            test_size=test_size,
            random_state=random_state)

        train_dataset = tf.data.Dataset.from_tensor_slices(
            {'x': X_train, 'y': y_train})
        test_dataset = tf.data.Dataset.from_tensor_slices(
            {'x': X_test, 'y': y_test})

        train_datasets.append(preprocess(train_dataset))
        # Evaluation needs a single pass only: no repeat, no shuffle.
        test_datasets.append(preprocess(test_dataset, training=False))

    return train_datasets, test_datasets


In [None]:
# Build the per-client (one per 'Origin' group) train and test datasets.
train_datasets, test_datasets = make_federated_data()

In [None]:
def build_model():
  """Return a new, uncompiled Keras regression network.

  Layout: 6 inputs -> Dense(64, relu) -> Dense(64, relu) -> Dense(1).
  """
  first_hidden = layers.Dense(64, activation='relu', input_shape=[6])
  second_hidden = layers.Dense(64, activation='relu')
  output_layer = layers.Dense(1)
  return keras.Sequential([first_hidden, second_hidden, output_layer])

In [None]:
def create_tff_model():
  """Wrap a newly built Keras model as a TFF learning model.

  Every call builds a new Keras model via build_model(). The input spec is
  read from the first entry of the notebook-level `train_datasets`; mean
  squared error serves as both the loss and the reported metric.
  """
  keras_model = build_model()
  batch_spec = train_datasets[0].element_spec
  mse_loss = tf.keras.losses.MeanSquaredError()
  mse_metric = tf.keras.metrics.MeanSquaredError()
  return tff.learning.from_keras_model(
      keras_model,
      input_spec=batch_spec,
      loss=mse_loss,
      metrics=[mse_metric])

In [None]:
print("Create averaging process")


def client_optimizer_fn():
    """Return a new SGD optimizer, constructed with 0.002 as its first argument."""
    return tf.keras.optimizers.SGD(0.002)


# TFF receives the factories (not instances) for the model and the client optimizer.
iterative_process = tff.learning.build_federated_averaging_process(
    model_fn=create_tff_model,
    client_optimizer_fn=client_optimizer_fn,
)

In [None]:
NUM_ROUNDS = 10  # number of federated averaging rounds to run

print("Initialize averaging process")
state = iterative_process.initialize()

print("Start iterations")
for round_num in range(1, NUM_ROUNDS + 1):
  # Each round is fed every client's training dataset and yields the next server state.
  state, metrics = iterative_process.next(state, train_datasets)
  print('round {:2d}, metrics={}'.format(round_num, metrics))

In [None]:
# Evaluate the trained global model on the test data of all clients together.
evaluation = tff.learning.build_federated_evaluation(model_fn=create_tff_model)
global_weights = state.model
test_metrics = evaluation(global_weights, test_datasets)
print(test_metrics)

In [None]:
# Evaluate the same global model on each client's test data separately.
for i, client_test_dataset in enumerate(test_datasets):
    test_metrics = evaluation(state.model, [client_test_dataset])
    print(test_metrics)