# Import Libraries

In [1]:
!pip install --quiet --upgrade tensorflow-federated
!pip install --quiet --upgrade nest-asyncio

import nest_asyncio
nest_asyncio.apply()

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_federated as tff



SEED = 0
tf.random.set_seed(SEED)
np.random.seed(0)

tff.federated_computation(lambda: 'Hello, World!')()

b'Hello, World!'

In [3]:
tff.__version__

'0.20.0'

In [4]:
from tensorflow import keras
from keras.metrics import RootMeanSquaredError
from keras import layers
from sklearn import preprocessing


from sklearn.model_selection import GroupShuffleSplit

import time


# Load data

In [5]:
path = "/content/drive/MyDrive/Thesis/Datasets/Pasteurizer_dataset/"

In [6]:
alice_df = pd.read_csv(path+ "federated/alice_set.csv")
bob_df = pd.read_csv(path+ "federated/bob_set.csv")

test_df = pd.read_csv(path+ "federated/test_set.csv")

In [7]:
alice_df

Unnamed: 0,paster_id,paster_timeslot,prev_water_temp,prev_can_temp,curr_water_temp,bath_number,curr_bath_temp,curr_can_temp
0,129,20,32.84,29.79,34.81,1,40.0312,29.83
1,129,30,34.81,29.83,36.60,1,40.0312,29.88
2,129,40,36.60,29.88,36.28,1,40.0312,29.94
3,129,50,36.28,29.94,35.90,1,40.0062,30.05
4,129,60,35.90,30.05,36.49,1,40.0062,30.22
...,...,...,...,...,...,...,...,...
23351,214,1970,53.63,54.69,53.68,6,43.7688,54.63
23352,214,1980,53.68,54.63,53.72,6,43.9375,54.58
23353,214,1990,53.72,54.58,53.66,6,44.0375,54.53
23354,214,2000,53.66,54.53,53.60,6,44.0438,54.47


In [8]:
bob_df

Unnamed: 0,paster_id,paster_timeslot,prev_water_temp,prev_can_temp,curr_water_temp,bath_number,curr_bath_temp,curr_can_temp
0,152,20,33.97,29.07,36.14,1,40.3312,29.20
1,152,30,36.14,29.20,37.67,1,40.3312,29.39
2,152,40,37.67,29.39,38.40,1,40.3312,29.48
3,152,50,38.40,29.48,38.80,1,40.2812,29.54
4,152,60,38.80,29.54,39.11,1,40.2562,29.58
...,...,...,...,...,...,...,...,...
21902,185,1820,37.14,39.71,37.58,6,36.3437,39.54
21903,185,1830,37.58,39.54,37.72,6,36.4062,39.43
21904,185,1840,37.72,39.43,37.72,6,36.4438,39.31
21905,185,1850,37.72,39.31,37.66,6,36.4688,39.18


In [9]:
test_df

Unnamed: 0,paster_id,paster_timeslot,prev_water_temp,prev_can_temp,curr_water_temp,bath_number,curr_bath_temp,curr_can_temp
0,24,20,36.24,30.32,36.87,1,41.8063,30.38
1,24,30,36.87,30.38,37.21,1,41.8063,30.44
2,24,40,37.21,30.44,38.49,1,41.7813,30.50
3,24,50,38.49,30.50,39.13,1,41.7250,30.58
4,24,60,39.13,30.58,38.45,1,41.6812,30.70
...,...,...,...,...,...,...,...,...
11320,198,1780,36.92,38.66,36.97,6,36.0000,38.60
11321,198,1790,36.97,38.60,37.00,6,36.0000,38.57
11322,198,1800,37.00,38.57,36.97,6,36.0687,38.54
11323,198,1810,36.97,38.54,36.93,6,36.0625,38.52


# Preparing Data

In [10]:
# Train test numpy arrays
X_alice = alice_df.drop(['curr_can_temp','paster_id'], axis=1).values
y_alice = alice_df['curr_can_temp'].values

X_bob = bob_df.drop(['curr_can_temp','paster_id'], axis=1).values
y_bob = bob_df['curr_can_temp'].values


X_test = test_df.drop(['curr_can_temp','paster_id'], axis=1).values
y_test = test_df['curr_can_temp'].values

In [11]:
# Min max normalizaiton

min_max_scaler = preprocessing.MinMaxScaler()

X_alice_norm_ = min_max_scaler.fit_transform(X_alice)
X_bob_norm = min_max_scaler.fit_transform(X_bob)
X_test_norm = min_max_scaler.fit_transform(X_test)


In [12]:
X_alice.shape

(23356, 6)

In [13]:
y_alice.shape

(23356,)

In [14]:
X_test.shape

(11325, 6)

In [15]:
y_alice.shape

(23356,)

In [16]:
y_alice = y_alice.reshape(y_alice.shape[0],1)
y_bob = y_bob.reshape(y_bob.shape[0],1)
y_test = y_test.reshape(y_test.shape[0],1)

In [17]:
y_alice.shape

(23356, 1)

# Hyperparameters

In [18]:
BATCH_SIZE = 16
GLOBAL_EPOCHS = 10
LOCAL_EPOCHS = 10

# Federated Dataset

In [19]:
# y_alice = y_alice.astype(np.int32)
# y_bob = y_bob.astype(np.int32)

In [20]:
train_data, val_data = [], []

temp_dataset = tf.data.Dataset.from_tensor_slices((X_alice, y_alice)).repeat(LOCAL_EPOCHS).batch(BATCH_SIZE)
train_data.append(temp_dataset)
temp_dataset = tf.data.Dataset.from_tensor_slices((X_bob, y_bob)).repeat(LOCAL_EPOCHS).batch(BATCH_SIZE)
train_data.append(temp_dataset)

temp_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
val_data.append(temp_dataset.batch(1))

In [21]:
train_data

[<BatchDataset element_spec=(TensorSpec(shape=(None, 6), dtype=tf.float64, name=None), TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))>,
 <BatchDataset element_spec=(TensorSpec(shape=(None, 6), dtype=tf.float64, name=None), TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))>]

In [22]:
val_data

[<BatchDataset element_spec=(TensorSpec(shape=(None, 6), dtype=tf.float64, name=None), TensorSpec(shape=(None, 1), dtype=tf.float64, name=None))>]

# Create Model

In [23]:
def input_spec():
    return (
        tf.TensorSpec([None, 6], tf.float64),
        tf.TensorSpec([None, 1], tf.float64)
    )

def model_fn():
    model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(6,)),                                       
        tf.keras.layers.Dense(32),
        tf.keras.layers.Dense(1),
    ])

    return tff.learning.from_keras_model(
        model,
        input_spec=input_spec(),
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[RootMeanSquaredError()])

# Training

In [24]:
trainer = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(),
    server_optimizer_fn=lambda: tf.keras.optimizers.Adam(learning_rate = 0.1)
)

state = trainer.initialize()
train_hist = []

start = time.time()
for i in range(GLOBAL_EPOCHS):
    state, metrics = trainer.next(state, train_data)
    train_hist.append(metrics)

    print(f"\rRun {i+1}/{GLOBAL_EPOCHS}", end="")
    print(metrics["train"])
end = time.time()
print(f"Training time {end - start:.2f} sec")

Run 1/10OrderedDict([('root_mean_squared_error', 25.623428), ('loss', 656.5601), ('num_examples', 452630), ('num_batches', 28290)])
Run 2/10OrderedDict([('root_mean_squared_error', 31.57903), ('loss', 997.2351), ('num_examples', 452630), ('num_batches', 28290)])
Run 3/10OrderedDict([('root_mean_squared_error', 18.220875), ('loss', 332.00027), ('num_examples', 452630), ('num_batches', 28290)])
Run 4/10OrderedDict([('root_mean_squared_error', 6.2766643), ('loss', 39.396515), ('num_examples', 452630), ('num_batches', 28290)])
Run 5/10OrderedDict([('root_mean_squared_error', 4.611262), ('loss', 21.263735), ('num_examples', 452630), ('num_batches', 28290)])
Run 6/10OrderedDict([('root_mean_squared_error', 3.6384203), ('loss', 13.238102), ('num_examples', 452630), ('num_batches', 28290)])
Run 7/10OrderedDict([('root_mean_squared_error', 2.6233404), ('loss', 6.881914), ('num_examples', 452630), ('num_batches', 28290)])
Run 8/10OrderedDict([('root_mean_squared_error', 2.4314256), ('loss', 5.91

In [25]:
# train_hist

In [26]:
evaluator = tff.learning.build_federated_evaluation(model_fn)

In [27]:
federated_metrics = evaluator(state.model, val_data)
federated_metrics

OrderedDict([('eval',
              OrderedDict([('root_mean_squared_error', 12.079615),
                           ('loss', 145.9171),
                           ('num_examples', 11325),
                           ('num_batches', 11325)]))])

In [28]:
# state.model