# Import Libraries

In [24]:
# !pip install --quiet --upgrade tensorflow-federated
!pip install --quiet tensorflow-federated==0.20.0
!pip install --quiet --upgrade nest-asyncio

import nest_asyncio
nest_asyncio.apply()

In [25]:
import matplotlib.pyplot as plt


In [26]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_federated as tff



SEED = 0
tf.random.set_seed(SEED)
np.random.seed(0)

tff.federated_computation(lambda: 'Hello, World!')()

b'Hello, World!'

In [27]:
from tensorflow import keras
from keras.metrics import BinaryAccuracy, Precision, Recall
from keras import layers

from sklearn.model_selection import GroupShuffleSplit

# Load Data


In [28]:
path = "/content/drive/MyDrive/Thesis/Datasets/Turbofan_Dataset/final_datasets_normalized/"

In [29]:
# Load data and drop irrelevant columns

alice_set = pd.read_csv(path + "TRAINING_SET_1.csv")
bob_set = pd.read_csv(path + "TRAINING_SET_2.csv")

test_set = pd.read_csv(path + "TEST_SET_FULL.csv")


drop_cols = ["cycle","setting3","s1","s5","s10","s16","s18","s19","RUL"]
corr_cols = ["s11","s4","s15","s17","s2","s3","s8","s13","s9","s14","s12","s7","s20"]
feature_cols = ['cycle_norm', 'setting1', 'setting2', 's2', 's3', 's4', 's6', 's7',
       's8', 's9', 's11', 's12', 's13', 's14', 's15', 's17', 's20', 's21']
prediction_col = 'fail_30'

alice_set = alice_set.drop(drop_cols, axis=1)
bob_set = bob_set.drop(drop_cols, axis=1)

test_set = test_set.drop(drop_cols, axis=1)

In [30]:
# Move cycle_norm column first for convenience

column_to_move = alice_set.pop("cycle_norm")
alice_set.insert(0, "cycle_norm", column_to_move)
column_to_move = bob_set.pop("cycle_norm")
bob_set.insert(0, "cycle_norm", column_to_move)

column_to_move = test_set.pop("cycle_norm")
test_set.insert(0, "cycle_norm", column_to_move)

In [31]:
X_alice = alice_set.iloc[:, 0:-1].to_numpy()
y_alice = alice_set.iloc[:, -1].to_numpy()

X_bob = bob_set.iloc[:, 0:-1].to_numpy()
y_bob = bob_set.iloc[:, -1].to_numpy()

X_test = test_set.iloc[:, 0:-1].to_numpy()
y_test = test_set.iloc[:, -1].to_numpy()

In [32]:
X_alice.shape

(9909, 19)

# Hyperparameters

In [33]:
# BATCH_SIZE = 16
# GLOBAL_EPOCHS = 10
# LOCAL_EPOCHS = 25

BATCH_SIZE = 32
GLOBAL_EPOCHS = 15
LOCAL_EPOCHS = 5

# Federated Dataset

In [34]:
# Use 80% for training and 20% for validation
alice_split = round(X_alice.shape[0] * 0.8)
bob_split = round(X_bob.shape[0] * 0.8)

In [35]:
np.concatenate([X_alice[alice_split:],X_bob[bob_split:]]).shape

(4126, 19)

In [36]:
np.concatenate([y_alice[alice_split:],y_bob[bob_split:]]).shape

(4126,)

In [37]:
train_data, test_data, val_data = [], [], []

temp_dataset = tf.data.Dataset.from_tensor_slices((X_alice[:alice_split], y_alice[:alice_split])).repeat(LOCAL_EPOCHS).batch(BATCH_SIZE)
train_data.append(temp_dataset)
temp_dataset = tf.data.Dataset.from_tensor_slices((X_bob[:bob_split], y_bob[:bob_split])).repeat(LOCAL_EPOCHS).batch(BATCH_SIZE)
train_data.append(temp_dataset)


temp_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_data.append(temp_dataset.batch(1))

X_val = np.concatenate([X_alice[alice_split:],X_bob[bob_split:]])
y_val = np.concatenate([y_alice[alice_split:],y_bob[bob_split:]])

temp_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
val_data.append(temp_dataset.batch(BATCH_SIZE))


In [38]:
test_data

[<BatchDataset element_spec=(TensorSpec(shape=(None, 19), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>]

In [39]:
val_data

[<BatchDataset element_spec=(TensorSpec(shape=(None, 19), dtype=tf.float64, name=None), TensorSpec(shape=(None,), dtype=tf.int64, name=None))>]

# Define model


In [40]:
def input_spec():
    return (
        tf.TensorSpec([None, 19], tf.float64),
        tf.TensorSpec([None], tf.int64)
    )

def model_fn():
    model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(19)),                                       
        tf.keras.layers.Dense(64, activation="relu"),
        # tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dense(64, activation="relu"),
        # tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    return tff.learning.from_keras_model(
        model,
        input_spec=input_spec(),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[BinaryAccuracy(), Precision(), Recall()])

# Training

In [41]:
evaluator = tff.learning.build_federated_evaluation(model_fn)

In [42]:
trainer = tff.learning.build_federated_averaging_process(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD( learning_rate=0.1)

)

state = trainer.initialize()
train_hist = []
losses = []
accs = []

val_losses = []
val_accs = []
for i in range(GLOBAL_EPOCHS):
    state, metrics = trainer.next(state, train_data)
    train_hist.append(metrics)
    losses.append(metrics['train']['loss'])
    accs.append(metrics['train']['binary_accuracy'])

    val_metrics = evaluator(state.model, val_data)
    val_losses.append(val_metrics['eval']['loss'])
    val_accs.append(val_metrics['eval']['binary_accuracy'])

    print(f"\rRun {i+1}/{GLOBAL_EPOCHS} _ loss={metrics['train']['loss']} _ acc={metrics['train']['binary_accuracy']}")

Run 1/15 _ loss=0.26648131012916565 _ acc=0.8951865434646606
Run 2/15 _ loss=0.24569252133369446 _ acc=0.9034326076507568
Run 3/15 _ loss=0.229119211435318 _ acc=0.90958571434021
Run 4/15 _ loss=0.21686752140522003 _ acc=0.9145979285240173
Run 5/15 _ loss=0.20618018507957458 _ acc=0.9182945489883423
Run 6/15 _ loss=0.19735579192638397 _ acc=0.9221511483192444
Run 7/15 _ loss=0.18926560878753662 _ acc=0.9256298542022705
Run 8/15 _ loss=0.18393777310848236 _ acc=0.9274830222129822
Run 9/15 _ loss=0.17887438833713531 _ acc=0.9307291507720947
Run 10/15 _ loss=0.17566192150115967 _ acc=0.9317198991775513
Run 11/15 _ loss=0.17297042906284332 _ acc=0.9322189688682556
Run 12/15 _ loss=0.17015764117240906 _ acc=0.9343482851982117
Run 13/15 _ loss=0.16692453622817993 _ acc=0.9357315897941589
Run 14/15 _ loss=0.1649799346923828 _ acc=0.9363493323326111
Run 15/15 _ loss=0.1624126434326172 _ acc=0.9371148347854614


In [43]:
train_metrics = evaluator(state.model, train_data)
train_metrics

OrderedDict([('eval',
              OrderedDict([('binary_accuracy', 0.8599612),
                           ('precision', 0.53045326),
                           ('recall', 0.6033025),
                           ('loss', 0.31185567),
                           ('num_examples', 82525),
                           ('num_batches', 2580)]))])

In [44]:
train_metrics['eval']

OrderedDict([('binary_accuracy', 0.8599612),
             ('precision', 0.53045326),
             ('recall', 0.6033025),
             ('loss', 0.31185567),
             ('num_examples', 82525),
             ('num_batches', 2580)])

In [45]:
test_metrics = evaluator(state.model, test_data)
test_metrics

OrderedDict([('eval',
              OrderedDict([('binary_accuracy', 0.95983505),
                           ('precision', 0.27752292),
                           ('recall', 0.36445785),
                           ('loss', 0.13704874),
                           ('num_examples', 13096),
                           ('num_batches', 13096)]))])

In [46]:
type(state.model)

tensorflow_federated.python.learning.model_utils.ModelWeights