In [1]:
import pandas as pd
import os
import nest_asyncio
nest_asyncio.apply()
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import tensorflow_federated as tff
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall
SEED = 1337
tf.random.set_seed(SEED)

# Get the current working directory
current_dir = os.getcwd()

# Change the working directory to the parent directory of the current directory
os.chdir(os.path.join(current_dir, '..'))
# Enter path to the data_fusion_guest_lecture file
image_folder_path = 'data/seedling_labels_with_features.csv'

# Loads labels
df = pd.read_csv(image_folder_path)

df = df.drop(columns=['Pos','average_expert'])
df = df.iloc[:, -8:]

In [2]:
# group the dataframe by the 'Rfid' column
groups = df.groupby('Rfid')

# create separate dataframes for each group and drop the 'Rfid' column
dfs = []
i = 1
for _, group_data in groups:
    df_name = f"df_{i}"
    globals()[df_name] = group_data.drop('Rfid', axis=1)
    dfs.append(globals()[df_name] )
    i += 1


In [3]:
df['expert_binary'].value_counts()

1    673
0    321
Name: expert_binary, dtype: int64

In [4]:
import nest_asyncio
nest_asyncio.apply()

import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import tensorflow_federated as tff
from tensorflow.keras.metrics import BinaryAccuracy, Precision, Recall

SEED = 1337
tf.random.set_seed(SEED)

In [5]:
BATCH_SIZE = 50


def make_tf_dataset(dataframe, negative_ratio=None, batch_size=None):
    dataset = dataframe

    # Class balancing
    pos_df = dataset[dataset['expert_binary'] == 1]
    neg_df = dataset[dataset['expert_binary'] == 0]
    
    if negative_ratio:
        neg_df = neg_df.iloc[random.sample(range(0, len(neg_df)), len(pos_df)*negative_ratio), :]
    balanced_df = pd.concat([pos_df, neg_df], ignore_index=True, sort=False)

    y = balanced_df.pop('expert_binary')
    
    # Dataset creation
    dataset = tf.data.Dataset.from_tensor_slices((balanced_df.values, y.to_frame().values))
    dataset = dataset.shuffle(2048, seed=SEED)
    if batch_size:
        dataset = dataset.batch(batch_size)

    return dataset

train_data, val_data, scaled_train_dfs, scaled_val_dfs  = [], [], [], []
for client_data in dfs:
    train_df, val_df = train_test_split(client_data, test_size=0.1, random_state=SEED)

    # Scaling (Standardization actually hurts performance) 
    scaler = MinMaxScaler() 
    train_features = scaler.fit_transform(train_df.drop(['expert_binary'], axis=1))
    val_features = scaler.transform(val_df.drop(['expert_binary'], axis=1))

    train_df[train_df.columns.difference(['expert_binary'])] = train_features
    val_df[val_df.columns.difference(['expert_binary'])] = val_features

    # Append scaled train and validation dataframes to their respective lists
    scaled_train_dfs.append(train_df)
    scaled_val_dfs.append(val_df)


    # TF Datasets
    train_data.append(make_tf_dataset(train_df, batch_size=BATCH_SIZE))
    val_data.append(make_tf_dataset(val_df, batch_size=1))

def input_spec():
    return (
        tf.TensorSpec([None, 6], tf.float64),           #you need to change the expected input shape based on the number of features we have
        tf.TensorSpec([None, 1], tf.int64)
    )

def model_fn():
    model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(6,)),   #you need to change the input shape based on the number of features we have
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])

        
    return tff.learning.from_keras_model(
        model,
        input_spec=input_spec(),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=[BinaryAccuracy(), Precision(), Recall()])

training_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: tf.keras.optimizers.Adam(),
    server_optimizer_fn=lambda: tf.keras.optimizers.Adam()
)

#print(training_process.initialize.type_signature.formatted_representation())

train_state = training_process.initialize()

NUM_ROUNDS = 100
for round_num in range(2, NUM_ROUNDS):
  result = training_process.next(train_state, train_data)
  train_state = result.state
  train_metrics = result.metrics['client_work']['train']
  print('round {:2d}, metrics={}'.format(round_num, train_metrics))

round  2, metrics=OrderedDict([('binary_accuracy', 0.671156), ('precision', 0.671156), ('recall', 1.0), ('loss', 0.583018), ('num_examples', 891), ('num_batches', 24)])
round  3, metrics=OrderedDict([('binary_accuracy', 0.671156), ('precision', 0.671156), ('recall', 1.0), ('loss', 0.58044785), ('num_examples', 891), ('num_batches', 24)])
round  4, metrics=OrderedDict([('binary_accuracy', 0.671156), ('precision', 0.671156), ('recall', 1.0), ('loss', 0.57742196), ('num_examples', 891), ('num_batches', 24)])
round  5, metrics=OrderedDict([('binary_accuracy', 0.671156), ('precision', 0.671156), ('recall', 1.0), ('loss', 0.57489276), ('num_examples', 891), ('num_batches', 24)])
round  6, metrics=OrderedDict([('binary_accuracy', 0.671156), ('precision', 0.671156), ('recall', 1.0), ('loss', 0.57232875), ('num_examples', 891), ('num_batches', 24)])
round  7, metrics=OrderedDict([('binary_accuracy', 0.671156), ('precision', 0.671156), ('recall', 1.0), ('loss', 0.5695993), ('num_examples', 891),

In [6]:
evaluator = tff.learning.build_federated_evaluation(model_fn)

In [7]:
federated_metrics = evaluator(training_process.get_model_weights(train_state), val_data)
federated_metrics

OrderedDict([('eval',
              OrderedDict([('binary_accuracy', 0.93203884),
                           ('precision', 0.94736844),
                           ('recall', 0.96),
                           ('loss', 0.25818038),
                           ('num_examples', 103),
                           ('num_batches', 103)]))])

In [8]:
model_weights = training_process.get_model_weights(train_state)


keras_model = tf.keras.models.Sequential([
    tf.keras.layers.InputLayer(input_shape=(6,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model_weights.assign_weights_to(keras_model)


In [9]:
scaled_train_df = pd.concat(scaled_train_dfs, axis=0)
scaled_val_df = pd.concat(scaled_val_dfs, axis=0)
combined_df = pd.concat([scaled_train_df, scaled_val_df], axis=0)


X_combined = combined_df.drop('expert_binary', axis=1).values


predictions = keras_model.predict(X_combined)


predictions = predictions.flatten()
binary_predictions = (predictions > 0.5).astype(int)
df['federated_predicted'] = binary_predictions



In [10]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(df['federated_predicted'], df['expert_binary'])
accuracy

0.5955734406438632

# checking performance without federated learning

In [81]:
train_data = train_data[0].concatenate(train_data[1])
val_data = val_data[0].concatenate(val_data[1])


In [82]:
EPOCHS = 100
def model_fn():
    model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(6,)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(),
        metrics=[BinaryAccuracy(), Precision(), Recall()],
    )
    
    return model

model = model_fn()
history = model.fit(train_data, epochs=EPOCHS)

test_scores = model.evaluate(val_data)
single_metrics = {
    'loss': test_scores[0],
    'binary_accuracy': test_scores[1],
    'precision': test_scores[2],
    'recall': test_scores[3]
}
single_metrics

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

{'loss': 0.19725465774536133,
 'binary_accuracy': 0.8846153616905212,
 'precision': 0.9444444179534912,
 'recall': 0.8947368264198303}

In [11]:
print(f"---Single model metrics---\n{single_metrics}\n")
print(f"---Federated model metrics---\n{dict(federated_metrics['eval'])}")

---Single model metrics---
{'loss': 0.11078266054391861, 'binary_accuracy': 0.9230769276618958, 'precision': 0.9473684430122375, 'recall': 0.9473684430122375}

---Federated model metrics---
{'binary_accuracy': 0.9223301, 'precision': 0.94666666, 'recall': 0.94666666, 'loss': 0.182077, 'num_examples': 103, 'num_batches': 103}
