<a href="https://colab.research.google.com/github/AnastasiaBrinati/Progetto-ML-23-24/blob/main/task1_federato.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import tensorflow as tf

!pip install -q flwr["simulation"] tensorflow
!pip install -q flwr_datasets["vision"]
!pip install matplotlib

from typing import Dict, List, Tuple
import flwr as fl
from flwr.common import Metrics
import tensorflow as tf
from tensorflow.keras.initializers import he_normal, glorot_normal
from tensorflow.keras.layers import Input, Dense, concatenate, Flatten
!pip install tensorflow-addons
from tensorflow_addons.metrics import F1Score
from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth
from datasets import Dataset
from flwr_datasets import FederatedDataset
from keras.initializers import RandomNormal, he_normal, glorot_normal, he_uniform, glorot_uniform



In [18]:
# Simulation parameters
settings = {
    "VERBOSE": 0,
    "NUM_CLIENTS": 100,
    "FRACTION_FIT": 0.8,
    "FRACTION_EVALUATE": 0.2,
    "MIN_FIT_CLIENTS": 5,
    "MIN_EVALUATE_CLIENTS": 2,
    "MIN_AVAILABLE_CLIENTS_FRACTION": 0.5,
    "STRATEGY_TYPE": 'QFedAvg', # QFedAvg , FedAvg, FedProx
    "NUM_ROUNDS": 10
}

In [19]:

def get_model():
    """Constructs a model suitable for features."""
    # Define input layers
    Support_Calls_input = Input(shape=(1,), name='Support Calls')
    Payment_Delay_input = Input(shape=(1,), name='Payment Delay')
    Usage_Frequency_input = Input(shape=(1,), name='Usage Frequency')
    Total_Spend_input = Input(shape=(1,), name='Total Spend')
    Last_Interaction_input = Input(shape=(1,), name='Last Interaction')
    Age_18_25_input = Input(shape=(1,), name='Age_binned_(18.0, 25.0]')
    Age_25_35_input = Input(shape=(1,), name='Age_binned_(25.0, 35.0]')
    Age_35_50_input = Input(shape=(1,), name='Age_binned_(35.0, 50.0]')
    Age_50_60_input = Input(shape=(1,), name='Age_binned_(50.0, 60.0]')
    Age_60_70_input = Input(shape=(1,), name='Age_binned_(60.0, 70.0]')
    Age_nan_input = Input(shape=(1,), name='Age_binned_nan')
    Contract_Length_Annual_input = Input(shape=(1,), name='Contract Length_Annual')
    Contract_Length_Monthly_input = Input(shape=(1,), name='Contract Length_Monthly')
    Contract_Length_Quarterly_input = Input(shape=(1,), name='Contract Length_Quarterly')
    Contract_Length_nan_input = Input(shape=(1,), name='Contract Length_nan')
    Gender_Female_input = Input(shape=(1,), name='Gender_Female')
    Gender_Male_input = Input(shape=(1,), name='Gender_Male')
    Gender_nan_input = Input(shape=(1,), name='Gender_nan')
    Subscription_Type_Basic_input = Input(shape=(1,), name='Subscription Type_Basic')
    Subscription_Type_Premium_input = Input(shape=(1,), name='Subscription Type_Premium')
    Subscription_Type_Standard_input = Input(shape=(1,), name='Subscription Type_Standard')
    Subscription_Type_nan_input = Input(shape=(1,), name='Subscription Type_nan')

    # Concatenate the input tensors
    concatenated_inputs = concatenate([
        Support_Calls_input, Payment_Delay_input, Usage_Frequency_input, Total_Spend_input, Last_Interaction_input,
        Age_18_25_input, Age_25_35_input, Age_35_50_input, Age_50_60_input, Age_60_70_input, Age_nan_input,
        Contract_Length_Annual_input, Contract_Length_Monthly_input, Contract_Length_Quarterly_input, Contract_Length_nan_input,
        Gender_Female_input, Gender_Male_input, Gender_nan_input,
        Subscription_Type_Basic_input, Subscription_Type_Premium_input, Subscription_Type_Standard_input, Subscription_Type_nan_input
    ])

    # Define the rest of the model
    x = Flatten()(concatenated_inputs)
    x = Dense(32, activation="relu", kernel_initializer=he_normal())(x)
    x = Dense(96, activation="PReLU", kernel_initializer=he_normal())(x)
    x = Dense(128, activation="tanh", kernel_initializer=glorot_normal())(x)
    x = Dense(96, activation="sigmoid", kernel_initializer=glorot_normal())(x)
    output = Dense(1, activation="sigmoid", kernel_initializer=glorot_normal())(x)

    # Construct the model
    model = tf.keras.models.Model(inputs=[
        Support_Calls_input, Payment_Delay_input, Usage_Frequency_input, Total_Spend_input, Last_Interaction_input,
        Age_18_25_input, Age_25_35_input, Age_35_50_input, Age_50_60_input, Age_60_70_input, Age_nan_input,
        Contract_Length_Annual_input, Contract_Length_Monthly_input, Contract_Length_Quarterly_input, Contract_Length_nan_input,
        Gender_Female_input, Gender_Male_input, Gender_nan_input,
        Subscription_Type_Basic_input, Subscription_Type_Premium_input, Subscription_Type_Standard_input, Subscription_Type_nan_input
    ], outputs=output)

    # Compile the model with accuracy, precision, recall, and F1-score metrics
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
    return model


In [20]:
class FlowerClient(fl.client.NumPyClient):
    def __init__(self, trainset, valset) -> None:
        # Create model
        self.model = get_model()
        self.trainset = trainset
        self.valset = valset

    def get_parameters(self, config):
        return self.model.get_weights()

    def fit(self, parameters, config):
        self.model.set_weights(parameters)
        self.model.fit(self.trainset, epochs=1, verbose=settings['VERBOSE'])
        return self.model.get_weights(), len(self.trainset), {}

    def evaluate(self, parameters, config):
        self.model.set_weights(parameters)
        results = self.model.evaluate(self.valset)

        loss = results[0]
        accuracy = results[1]
        precision = results[2]
        recall = results[3]
        #f1_score = results[4]

        # Convert metrics results to appropriate data types
        loss = float(loss)
        accuracy = float(accuracy)
        precision = float(precision)
        recall = float(recall)
        #f1_score = float(f1_score)


        return loss, len(self.valset), {"accuracy": accuracy, "precision":precision, "recall":recall}

In [21]:
# Download MNIST dataset and partition it
mnist_fds = FederatedDataset(dataset="giulioappetito/churn_dataset_giulioappetito", partitioners={"train": settings['NUM_CLIENTS']})
# Get the whole test set for centralised evaluation
centralized_testset = mnist_fds.load_full("test").to_tf_dataset(
  columns=[
            'Support Calls',
            'Payment Delay',
            'Usage Frequency',
            'Total Spend',
            'Last Interaction',
            'Age_binned_(18.0, 25.0]',
            'Age_binned_(25.0, 35.0]',
            'Age_binned_(35.0, 50.0]',
            'Age_binned_(50.0, 60.0]',
            'Age_binned_(60.0, 70.0]',
            'Age_binned_nan',
            'Contract Length_Annual',
            'Contract Length_Monthly',
            'Contract Length_Quarterly',
            'Contract Length_nan',
            'Gender_Female',
            'Gender_Male',
            'Gender_nan',
            'Subscription Type_Basic',
            'Subscription Type_Premium',
            'Subscription Type_Standard',
            'Subscription Type_nan'
        ],
        label_cols="Churn", batch_size=64
    )



In [22]:
# @title
def get_client_fn(dataset: FederatedDataset):
    """Return a function to construct a client.

    The VirtualClientEngine will execute this function whenever a client is sampled by
    the strategy to participate.
    """

    def client_fn(cid: str) -> fl.client.Client:
        """Construct a FlowerClient with its own dataset partition."""

        # Extract partition for client with id = cid
        client_dataset = dataset.load_partition(int(cid), "train")

        # Now let's split it into train (90%) and validation (10%)
        client_dataset_splits = client_dataset.train_test_split(test_size=0.1)
        trainset = client_dataset_splits["train"].to_tf_dataset(
            columns=[
                'Support Calls',
                'Payment Delay',
                'Usage Frequency',
                'Total Spend',
                'Last Interaction',
                'Age_binned_(18.0, 25.0]',
                'Age_binned_(25.0, 35.0]',
                'Age_binned_(35.0, 50.0]',
                'Age_binned_(50.0, 60.0]',
                'Age_binned_(60.0, 70.0]',
                'Age_binned_nan',
                'Contract Length_Annual',
                'Contract Length_Monthly',
                'Contract Length_Quarterly',
                'Contract Length_nan',
                'Gender_Female',
                'Gender_Male',
                'Gender_nan',
                'Subscription Type_Basic',
                'Subscription Type_Premium',
                'Subscription Type_Standard',
                'Subscription Type_nan'
            ],
            label_cols="Churn", batch_size=32
        )
        valset = client_dataset_splits["test"].to_tf_dataset(
            columns=[
                'Support Calls',
                'Payment Delay',
                'Usage Frequency',
                'Total Spend',
                'Last Interaction',
                'Age_binned_(18.0, 25.0]',
                'Age_binned_(25.0, 35.0]',
                'Age_binned_(35.0, 50.0]',
                'Age_binned_(50.0, 60.0]',
                'Age_binned_(60.0, 70.0]',
                'Age_binned_nan',
                'Contract Length_Annual',
                'Contract Length_Monthly',
                'Contract Length_Quarterly',
                'Contract Length_nan',
                'Gender_Female',
                'Gender_Male',
                'Gender_nan',
                'Subscription Type_Basic',
                'Subscription Type_Premium',
                'Subscription Type_Standard',
                'Subscription Type_nan'
            ],
            label_cols="Churn", batch_size=64
        )

        # Create and return client
        return FlowerClient(trainset, valset).to_client()

    return client_fn


def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    """Aggregation function for (federated) evaluation metrics, i.e. those returned by
    the client's evaluate() method."""
    # Multiply accuracy of each client by number of examples used
    accuracies = [num_examples * m["accuracy"] for num_examples, m in metrics]
    precisions = [num_examples * m["precision"] for num_examples, m in metrics]
    recalls = [num_examples * m["recall"] for num_examples, m in metrics]
    #f1_scores = [num_examples * m["f1_score"] for num_examples, m in metrics]
    examples = [num_examples for num_examples, _ in metrics]

    # Aggregate and return custom metric (weighted average)
    return {"accuracy": sum(accuracies) / sum(examples), "precision":sum(precisions) / sum(examples),"recall":sum(recalls) / sum(examples)}


def get_evaluate_fn(testset: Dataset):
    """Return an evaluation function for server-side (i.e. centralised) evaluation."""

    # The `evaluate` function will be called after every round by the strategy
    def evaluate(
        server_round: int,
        parameters: fl.common.NDArrays,
        config: Dict[str, fl.common.Scalar],
    ):
        model = get_model()  # Construct the model
        model.set_weights(parameters)  # Update model with the latest parameters
        results = model.evaluate(testset)
        loss = results[0]
        return loss, {"accuracy": results[1], "precision":results[2], "recall":results[3]}

    return evaluate


In [None]:
# Create FedAvg strategy
strategy_FedOptim = fl.server.strategy.FedProx(
    proximal_mu=1.0,
    fraction_fit=settings["FRACTION_FIT"],
    fraction_evaluate=settings["FRACTION_EVALUATE"],
    min_fit_clients=settings["MIN_FIT_CLIENTS"],
    min_evaluate_clients=settings["MIN_EVALUATE_CLIENTS"],
    min_available_clients=int(settings["NUM_CLIENTS"] * settings["MIN_AVAILABLE_CLIENTS_FRACTION"]),
    evaluate_metrics_aggregation_fn=weighted_average,
    evaluate_fn=get_evaluate_fn(centralized_testset),
)

strategy_FedProx = fl.server.strategy.FedProx(
    proximal_mu=1.0,
    fraction_fit=settings["FRACTION_FIT"],
    fraction_evaluate=settings["FRACTION_EVALUATE"],
    min_fit_clients=settings["MIN_FIT_CLIENTS"],
    min_evaluate_clients=settings["MIN_EVALUATE_CLIENTS"],
    min_available_clients=int(settings["NUM_CLIENTS"] * settings["MIN_AVAILABLE_CLIENTS_FRACTION"]),
    evaluate_metrics_aggregation_fn=weighted_average,
    evaluate_fn=get_evaluate_fn(centralized_testset),
)

strategy_FedAvg = fl.server.strategy.FedAvg(
    fraction_fit=settings["FRACTION_FIT"],
    fraction_evaluate=settings["FRACTION_EVALUATE"],
    min_fit_clients=settings["MIN_FIT_CLIENTS"],
    min_evaluate_clients=settings["MIN_EVALUATE_CLIENTS"],
    min_available_clients=int(settings["NUM_CLIENTS"] * settings["MIN_AVAILABLE_CLIENTS_FRACTION"]),
    evaluate_metrics_aggregation_fn=weighted_average,
    evaluate_fn=get_evaluate_fn(centralized_testset),
)

# Selezione della strategia
if settings["STRATEGY_TYPE"] == 'FedProx':
    strategy = strategy_FedProx
elif settings["STRATEGY_TYPE"] == 'FedOptim':
    strategy = strategy_FedOptim
else:
    strategy = strategy_FedAvg

# Configurazione delle risorse del client
client_resources = {"num_cpus": 1, "num_gpus": 0.0}

# Avvia la simulazione una volta
history = fl.simulation.start_simulation(
    client_fn=get_client_fn(mnist_fds),
    num_clients=settings["NUM_CLIENTS"],
    config=fl.server.ServerConfig(num_rounds=settings["NUM_ROUNDS"]),
    strategy=strategy,
    client_resources=client_resources,
    actor_kwargs={"on_actor_init_fn": enable_tf_gpu_growth}  # Abilita la crescita della GPU durante l'inizializzazione dell'attore
)

INFO flwr 2024-03-05 16:17:19,439 | app.py:178 | Starting Flower simulation, config: ServerConfig(num_rounds=10, round_timeout=None)
INFO:flwr:Starting Flower simulation, config: ServerConfig(num_rounds=10, round_timeout=None)
2024-03-05 16:17:24,813	INFO worker.py:1621 -- Started a local Ray instance.
INFO flwr 2024-03-05 16:17:26,995 | app.py:213 | Flower VCE: Ray initialized with resources: {'memory': 7895315252.0, 'object_store_memory': 3947657625.0, 'CPU': 2.0, 'node:__internal_head__': 1.0, 'node:172.28.0.12': 1.0}
INFO:flwr:Flower VCE: Ray initialized with resources: {'memory': 7895315252.0, 'object_store_memory': 3947657625.0, 'CPU': 2.0, 'node:__internal_head__': 1.0, 'node:172.28.0.12': 1.0}
INFO flwr 2024-03-05 16:17:26,998 | app.py:219 | Optimize your simulation with Flower VCE: https://flower.dev/docs/framework/how-to-run-simulations.html
INFO:flwr:Optimize your simulation with Flower VCE: https://flower.dev/docs/framework/how-to-run-simulations.html
INFO flwr 2024-03-05 1



INFO flwr 2024-03-05 16:18:03,609 | server.py:94 | initial parameters (loss, other metrics): 1.0796140432357788, {'accuracy': 0.44499316811561584, 'precision': 0.0, 'recall': 0.0}
INFO:flwr:initial parameters (loss, other metrics): 1.0796140432357788, {'accuracy': 0.44499316811561584, 'precision': 0.0, 'recall': 0.0}
INFO flwr 2024-03-05 16:18:03,612 | server.py:104 | FL starting
INFO:flwr:FL starting
DEBUG flwr 2024-03-05 16:18:03,617 | server.py:222 | fit_round 1: strategy sampled 80 clients (out of 100)
DEBUG:flwr:fit_round 1: strategy sampled 80 clients (out of 100)
DEBUG flwr 2024-03-05 16:20:53,998 | server.py:236 | fit_round 1 received 80 results and 0 failures
DEBUG:flwr:fit_round 1 received 80 results and 0 failures




INFO flwr 2024-03-05 16:21:12,599 | server.py:125 | fit progress: (1, 0.3350878357887268, {'accuracy': 0.8784465789794922, 'precision': 0.885421633720398, 'recall': 0.8970737457275391}, 188.98185028599983)
INFO:flwr:fit progress: (1, 0.3350878357887268, {'accuracy': 0.8784465789794922, 'precision': 0.885421633720398, 'recall': 0.8970737457275391}, 188.98185028599983)
DEBUG flwr 2024-03-05 16:21:12,603 | server.py:173 | evaluate_round 1: strategy sampled 20 clients (out of 100)
DEBUG:flwr:evaluate_round 1: strategy sampled 20 clients (out of 100)


1/7 [===>..........................] - ETA: 5s - loss: 0.2049 - accuracy: 0.9531 - precision_42: 0.9231 - recall_42: 1.0000
1/7 [===>..........................] - ETA: 4s - loss: 0.2208 - accuracy: 0.9531 - precision_43: 0.9394 - recall_43: 0.9688[32m [repeated 5x across cluster][0m


DEBUG flwr 2024-03-05 16:21:27,905 | server.py:187 | evaluate_round 1 received 20 results and 0 failures
DEBUG:flwr:evaluate_round 1 received 20 results and 0 failures
DEBUG flwr 2024-03-05 16:21:27,908 | server.py:222 | fit_round 2: strategy sampled 80 clients (out of 100)
DEBUG:flwr:fit_round 2: strategy sampled 80 clients (out of 100)


[2m[36m(DefaultActor pid=67532)[0m 1/7 [===>..........................] - ETA: 4s - loss: 0.2438 - accuracy: 0.8906 - precision_48: 0.9500 - recall_48: 0.8837[32m [repeated 6x across cluster][0m


DEBUG flwr 2024-03-05 16:24:22,281 | server.py:236 | fit_round 2 received 80 results and 0 failures
DEBUG:flwr:fit_round 2 received 80 results and 0 failures




INFO flwr 2024-03-05 16:24:41,583 | server.py:125 | fit progress: (2, 0.299630343914032, {'accuracy': 0.8940044641494751, 'precision': 0.8846460580825806, 'recall': 0.9303304553031921}, 397.9665921709993)
INFO:flwr:fit progress: (2, 0.299630343914032, {'accuracy': 0.8940044641494751, 'precision': 0.8846460580825806, 'recall': 0.9303304553031921}, 397.9665921709993)
DEBUG flwr 2024-03-05 16:24:41,587 | server.py:173 | evaluate_round 2: strategy sampled 20 clients (out of 100)
DEBUG:flwr:evaluate_round 2: strategy sampled 20 clients (out of 100)


[2m[36m(DefaultActor pid=67534)[0m 1/7 [===>..........................] - ETA: 4s - loss: 0.3058 - accuracy: 0.9219 - precision_91: 0.8837 - recall_91: 1.0000[32m [repeated 2x across cluster][0m
1/7 [===>..........................] - ETA: 6s - loss: 0.1621 - accuracy: 0.9531 - precision_93: 1.0000 - recall_93: 0.9167[32m [repeated 7x across cluster][0m
1/7 [===>..........................] - ETA: 4s - loss: 0.3444 - accuracy: 0.8750 - precision_98: 0.8537 - recall_98: 0.9459[32m [repeated 5x across cluster][0m


DEBUG flwr 2024-03-05 16:24:56,830 | server.py:187 | evaluate_round 2 received 20 results and 0 failures
DEBUG:flwr:evaluate_round 2 received 20 results and 0 failures
DEBUG flwr 2024-03-05 16:24:56,833 | server.py:222 | fit_round 3: strategy sampled 80 clients (out of 100)
DEBUG:flwr:fit_round 3: strategy sampled 80 clients (out of 100)




DEBUG flwr 2024-03-05 16:28:02,313 | server.py:236 | fit_round 3 received 80 results and 0 failures
DEBUG:flwr:fit_round 3 received 80 results and 0 failures




INFO flwr 2024-03-05 16:28:22,021 | server.py:125 | fit progress: (3, 0.28421550989151, {'accuracy': 0.9013083577156067, 'precision': 0.8884526491165161, 'recall': 0.9402271509170532}, 618.4045050459999)
INFO:flwr:fit progress: (3, 0.28421550989151, {'accuracy': 0.9013083577156067, 'precision': 0.8884526491165161, 'recall': 0.9402271509170532}, 618.4045050459999)
DEBUG flwr 2024-03-05 16:28:22,026 | server.py:173 | evaluate_round 3: strategy sampled 20 clients (out of 100)
DEBUG:flwr:evaluate_round 3: strategy sampled 20 clients (out of 100)


[2m[36m(DefaultActor pid=67534)[0m 1/7 [===>..........................] - ETA: 3s - loss: 0.1792 - accuracy: 0.9375 - precision_141: 0.9259 - recall_141: 0.9259[32m [repeated 4x across cluster][0m
1/7 [===>..........................] - ETA: 4s - loss: 0.3431 - accuracy: 0.8438 - precision_145: 0.8250 - recall_145: 0.9167[32m [repeated 5x across cluster][0m
1/7 [===>..........................] - ETA: 4s - loss: 0.1790 - accuracy: 0.9531 - precision_148: 0.9250 - recall_148: 1.0000[32m [repeated 5x across cluster][0m


DEBUG flwr 2024-03-05 16:28:37,140 | server.py:187 | evaluate_round 3 received 20 results and 0 failures
DEBUG:flwr:evaluate_round 3 received 20 results and 0 failures
DEBUG flwr 2024-03-05 16:28:37,146 | server.py:222 | fit_round 4: strategy sampled 80 clients (out of 100)
DEBUG:flwr:fit_round 4: strategy sampled 80 clients (out of 100)




In [None]:
import matplotlib.pyplot as plt
from tabulate import tabulate

# I dati dell'accuratezza
print(f"{history.metrics_centralized = }")
global_accuracy_centralised = history.metrics_centralized["accuracy"]
global_recall_centralised = history.metrics_centralized["recall"]
global_precision_centralised = history.metrics_centralized["precision"]


rounds = [data[0] for data in global_accuracy_centralised]
accuracies = [data[1] for data in global_accuracy_centralised]
recalls = [data[1] for data in global_recall_centralised]
precisions = [data[1] for data in global_precision_centralised]
losses = [t[1] for t in history.losses_centralized]

# Calcoliamo gli F1 score
f1_scores = []
for precision, recall in zip(precisions, recalls):
    if precision == 0 or recall == 0:
        f1_scores.append(0)
    else:
        f1_scores.append(2 * (precision * recall) / (precision + recall))

# Plot
plt.plot(rounds, accuracies, label="Accuracy")
plt.plot(rounds, recalls, label="Recall")
plt.plot(rounds, precisions, label="Precision")
plt.plot(rounds, f1_scores, label="F1 Score")

plt.grid()
plt.ylabel("Percentage")
plt.xlabel("Round")
plt.title(str(settings['NUM_CLIENTS']) + ' clients with ' + str(int(settings['FRACTION_FIT']*settings['NUM_CLIENTS'])) + ' clients per round, with strategy ' + settings['STRATEGY_TYPE'])
plt.legend()
plt.show()

# Tabella
data_table = [["Round", "Loss", "Accuracy (%)", "Precision", "Recall", "F1_score"]] + [[r, l, a, p, rec, f1] for r, l, a, p, rec, f1 in zip(rounds, losses, accuracies, precisions, recalls, f1_scores)]
table = tabulate(data_table, headers="firstrow", tablefmt="fancy_grid")
print(table)

print(losses)