<a href="https://colab.research.google.com/github/AnastasiaBrinati/Progetto-ML-23-24/blob/main/task1_federato.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import tensorflow as tf

!pip install -q flwr["simulation"] tensorflow
!pip install -q flwr_datasets["vision"]
!pip install matplotlib

from typing import Dict, List, Tuple
import flwr as fl
from flwr.common import Metrics
import tensorflow as tf
from tensorflow.keras.initializers import he_normal, glorot_normal
from tensorflow.keras.layers import Input, Dense, concatenate, Flatten
!pip install tensorflow-addons
from tensorflow_addons.metrics import F1Score
from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth
from datasets import Dataset
from flwr_datasets import FederatedDataset
from keras.initializers import RandomNormal, he_normal, glorot_normal, he_uniform, glorot_uniform



In [9]:
# Tunable knobs for the federated simulation, gathered in a single mapping.
settings = dict(
    VERBOSE=0,                            # forwarded to Keras as the `verbose` argument
    NUM_CLIENTS=100,                      # number of train partitions / simulated clients
    FRACTION_FIT=0.1,                     # handed to the strategy as `fraction_fit`
    FRACTION_EVALUATE=0.05,               # handed to the strategy as `fraction_evaluate`
    MIN_FIT_CLIENTS=10,                   # handed to the strategy as `min_fit_clients`
    MIN_EVALUATE_CLIENTS=5,               # handed to the strategy as `min_evaluate_clients`
    MIN_AVAILABLE_CLIENTS_FRACTION=0.75,  # multiplied by NUM_CLIENTS for `min_available_clients`
    STRATEGY_TYPE='FedOptim',             # FedOptim, FedAvg, FedProx
    NUM_ROUNDS=10,                        # number of federated rounds to simulate
)

In [10]:

def get_model():
    """Build and compile the binary churn classifier.

    The network exposes one scalar input per feature column (22 named inputs),
    concatenates them into a single vector and feeds that vector through a
    stack of dense layers ending in a single sigmoid unit.

    Returns:
        A compiled ``tf.keras`` model using the Adam optimizer and binary
        cross-entropy loss, reporting accuracy, precision, recall and F1 score.
    """
    # Input names, in the order the model expects them. They are the same
    # column names requested from the dataset when it is converted with
    # `to_tf_dataset`, so each column is routed to the input of the same name.
    feature_names = (
        'Support Calls',
        'Payment Delay',
        'Usage Frequency',
        'Total Spend',
        'Last Interaction',
        'Age_binned_(18.0, 25.0]',
        'Age_binned_(25.0, 35.0]',
        'Age_binned_(35.0, 50.0]',
        'Age_binned_(50.0, 60.0]',
        'Age_binned_(60.0, 70.0]',
        'Age_binned_nan',
        'Contract Length_Annual',
        'Contract Length_Monthly',
        'Contract Length_Quarterly',
        'Contract Length_nan',
        'Gender_Female',
        'Gender_Male',
        'Gender_nan',
        'Subscription Type_Basic',
        'Subscription Type_Premium',
        'Subscription Type_Standard',
        'Subscription Type_nan',
    )

    # One single-value input layer per feature.
    feature_inputs = [Input(shape=(1,), name=feature_name) for feature_name in feature_names]

    # Concatenate the input tensors into one feature vector per example.
    concatenated_inputs = concatenate(feature_inputs)

    # Hidden stack followed by the sigmoid output unit.
    x = Flatten()(concatenated_inputs)
    x = Dense(80, activation="PReLU", kernel_initializer=he_normal())(x)
    x = Dense(64, activation="relu", kernel_initializer=he_normal())(x)
    x = Dense(16, activation="tanh", kernel_initializer=he_normal())(x)
    x = Dense(32, activation="sigmoid", kernel_initializer=glorot_normal())(x)
    output = Dense(1, activation="sigmoid", kernel_initializer=glorot_normal())(x)

    model = tf.keras.models.Model(inputs=feature_inputs, outputs=output)

    # F1Score needs an explicit threshold here: with the default
    # `threshold=None` it takes the argmax over the last axis, which for a
    # single sigmoid output marks every prediction as positive. 0.5 matches
    # the default threshold used by Precision() and Recall().
    model.compile(
        optimizer="adam",
        loss="binary_crossentropy",
        metrics=[
            "accuracy",
            tf.keras.metrics.Precision(),
            tf.keras.metrics.Recall(),
            tf.keras.metrics.F1Score(threshold=0.5),
        ],
    )
    return model


In [11]:
class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client that trains and evaluates the model on local data.

    Each instance owns a freshly built model (via ``get_model``) together with
    the training and validation datasets it was constructed with.
    """

    def __init__(self, trainset, valset) -> None:
        """Store the local datasets and build the local model.

        Args:
            trainset: Dataset passed to ``model.fit`` during ``fit``.
            valset: Dataset passed to ``model.evaluate`` during ``evaluate``.
        """
        self.model = get_model()
        self.trainset = trainset
        self.valset = valset

    def get_parameters(self, config):
        """Return the current weights of the local model."""
        return self.model.get_weights()

    def fit(self, parameters, config):
        """Load the given weights, train for one epoch and return the result.

        Returns:
            A tuple ``(updated_weights, len(trainset), {})``.
        """
        self.model.set_weights(parameters)
        self.model.fit(self.trainset, epochs=1, verbose=settings['VERBOSE'])
        # NOTE(review): if trainset is a batched tf.data.Dataset, len() is the
        # number of batches rather than the number of rows — confirm this is
        # the intended aggregation weight.
        return self.model.get_weights(), len(self.trainset), {}

    def evaluate(self, parameters, config):
        """Load the given weights and evaluate them on the validation data.

        Returns:
            A tuple ``(loss, len(valset), metrics)`` where ``metrics`` holds
            accuracy, precision, recall and f1_score as plain Python floats.
        """
        self.model.set_weights(parameters)
        # Honour the configured verbosity, as fit() already does.
        results = self.model.evaluate(self.valset, verbose=settings['VERBOSE'])

        # Results follow the compile order: loss first, then the four metrics.
        # Cast to float so the values are plain Python scalars.
        loss, accuracy, precision, recall, f1_score = (float(value) for value in results[:5])

        return loss, len(self.valset), {"accuracy": accuracy, "precision":precision, "recall":recall, "f1_score":f1_score}

In [12]:
# Load the churn dataset and split its "train" split into one partition per
# simulated client. (The variable keeps the name `mnist_fds`, but the data is
# the churn dataset named below, not MNIST.)
mnist_fds = FederatedDataset(
    dataset="giulioappetito/churn_dataset_giulioappetito",
    partitioners={"train": settings['NUM_CLIENTS']},
)

# Feature columns requested from the dataset for centralised evaluation.
centralized_feature_columns = [
    'Support Calls',
    'Payment Delay',
    'Usage Frequency',
    'Total Spend',
    'Last Interaction',
    'Age_binned_(18.0, 25.0]',
    'Age_binned_(25.0, 35.0]',
    'Age_binned_(35.0, 50.0]',
    'Age_binned_(50.0, 60.0]',
    'Age_binned_(60.0, 70.0]',
    'Age_binned_nan',
    'Contract Length_Annual',
    'Contract Length_Monthly',
    'Contract Length_Quarterly',
    'Contract Length_nan',
    'Gender_Female',
    'Gender_Male',
    'Gender_nan',
    'Subscription Type_Basic',
    'Subscription Type_Premium',
    'Subscription Type_Standard',
    'Subscription Type_nan',
]

# The whole "test" split, batched, is used for centralised evaluation with
# "Churn" as the label column.
centralized_testset = mnist_fds.load_full("test").to_tf_dataset(
    columns=centralized_feature_columns,
    label_cols="Churn",
    batch_size=64,
)



In [13]:
# @title
def get_client_fn(dataset: FederatedDataset):
    """Return a function to construct a client.

    The VirtualClientEngine will execute the returned function whenever a
    client is sampled by the strategy to participate.

    Args:
        dataset: Federated dataset whose "train" partitions are handed out,
            one per client id.

    Returns:
        A ``client_fn(cid)`` callable that builds the client for partition
        ``int(cid)``.
    """
    # Feature columns requested from every partition; "Churn" is the label.
    feature_columns = [
        'Support Calls',
        'Payment Delay',
        'Usage Frequency',
        'Total Spend',
        'Last Interaction',
        'Age_binned_(18.0, 25.0]',
        'Age_binned_(25.0, 35.0]',
        'Age_binned_(35.0, 50.0]',
        'Age_binned_(50.0, 60.0]',
        'Age_binned_(60.0, 70.0]',
        'Age_binned_nan',
        'Contract Length_Annual',
        'Contract Length_Monthly',
        'Contract Length_Quarterly',
        'Contract Length_nan',
        'Gender_Female',
        'Gender_Male',
        'Gender_nan',
        'Subscription Type_Basic',
        'Subscription Type_Premium',
        'Subscription Type_Standard',
        'Subscription Type_nan',
    ]

    def _to_batched_tf_dataset(split, batch_size):
        """Convert one split into a batched TF dataset of (features, Churn)."""
        return split.to_tf_dataset(
            columns=feature_columns,
            label_cols="Churn",
            batch_size=batch_size,
        )

    def client_fn(cid: str) -> fl.client.Client:
        """Construct a FlowerClient with its own dataset partition."""

        # Extract partition for client with id = cid
        client_dataset = dataset.load_partition(int(cid), "train")

        # Split into train (90%) and validation (10%). The seed is fixed
        # because this function runs again every time the client is sampled:
        # an unseeded split would be redrawn each time, so rows validated on
        # in one round could be trained on in another.
        client_dataset_splits = client_dataset.train_test_split(test_size=0.1, seed=42)
        trainset = _to_batched_tf_dataset(client_dataset_splits["train"], batch_size=32)
        valset = _to_batched_tf_dataset(client_dataset_splits["test"], batch_size=64)

        # Create and return client
        return FlowerClient(trainset, valset).to_client()

    return client_fn


def weighted_average(metrics: List[Tuple[int, Metrics]]) -> Metrics:
    """Aggregate the (federated) evaluation metrics returned by the clients.

    Each client metric is weighted by the number of examples that client
    reported alongside it.

    Args:
        metrics: One ``(num_examples, client_metrics)`` pair per client, where
            ``client_metrics`` contains "accuracy", "precision", "recall" and
            "f1_score".

    Returns:
        A dict with the example-weighted mean of each of the four metrics, or
        an empty dict when the total number of examples is zero (no clients,
        or every client reported zero examples), so no division by zero occurs.
    """
    total_examples = sum(num_examples for num_examples, _ in metrics)
    if total_examples == 0:
        # Nothing to average over.
        return {}

    metric_names = ("accuracy", "precision", "recall", "f1_score")
    return {
        name: sum(num_examples * client_metrics[name] for num_examples, client_metrics in metrics) / total_examples
        for name in metric_names
    }


def get_evaluate_fn(testset: Dataset):
    """Return an evaluation function for server-side (i.e. centralised) evaluation.

    Args:
        testset: Dataset passed to ``model.evaluate`` on every call.

    Returns:
        An ``evaluate(server_round, parameters, config)`` callable that
        returns ``(loss, metrics)`` with accuracy, precision, recall and
        f1_score as plain Python floats.
    """

    # The `evaluate` function will be called after every round by the strategy
    def evaluate(
        server_round: int,
        parameters: fl.common.NDArrays,
        config: Dict[str, fl.common.Scalar],
    ):
        model = get_model()  # Construct the model
        model.set_weights(parameters)  # Update model with the latest parameters
        results = model.evaluate(testset, verbose=settings['VERBOSE'])

        # Results follow the compile order: loss first, then the four metrics.
        # Cast to float, as the client-side evaluate does, so the recorded
        # history holds plain scalars rather than array-valued metric results.
        loss, accuracy, precision, recall, f1_score = (float(value) for value in results[:5])
        return loss, {"accuracy": accuracy, "precision":precision, "recall":recall, "f1_score":f1_score}

    return evaluate


In [14]:
# Keyword arguments common to every strategy built below: client sampling
# fractions and minimums from `settings`, the client-metric aggregation
# function and the centralised evaluation function.
shared_strategy_kwargs = dict(
    fraction_fit=settings["FRACTION_FIT"],
    fraction_evaluate=settings["FRACTION_EVALUATE"],
    min_fit_clients=settings["MIN_FIT_CLIENTS"],
    min_evaluate_clients=settings["MIN_EVALUATE_CLIENTS"],
    min_available_clients=int(settings["NUM_CLIENTS"] * settings["MIN_AVAILABLE_CLIENTS_FRACTION"]),
    evaluate_metrics_aggregation_fn=weighted_average,
    evaluate_fn=get_evaluate_fn(centralized_testset),
)

# NOTE(review): the strategy registered as "FedOptim" is constructed as FedProx
# with the same arguments as `strategy_FedProx` — confirm whether a different
# strategy class was intended.
strategy_FedOptim = fl.server.strategy.FedProx(proximal_mu=1.0, **shared_strategy_kwargs)

strategy_FedProx = fl.server.strategy.FedProx(proximal_mu=1.0, **shared_strategy_kwargs)

strategy_FedAvg = fl.server.strategy.FedAvg(**shared_strategy_kwargs)

# Strategy selection: any STRATEGY_TYPE other than 'FedProx' or 'FedOptim'
# falls back to FedAvg.
strategy = {
    'FedProx': strategy_FedProx,
    'FedOptim': strategy_FedOptim,
}.get(settings["STRATEGY_TYPE"], strategy_FedAvg)

# Resources allotted to each simulated client.
client_resources = dict(num_cpus=1, num_gpus=0.0)

# Run the simulation once and keep the returned history.
history = fl.simulation.start_simulation(
    client_fn=get_client_fn(mnist_fds),
    num_clients=settings["NUM_CLIENTS"],
    config=fl.server.ServerConfig(num_rounds=settings["NUM_ROUNDS"]),
    strategy=strategy,
    client_resources=client_resources,
    # Run enable_tf_gpu_growth when each actor is initialised.
    actor_kwargs={"on_actor_init_fn": enable_tf_gpu_growth},
)

INFO flwr 2024-02-29 13:19:02,587 | app.py:178 | Starting Flower simulation, config: ServerConfig(num_rounds=10, round_timeout=None)
INFO:flwr:Starting Flower simulation, config: ServerConfig(num_rounds=10, round_timeout=None)
2024-02-29 13:19:12,959	INFO worker.py:1621 -- Started a local Ray instance.
INFO flwr 2024-02-29 13:19:17,878 | app.py:213 | Flower VCE: Ray initialized with resources: {'node:__internal_head__': 1.0, 'node:172.28.0.12': 1.0, 'CPU': 2.0, 'memory': 7842462107.0, 'object_store_memory': 3921231052.0}
INFO:flwr:Flower VCE: Ray initialized with resources: {'node:__internal_head__': 1.0, 'node:172.28.0.12': 1.0, 'CPU': 2.0, 'memory': 7842462107.0, 'object_store_memory': 3921231052.0}
INFO flwr 2024-02-29 13:19:17,897 | app.py:219 | Optimize your simulation with Flower VCE: https://flower.dev/docs/framework/how-to-run-simulations.html
INFO:flwr:Optimize your simulation with Flower VCE: https://flower.dev/docs/framework/how-to-run-simulations.html
INFO flwr 2024-02-29 1

RuntimeError: Simulation crashed.

In [None]:
import matplotlib.pyplot as plt
from tabulate import tabulate

# Centralised metrics recorded by the server, one (round, value) pair per entry.
print(f"{history.metrics_centralized = }")
global_accuracy_centralised = history.metrics_centralized["accuracy"]
global_recall_centralised = history.metrics_centralized["recall"]
global_precision_centralised = history.metrics_centralized["precision"]
global_f1_score_centralised = history.metrics_centralized["f1_score"]

# Round numbers come from the accuracy series; every metric is scaled to percent.
rounds = [data[0] for data in global_accuracy_centralised]
accuracies = [100.0 * data[1] for data in global_accuracy_centralised]
recalls = [100.0 * data[1] for data in global_recall_centralised]
precisions = [100.0 * data[1] for data in global_precision_centralised]
f1_scores = [100.0 * data[1] for data in global_f1_score_centralised]

# Plot: one labelled line (with point markers) per metric.
fig, ax = plt.subplots()
for series_label, series_values in (
    ("Accuracy", accuracies),
    ("Recall", recalls),
    ("Precision", precisions),
    ("F1 Score", f1_scores),
):
    ax.plot(rounds, series_values, marker="o", label=series_label)

ax.grid()
ax.set_xlabel("Round")
# All four series share this axis, so the label is not specific to accuracy.
ax.set_ylabel("Score (%)")
ax.set_title(
    f"{settings['NUM_CLIENTS']} clients with "
    f"{int(settings['FRACTION_FIT'] * settings['NUM_CLIENTS'])} clients per round, "
    f"with strategy {settings['STRATEGY_TYPE']}"
)
# The legend must be attached before the figure is shown; calling it after
# plt.show() leaves the rendered figure without one.
ax.legend()
plt.show()

# Table: every column holds percentages, so the headers say so.
data_table = [["Round", "Accuracy (%)", "Precision (%)", "Recall (%)", "F1 score (%)"]] + [
    [r, a, p, rec, f1]
    for r, a, p, rec, f1 in zip(rounds, accuracies, precisions, recalls, f1_scores)
]
table = tabulate(data_table, headers="firstrow", tablefmt="fancy_grid")
print(table)