In [35]:
import pandas as pd
import numpy as np

import flwr as fl
from flwr.common import Metrics
from logging import INFO, DEBUG
from flwr.common.logger import log

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

from collections import OrderedDict
from typing import List, Tuple, Union, Dict
import sys
import time
import warnings
warnings.simplefilter('ignore')

In [36]:
# Simulation settings shared by the cells below.
NUM_CLIENTS: int = 3  # number of simulated clients (and of training partitions)
ROUNDS: int = 5       # federated rounds passed to the server configuration
EPOCHS: int = 10      # NOTE(review): not referenced in the visible cells -- confirm it is still needed

In [37]:
# def load_dataset():
#     df = pd.read_csv('../datasets/label_data.csv')
#     df = df.rename(columns={'label': 'target'})
#     X = df.iloc[:, :-1]
#     y = df.loc[:, 'target']
#     
#     # Split the training set and testing set in 80% ratio
#     X_train, X_test, y_train, y_test = (
#         train_test_split(
#             X, y, 
#             train_size=0.8,
#             random_state=42, 
#             shuffle=True,
#             stratify=y,
#         )
#     )
#     
#     # Split the training set by the number of clients
#     X_train = np.array_split(X_train, NUM_CLIENTS)
#     y_train = np.array_split(y_train, NUM_CLIENTS)
#     
#     return X_train, X_test, y_train, y_test
#     
# X_train, X_test, y_train, y_test = load_dataset()

In [38]:
# A (features, labels) pair.
XY = Tuple[np.ndarray, np.ndarray]
# A list of (features, labels) pairs, e.g. one entry per partition.
XYList = List[XY]
# ((train features, train labels), (test features, test labels)).
Dataset = Tuple[XY, XY]
# Logistic-regression parameters: (coef, intercept), or (coef,) when no intercept is fitted.
LogRegParams = Union[XY, Tuple[np.ndarray]]


def load_dataset() -> Dataset:
    """Read the labelled CSV and return a stratified 80/20 train/test split.

    The ``label`` column is renamed to ``target`` and used as the label; every
    other column is treated as a feature.

    Returns:
        ``((X_train, y_train), (X_test, y_test))``. The objects are whatever
        ``train_test_split`` yields for the pandas inputs given here
        (presumably DataFrame/Series rather than plain ndarrays, despite the
        ``Dataset`` alias -- TODO confirm if callers need ndarrays).
    """
    df = pd.read_csv('../datasets/label_data.csv')
    df = df.rename(columns={'label': 'target'})

    # Select the features by name instead of by position: slicing off "the
    # last column" silently leaks the target into X (and drops a real
    # feature) whenever the label is not the final column of the CSV.
    X = df.drop(columns='target')
    y = df.loc[:, 'target']

    # Hold out 20% for testing; stratify so both splits keep the class ratio,
    # and fix the seed so the split is reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        train_size=0.8,
        random_state=42,
        shuffle=True,
        stratify=y,
    )

    return (X_train, y_train), (X_test, y_test)


def partition(X: np.ndarray, y: np.ndarray, num_partitions: int) -> XYList:
    """Split features and labels into ``num_partitions`` aligned chunks.

    Both inputs are cut with ``np.array_split`` using the same partition
    count, and the i-th feature chunk is paired with the i-th label chunk.

    Returns:
        A list of ``(X_chunk, y_chunk)`` tuples, one per partition.
    """
    feature_chunks = np.array_split(X, num_partitions)
    label_chunks = np.array_split(y, num_partitions)
    return [(x_chunk, y_chunk) for x_chunk, y_chunk in zip(feature_chunks, label_chunks)]

In [39]:
def get_model_params(model: LogisticRegression) -> LogRegParams:
    """Collect the model's parameter arrays as a list.

    Returns:
        ``[coef_]``, followed by ``intercept_`` when the model was configured
        with ``fit_intercept``.
    """
    params = [model.coef_]
    if model.fit_intercept:
        params.append(model.intercept_)
    return params


def set_model_params(
    model: LogisticRegression, params: LogRegParams
) -> LogisticRegression:
    """Write ``params`` into ``model`` in place and return the same model.

    ``params[0]`` becomes ``coef_``; ``params[1]`` becomes ``intercept_`` only
    when the model uses an intercept.
    """
    model.coef_ = params[0]
    if not model.fit_intercept:
        # No intercept term to restore.
        return model
    model.intercept_ = params[1]
    return model


def params_initialize(
    model: LogisticRegression, n_classes: int = 2, n_features: int = 3
):
    """Give an unfitted model zero-valued parameters so they can be exchanged.

    A fresh ``LogisticRegression`` has no ``coef_``/``intercept_``/``classes_``
    until it is fitted; this sets them so the parameters can be read and
    written before any training has happened.

    Args:
        model: Estimator to initialise in place.
        n_classes: Number of target classes (labels are assumed to be
            ``0 .. n_classes - 1``).
        n_features: Number of input features.
    """
    model.classes_ = np.arange(n_classes)

    # scikit-learn stores a single weight row for binary problems
    # (coef_ is (1, n_features), intercept_ is (1,)); use the same layout so
    # the initial parameters match the shape a fitted model produces.
    n_rows = 1 if n_classes == 2 else n_classes

    model.coef_ = np.zeros((n_rows, n_features))
    if model.fit_intercept:
        model.intercept_ = np.zeros((n_rows,))

In [40]:
# Load the data once. The held-out test split is read by the evaluation code
# (client-side below, and any later cell that uses X_test / y_test).
(X_train, y_train), (X_test, y_test) = load_dataset()

# One training shard per simulated client. Previously a single shard was
# picked at random and every client trained on that same shard, so the client
# id had no influence on the data.
client_partitions = partition(X_train, y_train, NUM_CLIENTS)


def make_client_model() -> LogisticRegression:
    """Create a client-side estimator with placeholder (zero) parameters."""
    client_model = LogisticRegression(
        penalty="l2",
        class_weight='balanced',
        warm_start=True,
    )
    params_initialize(client_model)
    return client_model


# Module-level model kept for any code that still refers to `model`;
# FlowerClient instances use their own estimator instead.
model = make_client_model()


class FlowerClient(fl.client.NumPyClient):
    """Flower NumPy client that trains a logistic regression on its own shard.

    Each instance owns its estimator and its training partition, so it does
    not depend on module-level names that other cells may rebind.
    """

    def __init__(self, cid):
        self.cid = cid
        # Assumes cid is a numeric string/int (e.g. "0", "1", ...), as used by
        # the simulation when only `num_clients` is given; raises ValueError
        # otherwise. The modulo keeps the index inside the partition list.
        shard = int(cid) % NUM_CLIENTS
        self.X_train, self.y_train = client_partitions[shard]
        self.model = make_client_model()

    def get_parameters(self, config):
        """Return this client's current model parameters."""
        log(INFO, f"Client {self.cid} received the parameters")
        return get_model_params(self.model)

    def fit(self, params, config):
        """Load ``params``, train on the local shard, return the new parameters.

        Returns:
            ``(parameters, number of local training rows, empty metrics dict)``.
        """
        set_model_params(self.model, params)
        log(INFO, f"Client {self.cid} fit with config: {config}")
        self.model.fit(self.X_train, self.y_train)
        # .get(): a missing key in a purely informational message must not
        # fail the round after training has already completed.
        print(f"Training finished for round {config.get('server_round', 'unknown')}")
        return get_model_params(self.model), len(self.X_train), {}

    def evaluate(self, params, config):
        """Load ``params`` and score them on the shared test split.

        Returns:
            ``(log loss, number of test rows, {'accuracy': ...})``.
        """
        log(INFO, f"Client {self.cid} evaluate with config: {config}")
        set_model_params(self.model, params)
        loss = log_loss(y_test, self.model.predict_proba(X_test))
        accuracy = self.model.score(X_test, y_test)
        # TODO: precision/recall/F1 would need self.model.predict(X_test)
        # compared against y_test (not X_test against y_test).
        return loss, len(X_test), {'accuracy': accuracy}


def client_fn(cid) -> FlowerClient:
    """Factory handed to the simulation: build the client for ``cid``."""
    return FlowerClient(cid)

In [41]:
def fit_round(server_round: int) -> Dict:
    """Build the fit configuration for one round.

    Returns:
        A new dict with the single key ``"server_round"`` set to the given
        round number.
    """
    round_config = {}
    round_config["server_round"] = server_round
    return round_config


def get_evaluate_fn(model: LogisticRegression):
    """Return an evaluation callback bound to ``model``.

    The callback writes the supplied parameters into ``model`` (in place) and
    scores it on the module-level ``X_test`` / ``y_test``.
    """

    def evaluate(server_round, parameters: fl.common.NDArrays, config):
        """Return ``(log loss, {'accuracy': ...})`` for ``parameters``."""
        set_model_params(model, parameters)
        class_probabilities = model.predict_proba(X_test)
        loss = log_loss(y_test, class_probabilities)
        metrics = {'accuracy': model.score(X_test, y_test)}
        # TODO: precision/recall/F1 could be added to `metrics` from
        # model.predict(X_test) compared against y_test.
        return loss, metrics

    return evaluate

In [42]:
# Server-side model used only for centralized evaluation. It gets its own
# name so that this cell never rebinds the module-level `model` created for
# the clients in an earlier cell (rebinding it would silently swap in a
# default-configured estimator for any code that looks `model` up later).
server_model = LogisticRegression()
params_initialize(server_model)

# FedAvg over all clients: every client must be available and takes part in
# both fitting and evaluation each round.
strategy = fl.server.strategy.FedAvg(
    fraction_fit=1.0,
    fraction_evaluate=1.0,
    min_fit_clients=NUM_CLIENTS,
    min_evaluate_clients=NUM_CLIENTS,
    min_available_clients=NUM_CLIENTS,
    evaluate_fn=get_evaluate_fn(server_model),
    on_fit_config_fn=fit_round,
)

# NOTE(review): the log path is relative to the notebook's working directory
# and the target directory must already exist -- confirm before running.
fl.common.logger.configure(identifier="FL_Model", filename="../Testing/log.txt")

fl.simulation.start_simulation(
    client_fn=client_fn,
    num_clients=NUM_CLIENTS,
    config=fl.server.ServerConfig(num_rounds=ROUNDS),
    strategy=strategy,
)

INFO flwr 2023-11-07 16:49:24,624 | app.py:175 | Starting Flower simulation, config: ServerConfig(num_rounds=1, round_timeout=None)


FileNotFoundError: [WinError 2] 系统找不到指定的文件。

In [None]:
#!pip install -U flwr[simulation]
#!pip install ray
#!pip install numpy

In [None]:
# Create RandomForestClassifier model
# model = RandomForestClassifier(
#     class_weight='balanced',
#     warm_start=True,
# )