In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the dataset from the CSV next to the notebook; the split cell below
# expects it to contain a 'diagnosis' column, which is used as the label.
df = pd.read_csv(filepath_or_buffer='./cl1.csv')

In [3]:
from sklearn.model_selection import train_test_split

# Split features and label into train/test parts.
# Every column except 'diagnosis' is a feature; 'diagnosis' is the label.
# 20% of the rows are held out and the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='diagnosis'),
    df['diagnosis'],
    test_size=0.2,
    random_state=42,
)

# Report (rows, columns) of both feature frames as a quick sanity check.
print("Shape of training set:", X_train.shape)
print("Shape of test set:", X_test.shape)

Shape of training set: (227, 31)
Shape of test set: (57, 31)


In [5]:
import argparse
import warnings
from typing import Union
from logging import INFO
from datasets import Dataset, DatasetDict
import xgboost as xgb

import flwr as fl
from flwr_datasets import FederatedDataset
from flwr.common.logger import log
from flwr.common import (
    Code,
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    GetParametersIns,
    GetParametersRes,
    Parameters,
    Status,
)
from flwr_datasets.partitioner import IidPartitioner

from sklearn.metrics import confusion_matrix, classification_report
from sklearn import metrics 
warnings.filterwarnings("ignore", category=UserWarning)

# Reformat data to DMatrix for xgboost
log(INFO, "Reformatting data...")
train_dmatrix = xgb.DMatrix(X_train, y_train, enable_categorical=True)
valid_dmatrix = xgb.DMatrix(X_test, y_test, enable_categorical=True)

# Aliases kept so any code that still refers to the old names keeps working.
xgb_train = train_dmatrix
xgb_test = valid_dmatrix

# Sample counts reported to the server as `num_examples`.
# Derived from the actual split instead of being hard-coded, so they stay
# correct when the CSV or the test_size of the split changes.
num_train = len(X_train)
num_val = len(X_test)


# Hyper-parameters for xgboost training
num_local_round = 1  # boosting rounds performed locally per federated round
params = {
    "objective": "binary:logistic",
    "eta": 0.5,  # Learning rate
    "max_depth": 8,
    "eval_metric": "error",
    "nthread": 16,
    "num_parallel_tree": 1,
    "subsample": 1,
    "tree_method": "hist",
}


# Define Flower client
class XgbClient(fl.client.Client):
    """Flower client that boosts an XGBoost model on the local training split.

    The client reads the module-level ``train_dmatrix``, ``valid_dmatrix``,
    ``params``, ``num_local_round``, ``num_train``, ``num_val`` and ``y_test``.
    """

    def __init__(self):
        # Current booster; stays None until the first call to fit().
        self.bst = None
        # Booster configuration captured after the first local training and
        # re-applied every time a global model is loaded.
        self.config = None

    def get_parameters(self, ins: GetParametersIns) -> GetParametersRes:
        """Return an empty parameter set with an OK status."""
        _ = (self, ins)
        return GetParametersRes(
            status=Status(
                code=Code.OK,
                message="OK",
            ),
            parameters=Parameters(tensor_type="", tensors=[]),
        )

    def _local_boost(self):
        """Run ``num_local_round`` more boosting rounds and return only the new trees."""
        # Update trees based on local training data.
        for _ in range(num_local_round):
            self.bst.update(train_dmatrix, self.bst.num_boosted_rounds())

        # Extract the last N=num_local_round trees for server aggregation
        total_rounds = self.bst.num_boosted_rounds()
        return self.bst[total_rounds - num_local_round : total_rounds]

    def fit(self, ins: FitIns) -> FitRes:
        """Train locally and return the resulting trees serialized as JSON bytes.

        On the first call a booster is trained from scratch with ``xgb.train``.
        On later calls the model sent in ``ins.parameters`` is loaded into the
        existing booster and boosted further on the local data.

        Raises:
            ValueError: if a booster already exists but ``ins.parameters``
                carries no tensors to load as the global model.
        """
        if self.bst is None:
            # First round local training
            log(INFO, "Start training at round 1")
            bst = xgb.train(
                params,
                train_dmatrix,
                num_boost_round=num_local_round,
                evals=[(valid_dmatrix, "validate"), (train_dmatrix, "train")],
            )
            self.config = bst.save_config()
            self.bst = bst
        else:
            tensors = ins.parameters.tensors
            if not tensors:
                # Fail with a clear message instead of an unbound-variable error.
                raise ValueError(
                    "fit() received no global model tensors from the server"
                )
            # The last tensor is used as the global model (matches the previous
            # loop, which overwrote the variable on every iteration).
            global_model = bytearray(tensors[-1])

            # Load global model into booster
            self.bst.load_model(global_model)
            self.bst.load_config(self.config)

            bst = self._local_boost()

        local_model_bytes = bytes(bst.save_raw("json"))

        return FitRes(
            status=Status(
                code=Code.OK,
                message="OK",
            ),
            parameters=Parameters(tensor_type="", tensors=[local_model_bytes]),
            num_examples=num_train,
            metrics={},
        )

    def evaluate(self, ins: EvaluateIns) -> EvaluateRes:
        """Evaluate the current booster on the validation split.

        Prints a confusion matrix and classification report against ``y_test``
        and reports the validation error under the ``"error"`` metric key.
        The model in ``ins.parameters`` is not loaded here; the booster left by
        the most recent fit() call is evaluated.

        Raises:
            RuntimeError: if called before fit() has created a booster.
        """
        if self.bst is None:
            raise RuntimeError(
                "evaluate() called before any model was trained; run fit() first"
            )

        eval_results = self.bst.eval_set(
            evals=[(valid_dmatrix, "valid")],
            iteration=self.bst.num_boosted_rounds() - 1,
        )
        # Take the value after the first tab and the first colon; with
        # eval_metric "error" this is the validation error rate.
        error = round(float(eval_results.split("\t")[1].split(":")[1]), 4)

        # Turn predicted probabilities into 0/1 labels with a 0.5 threshold.
        preds = self.bst.predict(valid_dmatrix)
        y_pred_labels = (preds >= 0.5).astype(int)
        print("Confusion Matrix: \n", confusion_matrix(y_test, y_pred_labels))
        print(classification_report(y_test, y_pred_labels))

        return EvaluateRes(
            status=Status(
                code=Code.OK,
                message="OK",
            ),
            loss=0.0,
            num_examples=num_val,
            metrics={"error": error},
        )


# Start Flower client
# Builds one XgbClient and connects it to the server address 127.0.0.1:8080.
fl.client.start_client(
    server_address="127.0.0.1:8080",
    client=XgbClient().to_client(),
)

INFO flwr 2024-05-14 10:27:40,216 | 1498306053.py:45 | Reformatting data...
INFO flwr 2024-05-14 10:27:40,220 | grpc.py:52 | Opened insecure gRPC connection (no certificates were passed)
DEBUG flwr 2024-05-14 10:27:40,235 | connection.py:55 | ChannelConnectivity.IDLE
DEBUG flwr 2024-05-14 10:27:40,238 | connection.py:55 | ChannelConnectivity.READY
INFO flwr 2024-05-14 10:27:43,230 | 1498306053.py:98 | Start training at round 1


[0]	validate-error:0.12281	train-error:0.02643
Confusion Matrix: 
 [[21  6]
 [ 1 29]]
              precision    recall  f1-score   support

           0       0.95      0.78      0.86        27
           1       0.83      0.97      0.89        30

    accuracy                           0.88        57
   macro avg       0.89      0.87      0.87        57
weighted avg       0.89      0.88      0.88        57

Confusion Matrix: 
 [[26  1]
 [ 4 26]]
              precision    recall  f1-score   support

           0       0.87      0.96      0.91        27
           1       0.96      0.87      0.91        30

    accuracy                           0.91        57
   macro avg       0.91      0.91      0.91        57
weighted avg       0.92      0.91      0.91        57

Confusion Matrix: 
 [[25  2]
 [ 4 26]]
              precision    recall  f1-score   support

           0       0.86      0.93      0.89        27
           1       0.93      0.87      0.90        30

    accuracy      

DEBUG flwr 2024-05-14 10:27:43,855 | connection.py:220 | gRPC channel closed


Confusion Matrix: 
 [[25  2]
 [ 3 27]]
              precision    recall  f1-score   support

           0       0.89      0.93      0.91        27
           1       0.93      0.90      0.92        30

    accuracy                           0.91        57
   macro avg       0.91      0.91      0.91        57
weighted avg       0.91      0.91      0.91        57

Confusion Matrix: 
 [[25  2]
 [ 3 27]]
              precision    recall  f1-score   support

           0       0.89      0.93      0.91        27
           1       0.93      0.90      0.92        30

    accuracy                           0.91        57
   macro avg       0.91      0.91      0.91        57
weighted avg       0.91      0.91      0.91        57

Confusion Matrix: 
 [[25  2]
 [ 3 27]]
              precision    recall  f1-score   support

           0       0.89      0.93      0.91        27
           1       0.93      0.90      0.92        30

    accuracy                           0.91        57
   macro av

INFO flwr 2024-05-14 10:27:43,856 | app.py:398 | Disconnect and shut down


In [11]:
from xgboost import XGBClassifier 

# Baseline: a single, non-federated XGBoost classifier on the same split.
# NOTE(review): binding the classifier to `xgb` shadows the `xgboost` module
# alias imported in the federated-client cell; later cells rely on `xgb` being
# this fitted classifier, so run this cell before them.
# 93 -- presumably a score noted by the author; TODO confirm
xgb = XGBClassifier(objective = 'binary:logistic').fit(X_train, y_train)
print()
# Predicted labels for the held-out rows, reused by the metric cells below.
y_pred = xgb.predict(X_test)




In [12]:
# accuracy score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Training accuracy first, then held-out accuracy.
# `xgb` must be the fitted classifier here, not the xgboost module.
y_train_pred = xgb.predict(X_train)
print(accuracy_score(y_true=y_train, y_pred=y_train_pred))

xgb_acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print(xgb_acc)

AttributeError: module 'xgboost' has no attribute 'predict'

In [13]:
# Confusion matrix and per-class precision/recall for the held-out predictions.
print("Confusion Matrix: \n", confusion_matrix(y_true=y_test, y_pred=y_pred))
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

Confusion Matrix: 
 [[26  1]
 [ 1 29]]
              precision    recall  f1-score   support

           0       0.96      0.96      0.96        27
           1       0.97      0.97      0.97        30

    accuracy                           0.96        57
   macro avg       0.96      0.96      0.96        57
weighted avg       0.96      0.96      0.96        57



In [None]:
# Recompute held-out predictions (repeats the last step of the baseline cell).
# `xgb` must be the fitted classifier here, not the xgboost module.
y_pred = xgb.predict(X=X_test)

In [None]:
# accuracy score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Training accuracy is printed; held-out accuracy is only stored in xgb_acc.
y_train_pred = xgb.predict(X_train)
print(accuracy_score(y_true=y_train, y_pred=y_train_pred))

xgb_acc = accuracy_score(y_true=y_test, y_pred=y_pred)