In [1]:
# Import necessary libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset
pima = pd.read_csv("./cl2.csv")
print(pima.head())

   Unnamed: 0  Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  \
0         720            4       83             86             19        0   
1         619            0      119              0              0        0   
2         510           12       84             72             31        0   
3         362            5      103            108             37        0   
4         141            5      106             82             30        0   

    BMI  DiabetesPedigreeFunction  Age  Outcome  
0  29.3                     0.317   34        0  
1  32.4                     0.141   24        1  
2  29.7                     0.297   46        1  
3  39.2                     0.305   65        0  
4  39.5                     0.286   38        0  


In [2]:
# Split the data into features (X) and target variable (y)
X = pima.drop(columns='Outcome')
y = pima['Outcome']

In [9]:
# Number of negative and positive cases in the data
num_obs = len(pima)
negative = len(pima.loc[pima['Outcome'] == 0])
positive = len(pima.loc[pima['Outcome'] == 1])
print("Number of negative cases:  {0} ({1:2.2f}%)".format(negative, ((1.00 * negative)/(1.0 * num_obs)) * 100))
print("Number of positve cases:  {0} ({1:2.2f}%)".format(positive, ((1.00 * positive)/(1.0 * num_obs)) * 100))

Number of negative cases:  257 (66.93%)
Number of positve cases:  127 (33.07%)


In [3]:
# Split xscale
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.2, random_state = 0) 

In [11]:
# Number of each case in the data training and testing
print("Original negative : {0} ({1:0.2f}%)".format(len(pima.loc[pima['Outcome'] == 0]), (len(pima.loc[pima['Outcome'] == 0])/len(pima.index)) * 100.0))
print("Original positive : {0} ({1:0.2f}%)".format(len(pima.loc[pima['Outcome'] == 1]), (len(pima.loc[pima['Outcome'] == 1])/len(pima.index)) * 100.0))
print("")
print("Training negative : {0} ({1:0.2f}%)".format(len(y_train[y_train[:] == 0]), (len(y_train[y_train[:] == 0])/len(y_train) * 100.0)))
print("Training positive : {0} ({1:0.2f}%)".format(len(y_train[y_train[:] == 1]), (len(y_train[y_train[:] == 1])/len(y_train) * 100.0)))
print("")
print("Test negative     : {0} ({1:0.2f}%)".format(len(y_test[y_test[:] == 0]), (len(y_test[y_test[:] == 0])/len(y_test) * 100.0)))
print("Test positive     : {0} ({1:0.2f}%)".format(len(y_test[y_test[:] == 1]), (len(y_test[y_test[:] == 1])/len(y_test) * 100.0)))

Original negative : 257 (66.93%)
Original positive : 127 (33.07%)

Training negative : 208 (67.75%)
Training positive : 99 (32.25%)

Test negative     : 49 (63.64%)
Test positive     : 28 (36.36%)


In [12]:
print(X_train.shape,X_test.shape)

(307, 9) (77, 9)


In [None]:
# Confusion Matrix: 
#  [[46  3] cl2 after
#  [11 17]]
#               precision    recall  f1-score   support

#            0       0.81      0.94      0.87        49
#            1       0.85      0.61      0.71        28

#     accuracy                           0.82        77
#    macro avg       0.83      0.77      0.79        77
# weighted avg       0.82      0.82      0.81        77


In [14]:
import argparse
import warnings
from typing import Union
from logging import INFO
from datasets import Dataset, DatasetDict
import xgboost as xgb
import numpy as np
import flwr as fl
from flwr_datasets import FederatedDataset
from flwr.common.logger import log
from flwr.common import (
    Code,
    EvaluateIns,
    EvaluateRes,
    FitIns,
    FitRes,
    GetParametersIns,
    GetParametersRes,
    Parameters,
    Status,
)
from flwr_datasets.partitioner import IidPartitioner


warnings.filterwarnings("ignore", category=UserWarning)

# def transform_dataset_to_dmatrix(data: Union[Dataset, DatasetDict]) -> xgb.core.DMatrix:
#     """Transform dataset to DMatrix format for xgboost."""
#     x = data["inputs"]
#     y = data["label"]
#     new_data = xgb.DMatrix(x, label=y)
#     return new_data

# # Train/test splitting
# train_data, valid_data, num_train, num_val = X_train, X_test, y_train, y_test
num_train = 307
num_val = 77


xgb_train = xgb.DMatrix(X_train, y_train, enable_categorical=True)
xgb_test = xgb.DMatrix(X_test, y_test, enable_categorical=True)

# Reformat data to DMatrix for xgboost
log(INFO, "Reformatting data...")
# train_dmatrix = transform_dataset_to_dmatrix(train_data)
# valid_dmatrix = transform_dataset_to_dmatrix(valid_data)
train_dmatrix = xgb_train
valid_dmatrix = xgb_test

from sklearn import metrics
from sklearn.metrics import confusion_matrix, classification_report


# Hyper-parameters for xgboost training
num_local_round = 3
params = {
    "objective": "binary:logistic",
    "eta": 0.5,  # Learning rate
    "max_depth": 8,
    "eval_metric": "error",
    "nthread": 16,
    "num_parallel_tree": 1,
    "subsample": 1,
    "tree_method": "hist",
}


# Define Flower client
class XgbClient(fl.client.Client):
    def __init__(self):
        self.bst = None
        self.config = None

    def get_parameters(self, ins: GetParametersIns) -> GetParametersRes:
        _ = (self, ins)
        return GetParametersRes(
            status=Status(
                code=Code.OK,
                message="OK",
            ),
            parameters=Parameters(tensor_type="", tensors=[]),
        )

    def _local_boost(self):
        # Update trees based on local training data.
        for i in range(num_local_round):
            self.bst.update(train_dmatrix, self.bst.num_boosted_rounds())

        # Extract the last N=num_local_round trees for sever aggregation
        bst = self.bst[
            self.bst.num_boosted_rounds()
            - num_local_round : self.bst.num_boosted_rounds()
        ]

        return bst

    def fit(self, ins: FitIns) -> FitRes:
        if not self.bst:
            # First round local training
            log(INFO, "Start training at round 1")
            bst = xgb.train(
                params,
                train_dmatrix,
                num_boost_round=num_local_round,
                evals=[(valid_dmatrix, "validate"), (train_dmatrix, "train")],
            )
            self.config = bst.save_config()
            self.bst = bst
        else:
            for item in ins.parameters.tensors:
                global_model = bytearray(item)

            # Load global model into booster
            self.bst.load_model(global_model)
            self.bst.load_config(self.config)

            bst = self._local_boost()

        local_model = bst.save_raw("json")
        local_model_bytes = bytes(local_model)

        return FitRes(
            status=Status(
                code=Code.OK,
                message="OK",
            ),
            parameters=Parameters(tensor_type="", tensors=[local_model_bytes]),
            num_examples=num_train,
            metrics={},
        )

    def evaluate(self, ins: EvaluateIns) -> EvaluateRes:
        eval_results = self.bst.eval_set(
            evals=[(valid_dmatrix, "valid")],
            iteration=self.bst.num_boosted_rounds() - 1,
        )
        auc = round(float(eval_results.split("\t")[1].split(":")[1]), 4)
        
        preds = self.bst.predict(valid_dmatrix)
        y_pred = np.multiply(preds,100)
        y_pred = y_pred.astype(int)
        a = [1 if i >= 50 else 0 for i in y_pred]
        print("nigga")
        print("Confusion Matrix: \n", confusion_matrix(y_test, a))
        print(metrics.classification_report(y_test,a))
        

        return EvaluateRes(
            status=Status(
                code=Code.OK,
                message="OK",
            ),
            loss=0.0,
            num_examples=num_val,
            metrics={"error": auc},
        )


# Start Flower client
fl.client.start_client(server_address="127.0.0.1:8080", client=XgbClient().to_client())

INFO flwr 2024-05-13 17:57:18,649 | 355970421.py:44 | Reformatting data...
INFO flwr 2024-05-13 17:57:18,654 | grpc.py:52 | Opened insecure gRPC connection (no certificates were passed)
DEBUG flwr 2024-05-13 17:57:18,672 | connection.py:55 | ChannelConnectivity.IDLE
DEBUG flwr 2024-05-13 17:57:18,676 | connection.py:55 | ChannelConnectivity.READY
INFO flwr 2024-05-13 17:57:18,678 | 355970421.py:100 | Start training at round 1


[0]	validate-error:0.31169	train-error:0.12704
[1]	validate-error:0.31169	train-error:0.08795
[2]	validate-error:0.29870	train-error:0.04886
nigga
Confusion Matrix: 
 [[42  7]
 [16 12]]
              precision    recall  f1-score   support

           0       0.72      0.86      0.79        49
           1       0.63      0.43      0.51        28

    accuracy                           0.70        77
   macro avg       0.68      0.64      0.65        77
weighted avg       0.69      0.70      0.69        77

nigga
Confusion Matrix: 
 [[44  5]
 [14 14]]
              precision    recall  f1-score   support

           0       0.76      0.90      0.82        49
           1       0.74      0.50      0.60        28

    accuracy                           0.75        77
   macro avg       0.75      0.70      0.71        77
weighted avg       0.75      0.75      0.74        77

nigga
Confusion Matrix: 
 [[41  8]
 [13 15]]
              precision    recall  f1-score   support

           0   

DEBUG flwr 2024-05-13 17:57:19,810 | connection.py:220 | gRPC channel closed
INFO flwr 2024-05-13 17:57:19,811 | app.py:398 | Disconnect and shut down


              precision    recall  f1-score   support

           0       0.75      0.86      0.80        49
           1       0.67      0.50      0.57        28

    accuracy                           0.73        77
   macro avg       0.71      0.68      0.69        77
weighted avg       0.72      0.73      0.72        77

nigga
Confusion Matrix: 
 [[42  7]
 [14 14]]
              precision    recall  f1-score   support

           0       0.75      0.86      0.80        49
           1       0.67      0.50      0.57        28

    accuracy                           0.73        77
   macro avg       0.71      0.68      0.69        77
weighted avg       0.72      0.73      0.72        77

nigga
Confusion Matrix: 
 [[42  7]
 [13 15]]
              precision    recall  f1-score   support

           0       0.76      0.86      0.81        49
           1       0.68      0.54      0.60        28

    accuracy                           0.74        77
   macro avg       0.72      0.70     

In [14]:
from xgboost import XGBClassifier 

xgb = XGBClassifier(objective = 'binary:logistic')

xgb.fit(X_train, y_train)
print()
y_pred = xgb.predict(X_test)

print("Confusion Matrix: \n", confusion_matrix(y_test, y_pred))
print(metrics.classification_report(y_test,y_pred))


Confusion Matrix: 
 [[44  5]
 [15 13]]
              precision    recall  f1-score   support

           0       0.75      0.90      0.81        49
           1       0.72      0.46      0.57        28

    accuracy                           0.74        77
   macro avg       0.73      0.68      0.69        77
weighted avg       0.74      0.74      0.72        77

