In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
import matplotlib.pyplot as plt
from customxgboost import XGBoostClassifier, XGBoostTree
from fedXGB import Client, FedXGBoost


In [2]:
# Generate dummy data
def create_dummy_data(n_samples = 100, n_features = 3, n_informative = 3, n_redundant = 0, random_state = 42):
    """Build a synthetic binary-classification dataset as a DataFrame.

    Every argument is forwarded unchanged to ``make_classification``, which
    is always called with ``n_classes=2``.

    Parameters
    ----------
    n_samples : number of rows to generate.
    n_features : total number of feature columns.
    n_informative : number of informative features.
    n_redundant : number of redundant features.
    random_state : seed handed to the generator.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature_0`` .. ``feature_{n_features - 1}`` followed by a
        ``label`` column holding the generated class of each row.
    """
    generator_args = dict(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=2,
        random_state=random_state,
    )
    features, labels = make_classification(**generator_args)

    # Name the feature columns by position, then append the target column last
    feature_names = [f"feature_{i}" for i in range(n_features)]
    return pd.DataFrame(features, columns=feature_names).assign(label=labels)

In [3]:
# Build the toy dataset with the function defaults (100 samples, 3 features, binary label)
data = create_dummy_data()

In [4]:
# Show the generated frame as this cell's output
data

Unnamed: 0,feature_0,feature_1,feature_2,label
0,-0.055047,-0.076900,1.014236,1
1,-1.539644,-1.135646,0.951151,0
2,-0.801402,0.114717,-0.534702,0
3,2.203781,-2.440252,2.165799,1
4,-0.766340,0.198916,-1.456762,0
...,...,...,...,...
95,2.510895,-1.187769,-1.737078,0
96,-0.916990,0.062745,1.309143,0
97,1.924313,-1.563592,0.495242,0
98,1.650580,-1.108798,-0.227124,0


In [5]:
# Split the data into train and test sets (33% held out, fixed seed)
X = data.drop('label', axis=1)
y = data['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Fit the reference xgboost classifier on the training data
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# Predict on the held-out rows. XGBClassifier.predict already returns discrete
# class labels, so they are scored directly without a per-element rounding pass.
y_pred = model.predict(X_test)

# Evaluate predictions
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 81.82%


In [6]:
# XGBoostClassifier is already imported in the notebook's import cell, so alias
# it here instead of importing it a second time mid-notebook. The `myxgb` name
# is kept because the following cell constructs its model through it.
myxgb = XGBoostClassifier

# Train the custom implementation with its default settings
my_model = myxgb()
my_model.fit(X_train, y_train)

# Make predictions for the test data and round each one to an integer label
# NOTE(review): the rounding is kept because the return type of the custom
# predict() is not visible here; confirm whether it already yields labels.
y_pred = my_model.predict(X_test)
predictions = [round(value) for value in y_pred]

# Evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))


Training Complete
Accuracy: 90.91%


In [7]:
# Re-train the custom classifier, this time constructed with method='hist'
my_model = myxgb(method='hist')
my_model.fit(X_train, y_train)

# Predict on the held-out rows and round every output to an integer label
y_pred = my_model.predict(X_test)
predictions = list(map(round, y_pred))

# Report the accuracy as a percentage
accuracy = accuracy_score(y_test, predictions)
accuracy_pct = accuracy * 100.0
print("Accuracy: %.2f%%" % accuracy_pct)

Training Complete
Accuracy: 66.67%


In [13]:
# Synthetic dataset: 1000 rows, 5 feature columns
data = create_dummy_data(n_samples=1000, n_features=5)

# Separate 20 percent of the total data for testing
X = data.drop(columns='label')
y = data['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Positional row ranges of the training split handed to each client:
# the first 300 rows, the next 300, and whatever remains
shard_slices = (slice(None, 300), slice(300, 600), slice(600, None))

# Create the 3 clients, one per shard, in order
client1, client2, client3 = (
    Client(X_train.iloc[rows], y_train.iloc[rows]) for rows in shard_slices
)

# Federated model over the 3 clients
fed_model = FedXGBoost([client1, client2, client3])

# Fit the model, passing subsample_cols=0.8 through to fit()
fed_model.fit(subsample_cols=0.8)


Training Complete


In [14]:
# Score the federated model on the held-out rows
preds = fed_model.predict(X_test)

# Each metric is printed as "<name>: <value>", in this order
for metric_name, metric_fn in (
    ("Accuracy", accuracy_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
):
    print(f"{metric_name}: {metric_fn(y_test, preds)}")

Accuracy: 0.915
Precision: 0.8956521739130435
Recall: 0.9537037037037037
F1 Score: 0.9237668161434978


In [10]:
# Baseline: the classifier from the xgboost Python package, trained and scored
# on the current train/test split
model = xgb.XGBClassifier()
model.fit(X_train, y_train)
preds = model.predict(X_test)

# Metric name -> scoring function; printed one per line in insertion order
metric_fns = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1 Score": f1_score,
}
for metric_name, metric_fn in metric_fns.items():
    print(f"{metric_name}: {metric_fn(y_test, preds)}")


Accuracy: 0.97
Precision: 0.9903846153846154
Recall: 0.9537037037037037
F1 Score: 0.9716981132075472
