In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
import matplotlib.pyplot as plt
from customxgboost import XGBoostClassifier, XGBoostTree
from fedXGB import Client, FedXGBoost
from customxgboost import XGBoostClassifier as myxgb


In [2]:
# Generate dummy data
def create_dummy_data(
    n_samples=10000,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    random_state=42,
):
    """Build a synthetic binary-classification dataset as a DataFrame.

    The samples are drawn with ``make_classification`` using ``n_classes=2``;
    every generator option not listed below keeps its library default.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    n_features : int
        Total number of feature columns.
    n_informative : int
        Number of informative features requested from the generator.
    n_redundant : int
        Number of redundant features requested from the generator.
    random_state : int
        Seed forwarded to the generator.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature_0`` ... ``feature_{n_features - 1}`` followed by a
        ``label`` column holding the generated class of each row.
    """
    feature_matrix, class_labels = make_classification(
        n_classes=2,
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        random_state=random_state,
    )

    # One named column per generated feature, then the target appended last.
    column_names = [f"feature_{i}" for i in range(n_features)]
    frame = pd.DataFrame(feature_matrix, columns=column_names)
    frame['label'] = class_labels
    return frame

In [3]:
# Generate the dataset with the function's defaults (10000 rows, 10 features, random_state=42).
data = create_dummy_data()

In [4]:
# Display the generated frame; the rendered output below elides the middle rows.
data

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,label
0,-2.273577,0.025135,-0.098951,-1.910959,-0.490930,-2.612120,-2.340507,-0.072464,2.671115,-0.658415,1
1,-2.633602,-1.064530,0.596068,-0.315907,3.620619,0.243131,-4.924947,1.309551,-0.441909,8.134933,1
2,2.560910,1.289750,1.271519,2.424127,-1.311381,-0.850302,2.476814,0.341372,2.234609,-5.429692,0
3,0.255632,-3.290428,1.514160,-0.085143,1.126927,-4.755379,0.820723,2.578251,0.362170,-3.539635,0
4,-1.250910,0.373713,1.226333,1.505599,-0.718096,1.228442,2.782940,-1.623311,2.992206,-6.717889,0
...,...,...,...,...,...,...,...,...,...,...,...
9995,-2.518808,-3.326460,-1.203037,2.227000,-1.920999,0.034816,0.105673,-0.221480,3.400589,-8.257107,0
9996,3.439655,1.400724,1.085159,2.172286,-3.800224,-2.774783,0.897944,-3.399414,-0.924083,-6.527160,0
9997,-2.118261,-1.773686,3.844588,-1.183138,2.543479,-2.705070,-2.435192,4.493852,2.417857,2.353397,0
9998,0.449766,3.097363,2.537797,-0.380602,0.594230,-0.740834,0.338637,-1.883345,0.621974,0.608565,0


In [5]:
# Separate the target column from the features, then hold out 33% of the rows for testing
y = data['label']
X = data.drop(columns='label')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# Baseline: stock XGBoost classifier fitted on the training split
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# Predict on the held-out split
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

# Report held-out accuracy as a percentage
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

Accuracy: 93.48%


In [6]:


# Disabled experiment: the custom classifier constructed with no arguments
# (no `method` given). Everything in this cell is commented out and does not run.
# my_model = myxgb()
# my_model.fit(X_train, y_train, boosting_rounds=20, depth=5, learning_rate=0.3)

# # make predictions for test data
# y_pred = my_model.predict(X_test)
# predictions = [round(value) for value in y_pred]

# # evaluate predictions
# accuracy = accuracy_score(y_test, predictions)
# print("Accuracy: %.2f%%" % (accuracy * 100.0))


In [7]:
# Disabled: training-split accuracy for the commented-out `myxgb()` run; nothing here executes.
# # accuracy on training data
# y_pred = my_model.predict(X_train)
# predictions = [round(value) for value in y_pred]
# accuracy = accuracy_score(y_train, predictions)
# print("Accuracy: %.2f%%" % (accuracy * 100.0))

In [8]:
# Custom classifier from customxgboost, constructed with method='hist'
my_model = myxgb(method='hist')
# my_model.max_bins = 1000
my_model.fit(
    X_train,
    y_train,
    boosting_rounds=20,
    depth=5,
    learning_rate=0.3,
)

# Predict on the held-out split; round() snaps each prediction to a whole number before scoring
y_pred = my_model.predict(X_test)
predictions = list(map(round, y_pred))

# Held-out accuracy as a percentage
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

getting regions
Training Complete
Accuracy: 82.85%


In [9]:
# Same metric on the rows the custom model was fitted on, for comparison with the held-out score
y_pred = my_model.predict(X_train)
predictions = list(map(round, y_pred))
accuracy = accuracy_score(y_train, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

Accuracy: 85.52%


In [10]:
max_bins = 1024  # each client must have the same bin size

# Create 3 clients, each holding a contiguous positional shard of the training split.
# The bounds are row positions, not index labels: X_train / y_train keep the row
# labels they had in `data`, so .iloc is used to make the positional intent explicit.
# The last shard takes whatever rows remain after position 6000.
# NOTE(review): `cliend_id` is the keyword exactly as this notebook passes it; it reads
# like a misspelling of `client_id` — confirm against fedXGB.Client before renaming.
client1 = Client(X_train.iloc[:3000], y_train.iloc[:3000], max_bins=max_bins, cliend_id=1)
client2 = Client(X_train.iloc[3000:6000], y_train.iloc[3000:6000], max_bins=max_bins, cliend_id=2)
client3 = Client(X_train.iloc[6000:], y_train.iloc[6000:], max_bins=max_bins, cliend_id=3)

# Create a federated model over the 3 clients, using the same bin count as the clients
fed_model = FedXGBoost([client1, client2, client3], max_bins=max_bins)

# Fit the federated model
fed_model.fit(subsample_cols=0.6, boosting_rounds=25, depth=7, learning_rate=0.3)


Initializing Clients
getting regions
getting regions
getting regions
Boosting round 1 done.
Boosting round 2 done.
Boosting round 3 done.
Boosting round 4 done.
Boosting round 5 done.
Boosting round 6 done.
Boosting round 7 done.
Boosting round 8 done.
Boosting round 9 done.
Boosting round 10 done.
Boosting round 11 done.
Boosting round 12 done.
Boosting round 13 done.
Boosting round 14 done.
Boosting round 15 done.
Boosting round 16 done.
Boosting round 17 done.
Boosting round 18 done.
Boosting round 19 done.
Boosting round 20 done.
Boosting round 21 done.
Boosting round 22 done.
Boosting round 23 done.
Boosting round 24 done.
Boosting round 25 done.
Training Complete


In [14]:
# Score the federated model on the held-out split
preds = fed_model.predict(X_test)

fed_accuracy = accuracy_score(y_test, preds)
print(f"Accuracy: {fed_accuracy}")

fed_precision = precision_score(y_test, preds)
print(f"Precision: {fed_precision}")

fed_recall = recall_score(y_test, preds)
print(f"Recall: {fed_recall}")

fed_f1 = f1_score(y_test, preds)
print(f"F1 Score: {fed_f1}")

Accuracy: 0.8357575757575758
Precision: 0.8810178817056397
Recall: 0.7763636363636364
F1 Score: 0.8253865979381443


In [15]:
# Federated model scored on the training split, for comparison with the held-out score
y_pred = fed_model.predict(X_train)
predictions = list(map(round, y_pred))
accuracy = accuracy_score(y_train, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

Accuracy: 86.61%


In [26]:
# Reference run: the XGBoost classifier from the Python package, default settings
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# Same four metrics as reported for the federated model, on the same held-out split
preds = model.predict(X_test)

ref_accuracy = accuracy_score(y_test, preds)
print(f"Accuracy: {ref_accuracy}")

ref_precision = precision_score(y_test, preds)
print(f"Precision: {ref_precision}")

ref_recall = recall_score(y_test, preds)
print(f"Recall: {ref_recall}")

ref_f1 = f1_score(y_test, preds)
print(f"F1 Score: {ref_f1}")


Accuracy: 0.9348484848484848
Precision: 0.9263220439691028
Recall: 0.9448484848484848
F1 Score: 0.9354935493549354
