In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
import matplotlib.pyplot as plt
from customxgboost import XGBoostClassifier, XGBoostTree
from fedXGB import Client, FedXGBoost
from customxgboost import XGBoostClassifier as myxgb


In [2]:
# Generate dummy data
def create_dummy_data(n_samples = 15000, n_features = 10, n_informative = 8, n_redundant = 0, random_state = 42):
    """Build a synthetic two-class dataset and return it as a DataFrame.

    All arguments are forwarded to ``make_classification``; the number of
    classes is fixed at 2.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    n_features : int
        Total number of feature columns.
    n_informative : int
        Number of informative features requested from the generator.
    n_redundant : int
        Number of redundant features requested from the generator.
    random_state : int
        Seed forwarded to the generator.

    Returns
    -------
    pandas.DataFrame
        Columns ``feature_0`` .. ``feature_{n_features - 1}`` followed by a
        ``label`` column holding the generated class labels.
    """
    generator_options = dict(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_informative,
        n_redundant=n_redundant,
        n_classes=2,
        random_state=random_state,
    )
    features, labels = make_classification(**generator_options)

    # Name the columns feature_0, feature_1, ... and attach the labels last.
    column_names = [f"feature_{idx}" for idx in range(n_features)]
    frame = pd.DataFrame(features, columns=column_names)
    frame['label'] = labels
    return frame

In [3]:
# Generate the synthetic dataset using create_dummy_data's default arguments.
data = create_dummy_data()

In [4]:
# Display the generated frame: the feature_* columns plus the 'label' column.
data

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,label
0,-0.720305,-1.237531,-0.454258,1.571461,0.700770,1.982791,-3.120386,0.826585,-0.755221,-0.961137,0
1,0.523927,-0.300823,1.193739,1.546690,0.279284,-0.160006,1.431960,-1.205824,-1.701729,0.483841,0
2,-0.667393,-0.504174,-1.033114,3.192187,-2.412546,-1.797621,-1.909689,1.667438,-3.983311,0.133892,1
3,0.576402,-4.044822,1.529363,-0.303941,0.903713,0.271204,-0.175285,-0.767430,-2.677140,-0.520583,0
4,0.494973,-2.861203,1.220322,-0.286901,0.257327,-3.092110,-1.689975,0.244554,0.292380,0.751493,0
...,...,...,...,...,...,...,...,...,...,...,...
14995,-0.089057,-0.291074,1.746100,0.444515,0.339796,-0.908918,-1.367856,-0.498903,-1.691124,0.152264,1
14996,-0.225801,-2.265174,2.827270,-1.201393,0.655284,-2.561458,-0.706593,-0.032691,-0.142616,0.834814,0
14997,1.104642,1.304908,0.419491,1.263706,-2.090225,0.578868,0.387278,1.491463,-1.073523,-0.074165,0
14998,-0.345116,2.334344,1.320172,1.715078,0.059883,1.218548,-4.212452,1.363364,-3.089374,0.363273,1


In [5]:
# Split into train and test sets: 33% of the rows are held out, and the fixed
# random_state keeps the split reproducible across runs.
X = data.drop('label', axis=1)
y = data['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Fit the reference XGBoost model on the training data (library defaults).
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# Make predictions for the test data. XGBClassifier.predict already returns
# discrete class labels, so no per-element rounding is needed.
y_pred = model.predict(X_test)
predictions = y_pred  # name kept so it stays defined after this cell

# Evaluate predictions.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 94.18%


In [6]:


# my_model = myxgb()
# my_model.fit(X_train, y_train, boosting_rounds=20, depth=5, learning_rate=0.3)

# # make predictions for test data
# y_pred = my_model.predict(X_test)
# predictions = [round(value) for value in y_pred]

# # evaluate predictions
# accuracy = accuracy_score(y_test, predictions)
# print("Accuracy: %.2f%%" % (accuracy * 100.0))


In [7]:
# # accuracy on training data
# y_pred = my_model.predict(X_train)
# predictions = [round(value) for value in y_pred]
# accuracy = accuracy_score(y_train, predictions)
# print("Accuracy: %.2f%%" % (accuracy * 100.0))

In [8]:
# Train the custom classifier with method='hist'
# (presumably the histogram-based split search — confirm in customxgboost).
my_model = myxgb(method='hist')
# my_model.max_bins = 1000
my_model.fit(X_train, y_train, boosting_rounds=20, depth=5, learning_rate=0.3)

# Score the held-out rows and round each output to an integer label.
y_pred = my_model.predict(X_test)
predictions = list(map(round, y_pred))

# Report test accuracy as a percentage.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

Optimized: Getting regions
Training Complete
Accuracy: 79.43%


In [9]:
# Same evaluation on the rows the custom model was fitted on (training accuracy).
y_pred = my_model.predict(X_train)
predictions = list(map(round, y_pred))
accuracy = accuracy_score(y_train, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

Accuracy: 83.15%


In [7]:
# max_bins = 100000 # each client must have the same bin size

max_bins = 10240

# Create 3 clients, each holding a disjoint shard of the training rows.
# .iloc makes the positional slicing explicit: train_test_split shuffles the
# rows by default, so the bounds below are row positions, not index labels.
# NOTE(review): 'cliend_id' looks like a misspelling of 'client_id', but it has
# to match the keyword that fedXGB.Client declares — confirm there before renaming.
client1 = Client(X_train.iloc[:3000], y_train.iloc[:3000], max_bins=max_bins, cliend_id=1)
client2 = Client(X_train.iloc[3000:6000], y_train.iloc[3000:6000], max_bins=max_bins, cliend_id=2)
client3 = Client(X_train.iloc[6000:], y_train.iloc[6000:], max_bins=max_bins, cliend_id=3)

# Create a federated model with the 3 clients; the same max_bins value is
# passed to the clients and to fit().
fed_model = FedXGBoost()

fed_model.fit([client1, client2, client3], max_bins=max_bins)

Initializing Clients
Optimized: Getting regions
Optimized: Getting regions
Optimized: Getting regions


In [8]:
# Train the federated model: 25 boosting rounds, depth 10, learning rate 0.1.
# subsample_cols=0.8 is presumably the fraction of feature columns sampled — confirm in fedXGB.
fed_model.train(subsample_cols=0.8, boosting_rounds=25, depth=10, learning_rate=0.1)

Boosting round 1 done.
Boosting round 2 done.
Boosting round 3 done.
Boosting round 4 done.
Boosting round 5 done.
Boosting round 6 done.
Boosting round 7 done.
Boosting round 8 done.
Boosting round 9 done.
Boosting round 10 done.
Boosting round 11 done.
Boosting round 12 done.
Boosting round 13 done.
Boosting round 14 done.
Boosting round 15 done.
Boosting round 16 done.
Boosting round 17 done.
Boosting round 18 done.
Boosting round 19 done.
Boosting round 20 done.
Boosting round 21 done.
Boosting round 22 done.
Boosting round 23 done.
Boosting round 24 done.
Boosting round 25 done.
Training Complete


In [12]:
# Continue training: a second train() call with the same settings as the first,
# plus explicit gamma=0 and lambda_=1.
# NOTE(review): whether this appends rounds to the existing model or starts over
# depends on fedXGB's train() — confirm.
fed_model.train(subsample_cols=0.8, boosting_rounds=25, depth=10, learning_rate=0.1, gamma=0, lambda_=1)

Boosting round 1 done.
Boosting round 2 done.
Boosting round 3 done.
Boosting round 4 done.
Boosting round 5 done.
Boosting round 6 done.
Boosting round 7 done.
Boosting round 8 done.
Boosting round 9 done.
Boosting round 10 done.
Boosting round 11 done.
Boosting round 12 done.
Boosting round 13 done.
Boosting round 14 done.
Boosting round 15 done.
Boosting round 16 done.
Boosting round 17 done.
Boosting round 18 done.
Boosting round 19 done.
Boosting round 20 done.
Boosting round 21 done.
Boosting round 22 done.
Boosting round 23 done.
Boosting round 24 done.
Boosting round 25 done.
Training Complete


In [15]:
# Third train() call: depth raised to 15 and subsample_cols to 0.9; the other
# arguments match the continued-training call.
fed_model.train(subsample_cols=0.9, boosting_rounds=25, depth=15, learning_rate=0.1, gamma=0, lambda_=1)

Boosting round 1 done.
Boosting round 2 done.
Boosting round 3 done.
Boosting round 4 done.
Boosting round 5 done.
Boosting round 6 done.
Boosting round 7 done.
Boosting round 8 done.
Boosting round 9 done.
Boosting round 10 done.
Boosting round 11 done.
Boosting round 12 done.
Boosting round 13 done.
Boosting round 14 done.
Boosting round 15 done.
Boosting round 16 done.
Boosting round 17 done.
Boosting round 18 done.
Boosting round 19 done.
Boosting round 20 done.
Boosting round 21 done.
Boosting round 22 done.
Boosting round 23 done.
Boosting round 24 done.
Boosting round 25 done.
Training Complete


In [9]:
# Test-set metrics for the federated model after the first 25 rounds.
preds = fed_model.predict(X_test)

acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc}")
prec = precision_score(y_test, preds)
print(f"Precision: {prec}")
rec = recall_score(y_test, preds)
print(f"Recall: {rec}")
f1 = f1_score(y_test, preds)
print(f"F1 Score: {f1}")

Accuracy: 0.8432323232323232
Precision: 0.8489932885906041
Recall: 0.8295081967213115
F1 Score: 0.8391376451077943


In [None]:
# Test-set metrics for the next model
# (presumably after the continued-training call — confirm the run order).
preds = fed_model.predict(X_test)

acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc}")
prec = precision_score(y_test, preds)
print(f"Precision: {prec}")
rec = recall_score(y_test, preds)
print(f"Recall: {rec}")
f1 = f1_score(y_test, preds)
print(f"F1 Score: {f1}")

Accuracy: 0.8713131313131313
Precision: 0.858509366281387
Recall: 0.8842364532019704
F1 Score: 0.8711830131445905


In [16]:
# Test-set metrics for the 3rd model
# (presumably after the third train() call — confirm the run order).
preds = fed_model.predict(X_test)

acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc}")
prec = precision_score(y_test, preds)
print(f"Precision: {prec}")
rec = recall_score(y_test, preds)
print(f"Recall: {rec}")
f1 = f1_score(y_test, preds)
print(f"F1 Score: {f1}")

Accuracy: 0.8644444444444445
Precision: 0.8557033454252317
Recall: 0.8715106732348111
F1 Score: 0.8635346756152126


In [14]:
# Training accuracy of the federated model: predict on the training rows,
# round each prediction to an integer, and report the score as a percentage.
y_pred = fed_model.predict(X_train)
predictions = list(map(round, y_pred))
accuracy = accuracy_score(y_train, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

Accuracy: 91.49%


In [15]:
# Reference run: the XGBoost classifier from the Python xgboost package,
# fitted on the same training split and scored with the same four metrics.
model = xgb.XGBClassifier()

model.fit(X_train, y_train)
preds = model.predict(X_test)

acc = accuracy_score(y_test, preds)
print(f"Accuracy: {acc}")
prec = precision_score(y_test, preds)
print(f"Precision: {prec}")
rec = recall_score(y_test, preds)
print(f"Recall: {rec}")
f1 = f1_score(y_test, preds)
print(f"F1 Score: {f1}")


Accuracy: 0.9418181818181818
Precision: 0.9373983739837398
Recall: 0.9450819672131148
F1 Score: 0.9412244897959183
