In [1]:
# Dependencies
import pandas as pd
import numpy as np

In [2]:
from fdt import FDT

In [3]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Classification

In [4]:
# Load scikit-learn's breast-cancer dataset. as_frame=True makes the feature
# table a pandas DataFrame; its shape and type are echoed as the cell output.
breast_cancer = datasets.load_breast_cancer(as_frame=True)
cancer_features = breast_cancer['data']
cancer_features.shape, type(cancer_features)

((569, 30), pandas.core.frame.DataFrame)

In [5]:
# Features and labels for the classification task.
X, Y = breast_cancer['data'], breast_cancer['target']
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [6]:
%%time
# Initiate and train the federated decision tree
root = FDT().fit(X_train, Y_train)
result = root.predict(X_train) == Y_train
print(sum(result), len(result), sum(result) / len(result))

436 455 0.9582417582417583
CPU times: user 7.83 s, sys: 58.9 ms, total: 7.89 s
Wall time: 7.88 s


In [7]:
# Score the fitted tree on the held-out split:
# correct predictions, sample count, accuracy.
result = root.predict(X_test) == Y_test
n_correct, n_total = sum(result), len(result)
print(n_correct, n_total, n_correct / n_total)

108 114 0.9473684210526315


In [8]:
# Fit a second tree on the test split and merge it into `root`
# (merge's return value is not used; `root` itself is re-scored below).
# NOTE(review): after the merge X_test is data the combined model was fitted
# on, so the score below is no longer a held-out estimate.
peer_tree = FDT().fit(X_test, Y_test)
root.merge(peer_tree)
result = root.predict(X_test) == Y_test
n_correct, n_total = sum(result), len(result)
print(n_correct, n_total, n_correct / n_total)

108 114 0.9473684210526315


In [9]:
%%time
# Initiate and train the federated bayes tree
root = FDT(algo_type="bayes").fit(X_train, Y_train)
result = root.predict(X_train) == Y_train
print(sum(result), len(result), sum(result) / len(result))

455 455 1.0
CPU times: user 56.3 s, sys: 315 ms, total: 56.6 s
Wall time: 56.7 s


In [10]:
# Score the bayes tree on the held-out split:
# correct predictions, sample count, accuracy.
result = root.predict(X_test) == Y_test
n_correct, n_total = sum(result), len(result)
print(n_correct, n_total, n_correct / n_total)

108 114 0.9473684210526315


In [11]:
# Fit a second bayes tree on the test split and merge it into `root`
# (merge's return value is not used; `root` itself is re-scored below).
# NOTE(review): after the merge X_test is data the combined model was fitted
# on, so the score below is no longer a held-out estimate.
peer_tree = FDT(algo_type="bayes").fit(X_test, Y_test)
root.merge(peer_tree)
result = root.predict(X_test) == Y_test
n_correct, n_total = sum(result), len(result)
print(n_correct, n_total, n_correct / n_total)

110 114 0.9649122807017544


# Regression

In [12]:
# Load scikit-learn's diabetes dataset. as_frame=True makes the feature table
# a pandas DataFrame; its shape and type are echoed as the cell output.
diabetes = datasets.load_diabetes(as_frame=True)
diabetes_features = diabetes['data']
diabetes_features.shape, type(diabetes_features)

((442, 10), pandas.core.frame.DataFrame)

In [13]:
# Features and target for the regression task. This rebinds X, Y and the four
# split variables that the classification section used.
X, Y = diabetes['data'], diabetes['target']
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [14]:
%%time
# Initiate and train the federated distribution tree
root = FDT(algo_type="regression").fit(X_train, Y_train)
result = root.predict(X_train) - Y_train
print(sum(result ** 2), len(result), sum(result ** 2) / len(result))

840550.3128372148 353 2381.162359312223
CPU times: user 1.15 s, sys: 8.5 ms, total: 1.15 s
Wall time: 1.15 s


In [15]:
# Residuals on the held-out split:
# sum of squared errors, sample count, mean squared error.
result = root.predict(X_test) - Y_test
squared_error_sum = sum(result ** 2)
print(squared_error_sum, len(result), squared_error_sum / len(result))

342853.75501510437 89 3852.289382192184


In [16]:
# Fit a second regression tree on the test split and merge it into `root`
# (merge's return value is not used; `root` itself is re-scored below).
# NOTE(review): after the merge X_test is data the combined model was fitted
# on, so the error below is no longer a held-out estimate.
peer_tree = FDT(algo_type="regression").fit(X_test, Y_test)
root.merge(peer_tree)
result = root.predict(X_test) - Y_test
squared_error_sum = sum(result ** 2)
print(squared_error_sum, len(result), squared_error_sum / len(result))

239876.7580940796 89 2695.2444729671865


In [17]:
%%time
# Initiate and train the federated distribution tree
root = FDT(algo_type="linear", max_depth=2).fit(X_train, Y_train)
result = root.predict(X_train) - Y_train
print(sum(result ** 2), len(result), sum(result ** 2) / len(result))

796043.6958011813 353 2255.0812912214765
CPU times: user 2.58 s, sys: 6.8 ms, total: 2.59 s
Wall time: 2.58 s


In [18]:
# Residuals of the linear tree on the held-out split:
# sum of squared errors, sample count, mean squared error.
result = root.predict(X_test) - Y_test
squared_error_sum = sum(result ** 2)
print(squared_error_sum, len(result), squared_error_sum / len(result))

287036.98222060344 89 3225.1346316921736


In [19]:
# Fit a second linear tree (same max_depth=2) on the test split and merge it
# into `root` (merge's return value is not used; `root` is re-scored below).
# NOTE(review): after the merge X_test is data the combined model was fitted
# on, so the error below is no longer a held-out estimate.
peer_tree = FDT(algo_type="linear", max_depth=2).fit(X_test, Y_test)
root.merge(peer_tree)
result = root.predict(X_test) - Y_test
squared_error_sum = sum(result ** 2)
print(squared_error_sum, len(result), squared_error_sum / len(result))

122731.30059249181 89 1379.0033774437281
