
Commit

16 aug

Jaap-Meerhof committed Aug 16, 2023
1 parent 5a3cbec commit 4770538

Showing 4 changed files with 122 additions and 13 deletions.
10 changes: 10 additions & 0 deletions src/SFXGBoost/MemberShip.py
@@ -54,6 +54,14 @@ def f_random(D_Train_Shadow, D_Out_Shadow):
# print(f"X_Train_Attack = {X_Train_Attack.shape}")
return X_Train_Attack, labels

def create_D_attack_centralised(shadow_model_s, D_Train_Shadow, D_Out_Shadow):
    # shadow_model_s can be multiple shadow models! TODO: handle that case!
    x, labels = f_random(D_Train_Shadow, D_Out_Shadow)
    z = shadow_model_s.predict_proba(x)  # per-class confidence scores, shape (n_samples, n_classes)
    # keep only the top-3 confidence scores per sample, sorted in descending order
    z_top_indices = np.argsort(z, axis=1)[:, ::-1][:, :3]
    z = np.take_along_axis(z, z_top_indices, axis=1)
    return z, labels
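
For reference, a minimal standalone sketch of the per-sample top-3 extraction performed above (toy 4-class scores; NumPy only):

    import numpy as np

    z = np.array([[0.1, 0.6, 0.2, 0.1],
                  [0.5, 0.1, 0.3, 0.1]])          # per-sample class confidences
    top = np.argsort(z, axis=1)[:, ::-1][:, :3]   # indices of the 3 largest per row
    print(np.take_along_axis(z, top, axis=1))     # [[0.6 0.2 0.1] [0.5 0.3 0.1]]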

def preform_attack_centralised(config:Config, D_Shadow, target_model, shadow_model, attack_model, X, y, fName=None) -> np.ndarray:
"""_summary_
@@ -77,6 +85,8 @@ def preform_attack_centralised(config:Config, D_Shadow, target_model, shadow_mod
    z_train = shadow_model.predict_proba(X_train)
    z_test = target_model.predict_proba(X)  # TODO: use test data from outside the training set
    test_x, test_label = f_random((X,y), D_Test)

    attack_model.fit(z_train, label_train)

    y_pred = attack_model.predict(target_model.predict_proba(test_x))
4 changes: 3 additions & 1 deletion src/SFXGBoost/config.py
@@ -13,7 +13,8 @@
np.set_printoptions(precision=4, suppress=True)

class Config:
def __init__(self, nameTest:str, model:str, dataset:str, lam:float, gamma:float, alpha:float, learning_rate:float, max_depth:int, max_tree:int, nBuckets:int):
def __init__(self, experimentName:str, nameTest:str, model:str, dataset:str, lam:float, gamma:float, alpha:float, learning_rate:float, max_depth:int, max_tree:int, nBuckets:int):
self.experimentName = experimentName
self.nameTest = nameTest
self.model = model
self.dataset = dataset
@@ -29,6 +30,7 @@ def __init__(self, nameTest:str, model:str, dataset:str, lam:float, gamma:float,
self.save_location= "Saves/" + nameTest + "/"

def prettyprint(self):
print(f"experiment name = {self.experimentName}")
print(f"test: {self.nameTest}")
print(f"model: {self.model}")
print(f"dataset: {self.dataset}")
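A minimal sketch of constructing the extended Config (values are illustrative, mirroring experiment2 in tests/main.py below):

    config = Config(experimentName="experiment 2", nameTest="MNIST test",
                    model="normal", dataset="MNIST",
                    lam=0.1, gamma=0.5, alpha=0.0, learning_rate=1,
                    max_depth=5, max_tree=9, nBuckets=100)
    config.prettyprint()  # now also prints the experiment name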
23 changes: 23 additions & 0 deletions src/SFXGBoost/view/metric_save.py
@@ -0,0 +1,23 @@
# This file is tasked with saving all data into tables and plots by using plotter.py and table.py.
# tests/main.py imports this module via `import SFXGBoost.view.metric_save as saver`,
# so the helpers below are defined at module level rather than inside a class.


def test():
    print("test")
    # save_metrics_one_run()

def save_metrics_one_run(target_model, shadow_model, attack_model, D_target_train, D_target_test, D_Train_Shadow, D_Train_Attack, D_Test):
    pass

    # Table 1: save accuracy, precision, F-score, degree of overfitting, and the tested metric name + value

    # Histogram 1:
    # save precision


def save_experiment_2_one_run(attack_model=None):  # accepts the attack model passed from tests/main.py; still a stub
    pass

def create_plots_experiment_2():
    pass

# create_plots_and_tables_multiple_runs()
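
Usage sketch, matching how tests/main.py imports this module (the helpers are stubs for now, so the calls are no-ops):

    import SFXGBoost.view.metric_save as saver

    saver.save_experiment_2_one_run(attack_model)  # attack_model as built in tests/main.py
    saver.create_plots_experiment_2()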
98 changes: 86 additions & 12 deletions tests/main.py
@@ -1,13 +1,16 @@
from SFXGBoost.config import Config, rank, comm, MyLogger
import numpy as np
from logging import Logger
from SFXGBoost.Model import PARTY_ID, SFXGBoostClassifierBase, SFXGBoost
from SFXGBoost.data_structure.databasestructure import QuantiledDataBase, DataBase
from SFXGBoost.MemberShip import preform_attack_centralised
from SFXGBoost.MemberShip import preform_attack_centralised, split_shadow, create_D_attack_centralised
from SFXGBoost.common.pickler import *
import SFXGBoost.view.metric_save as saver
from SFXGBoost.dataset.datasetRetrieval import getDataBase

import numpy as np
from logging import Logger
import xgboost as xgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from SFXGBoost.common.pickler import *

def log_distribution(logger, X_train, y_train, y_test):
nTrain = len(y_train)
@@ -84,8 +87,71 @@ def test_global(config:Config, logger:Logger, model: SFXGBoostClassifierBase, ge
POSSIBLE_PATHS = ["/data/BioGrid/meerhofj/Database/", \
"/home/hacker/jaap_cloud/SchoolCloud/Master Thesis/Database/", \
"/home/jaap/Documents/JaapCloud/SchoolCloud/Master Thesis/Database/"]

def train_all(target_model, shadow_model_s, attack_model, config, logger):
    X_train, y_train, X_test, y_test, fName, X_shadow, y_shadow = getDataBase(config.dataset, POSSIBLE_PATHS)()
    log_distribution(logger, X_train, y_train, y_test)

    target_model.fit(X_train, y_train, fName)
    D_Train_Shadow, D_Out_Shadow, D_Test = split_shadow((X_shadow, y_shadow))
    if isinstance(shadow_model_s, SFXGBoost):
        shadow_model_s.fit(D_Train_Shadow[0], D_Train_Shadow[1], fName)
    else:
        for shadow_model in shadow_model_s:
            shadow_model.fit(D_Train_Shadow[0], D_Train_Shadow[1], fName)  # TODO: give each shadow model its own split so they are not all trained on the same data!

    z, labels = create_D_attack_centralised(shadow_model_s, D_Train_Shadow, D_Out_Shadow)

    attack_model.fit(z, labels)
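
A minimal invocation sketch for train_all, mirroring experiment2 below (config and logger as constructed there; the model choices are illustrative):

    target = SFXGBoost(config, logger)
    shadow = SFXGBoost(config, logger)
    attack = MLPClassifier(hidden_layer_sizes=(20, 11, 11), activation='relu', solver='adam',
                           learning_rate_init=0.01, max_iter=2000)
    train_all(target, shadow, attack, config, logger)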

def experiment2():
    seed = 10

    datasets = ["healthcare", "MNIST", "synthetic", "Census", "DNA", "Purchase-10", "Purchase-20", "Purchase-50", "Purchase-100"]
    targetArchitectures = ["XGBoost", "FederBoost"]
    for targetArchitecture in targetArchitectures:
        for dataset in datasets:
            config = Config(experimentName="experiment 2",
                            nameTest=dataset + " test",
                            model="normal",
                            dataset=dataset,
                            lam=0.1,  # 0.1 10
                            gamma=0.5,
                            alpha=0.0,
                            learning_rate=1,
                            max_depth=5,
                            max_tree=9,
                            nBuckets=100)
            logger = MyLogger(config).logger
            logger.warning(config.prettyprint())
            np.random.seed(seed)  # TODO: also seed sklearn's train/test splits

            if targetArchitecture == "FederBoost":
                target_model = SFXGBoost(config, logger)
                shadow_model = SFXGBoost(config, logger)
                shadow_models = [SFXGBoost(config, logger) for _ in range(10)]  # TODO: not yet used by train_all

                attack_model_central = MLPClassifier(hidden_layer_sizes=(20,11,11), activation='relu', solver='adam', learning_rate_init=0.01, max_iter=2000)
                attack_model_federated = None  # TODO: define a federated neural network.
                attack_model = attack_model_central  # used by train_all below until the federated attack model exists

            elif targetArchitecture == "XGBoost":
                target_model = xgb.XGBClassifier(max_depth=config.max_depth, objective="multi:softmax", tree_method="approx",
                                                 learning_rate=config.learning_rate, n_estimators=config.max_tree, gamma=config.gamma, reg_alpha=config.alpha, reg_lambda=config.lam)
                shadow_model = xgb.XGBClassifier(max_depth=config.max_depth, objective="multi:softmax", tree_method="approx",
                                                 learning_rate=config.learning_rate, n_estimators=config.max_tree, gamma=config.gamma, reg_alpha=config.alpha, reg_lambda=config.lam)
                attack_model = MLPClassifier(hidden_layer_sizes=(20,11,11), activation='relu', solver='adam', learning_rate_init=0.01, max_iter=2000)
            else:
                raise Exception("Wrong model types given!")

            train_all(target_model, shadow_model, attack_model, config, logger)
            saver.save_experiment_2_one_run(attack_model, )

    saver.create_plots_experiment_2()


def main():
dataset = "healthcare"
dataset = "texas"
config = Config(experimentName="main",  # NOTE: Config's new signature requires this; placeholder name, since main() was not updated in this commit
nameTest= dataset + " test",
model="normal",
dataset=dataset,
@@ -98,9 +164,8 @@ def main():
nBuckets=100)
logger = MyLogger(config).logger
if rank == 0: logger.debug(config.prettyprint())
from SFXGBoost.dataset.datasetRetrieval import getDataBase
if config.model == "normal":
model = SFXGBoost(config, logger)
target_model = SFXGBoost(config, logger)
shadow_model = SFXGBoost(config, logger)
# shadow_model = xgb.XGBClassifier(ax_depth=config.max_depth, objective="multi:softmax", tree_method="approx",
# learning_rate=0.3, n_estimators=config.max_tree, gamma=config.gamma, reg_alpha=0, reg_lambda=config.lam)
@@ -115,13 +180,22 @@ def main():
# that way I can save the model reuse it and apply different attack_models on it.
# TODO SFXGBoost().getGradients.

# X_train, y_train, X_test, y_test, fName, X_shadow, y_shadow = getDataBase(config.dataset, POSSIBLE_PATHS)()
# log_distribution(logger, X_train, y_train, y_test)
# model.fit(X_train, y_train, fName)
X_train, y_train, X_test, y_test, fName, X_shadow, y_shadow = getDataBase(config.dataset, POSSIBLE_PATHS)()

X, y, y_pred_org, y_test, model, X_shadow, y_shadow, fName = test_global(config, logger, model, getDataBase(config.dataset, POSSIBLE_PATHS))
log_distribution(logger, X_train, y_train, y_test)
target_model.fit(X_train, y_train, fName)
D_Train_Shadow, D_Out_Shadow, D_Test = split_shadow((X_shadow, y_shadow))
shadow_model.fit(D_Train_Shadow[0], D_Train_Shadow[1], fName)

z, labels = create_D_attack_centralised(shadow_model, D_Train_Shadow, D_Out_Shadow)

attack_model.fit(z, labels)

saver.save_metrics_one_run(target_model, shadow_model, attack_model, (X_train, y_train), (X_test, y_test), D_Train_Shadow, (z, labels), D_Test)

X, y, y_pred_org, y_test, target_model, X_shadow, y_shadow, fName = test_global(config, logger, target_model, getDataBase(config.dataset, POSSIBLE_PATHS))

preform_attack_centralised(config, (X_shadow, y_shadow), model, shadow_model, attack_model, X, y, fName)
preform_attack_centralised(config, (X_shadow, y_shadow), target_model, shadow_model, attack_model, X, y, fName)

if rank == 0:
from sklearn.metrics import accuracy_score
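For context, a hedged sketch of what the collapsed rank-0 block presumably computes with the accuracy_score import above (variable names follow those already in main(); this is an assumption, not the commit's code):

    # hypothetical: report target-model test accuracy on the held-out split
    y_pred = target_model.predict(X_test)
    print("target accuracy:", accuracy_score(y_test, y_pred))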
