# Notebook for developing code to go into the `StructuralAttack` class (`aisdc.attacks.structural_attack`)

In [2]:
import sys
import os


# During development, put the parent directory (the local aisdc checkout)
# at the front of the import path so it is found before any installed version.
sys.path.insert(0, os.path.abspath(os.pardir))


In [3]:
import json

import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC




from aisdc.attacks.structural_attack import StructuralAttack  # pylint: disable = import-error
from aisdc.attacks.target import Target  # pylint: disable = import-error

## Helper function for the tests

In [20]:
def get_target(modeltype:str, **kwargs)->Target:
    """Load the breast-cancer dataset, train a classifier and wrap both in a Target.

    Parameters
    ----------
    modeltype : str
        Short code selecting the classifier: ``'dt'`` (DecisionTreeClassifier),
        ``'rf'`` (RandomForestClassifier), ``'xgb'`` (XGBClassifier) or
        ``'svc'`` (SVC).
    **kwargs
        Hyper-parameters forwarded unchanged to the classifier constructor.

    Returns
    -------
    Target
        Object wrapping the fitted model together with the train/test split.

    Raises
    ------
    NotImplementedError
        If ``modeltype`` is not one of the codes listed above.
    """
    # Fixed seed so the split (and therefore the assertions made on the
    # resulting target) is the same on every run of the notebook.
    split_seed = 0

    # Pick the model first so an unknown type fails before any data is loaded.
    # these types should be handled
    if modeltype == 'dt':
        target_model = DecisionTreeClassifier(**kwargs)
    elif modeltype == 'rf':
        target_model = RandomForestClassifier(**kwargs)
    elif modeltype == 'xgb':
        target_model = XGBClassifier(**kwargs)
    # should get polite error but not DoF yet
    elif modeltype == 'svc':
        target_model = SVC(**kwargs)
    else:
        raise NotImplementedError('model type passed to get_target unknown')

    # Load the data as plain numpy arrays and hold out 30% for testing.
    X, y = load_breast_cancer(return_X_y=True, as_frame=False)
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=0.3, random_state=split_seed
    )

    # Train the classifier
    target_model.fit(train_X, train_y)

    # Wrap the model and data in a Target object
    target = Target(model=target_model)
    target.add_processed_data(train_X, train_y, test_X, test_y)

    return target

In [66]:
# Development aid: re-execute the structural_attack module in the running
# kernel (presumably so that edits to its source are picked up without a
# kernel restart -- the statement order below matters).
import importlib
import aisdc.attacks.structural_attack
importlib.reload(aisdc.attacks.structural_attack)
# Re-bind the name so it refers to the class object from the reloaded module.
from aisdc.attacks.structural_attack import StructuralAttack

In [67]:
def test_dt():
    """Test the structural attack on decision tree classifiers.

    Two targets are attacked in turn:

    * a decision stump (``max_depth=1``) with ``min_samples_leaf=150``, for
      which every risk flag is expected to be 0;
    * an unconstrained tree (``max_depth=None``, ``min_samples_leaf=1``), for
      which the k-anonymity, class-disclosure and unnecessary-risk flags are
      expected to be 1 while the DoF flag is still expected to be 0.

    Raises
    ------
    AssertionError
        If any risk flag set by the attack differs from the expected value.
    """

    # non-disclosive: a stump with very large leaves
    param_dict = {'max_depth': 1,
                  'min_samples_leaf': 150
                 }
    target = get_target("dt", **param_dict)
    myattack = StructuralAttack()
    myattack.attack(target)
    assert myattack.DoF_risk == 0, "should be no DoF risk with decision stump"
    assert myattack.k_anonymity_risk == 0, 'should be no k-anonymity risk with min_samples_leaf 150'
    assert myattack.class_disclosure_risk == 0, 'no class disclosure risk for stump with min_samples_leaf 150'
    assert myattack.unnecessary_risk == 0, 'no unnecessary risk if max_depth < 3.5'

    # highly disclosive: unlimited depth, leaves may hold a single sample
    param_dict = {'max_depth': None,
                  'min_samples_leaf': 1,
                  'min_samples_split': 2
                 }
    target = get_target("dt", **param_dict)
    myattack = StructuralAttack()
    myattack.attack(target)
    assert myattack.DoF_risk == 0, "should be no DoF risk with unlimited-depth decision tree"
    assert myattack.k_anonymity_risk == 1, 'should be k-anonymity risk with unlimited depth and min_samples_leaf 1'
    assert myattack.class_disclosure_risk == 1, 'should be class disclosure risk with unlimited depth and min_samples_leaf 1'
    assert myattack.unnecessary_risk == 1, 'should be unnecessary risk with unlimited depth and min_samples_leaf 1'

In [68]:
# Run the decision-tree test; an AssertionError is raised if any risk flag
# differs from its expected value, so no exception means the test passed.
test_dt()

INFO:acro:version: 0.4.2
INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False}
INFO:acro:automatic suppression: False
INFO:structural_attack:Thresholds for count 10 and DoF 10
INFO:acro:version: 0.4.2
INFO:acro:config: {'safe_threshold': 10, 'safe_dof_threshold': 10, 'safe_nk_n': 2, 'safe_nk_k': 0.9, 'safe_pratio_p': 0.1, 'check_missing_values': False}
INFO:acro:automatic suppression: False
INFO:structural_attack:Thresholds for count 10 and DoF 10
