In [1]:
from surrogate.neighbor import GeneticAlgorithmPermutationGenerator, calculate_feature_values
from utils.prediction import PredictionTypeWrapper, PredictionTypeWrapper, generate_local_predictions, wrap_information
from surrogate.wrapper import SurrogateWrapper
from lore.prepare_dataset import prepare_diabetes_dataset

from utils import load
from datetime import datetime
import tensorflow as tf
import numpy as np
import pandas as pd
from surrogate.evaluate import get_evaluation

# Seed NumPy's and TensorFlow's global random generators with one shared value.
seed = 123
np.random.seed(seed)
tf.random.set_seed(seed)

In [2]:
# Read the saved `cf_proto_result.csv` table that the evaluation cell consumes.
df = pd.read_csv(filepath_or_buffer='cf_proto_result.csv')

In [3]:
# Pick the Diabetes dataset; every loader call below is keyed on this selection.
dataset = load.SelectableDataset.Diabetes

# `load_dataset` yields ten values, unpacked here in the order it returns them.
(
    data,
    balanced_data,
    X,
    Y,
    encoder,
    scaler,
    n_features,
    n_classes,
    feature_names,
    target_name,
) = load.load_dataset(dataset)

# Feature/label pairs for the train, test and validation sets (order as returned).
(
    X_train, Y_train,
    X_test, Y_test,
    X_validation, Y_validation,
) = load.load_training_data(dataset)

# Already-trained model that matches the selected dataset.
model = load.load_trained_model_for_dataset(dataset)

Loaded model from disk


In [4]:
# (min, max) of the training inputs, each reduced along axis 0.
diabetes_feature_range = (
    X_train.min(axis=0),
    X_train.max(axis=0),
)

# Collect the local prediction information for the test data.
local_data_dict = generate_local_predictions(X_test, Y_test, model, scaler, encoder)

# Split that information into the four prediction outcome groups.
(
    true_positives,
    true_negatives,
    false_positives,
    false_negatives,
) = wrap_information(local_data_dict)

# Bundle the four groups into a single wrapper object.
all_predictions = PredictionTypeWrapper(
    true_positives,
    true_negatives,
    false_positives,
    false_negatives,
)

In [5]:
# Wrap the trained model in a SurrogateWrapper for the surrogate utilities.
surrogate_wrapper = SurrogateWrapper(model)

# Dataset description built from the raw diabetes CSV; later cells index it by string keys.
diabetes_dataset = prepare_diabetes_dataset('./datasets/diabetes.csv')

# Attach feature values computed from the balanced data.
# NOTE(review): 'Outcome' and ['Pregnancies'] are positional arguments -- presumably the
# class column and the discrete features; confirm against calculate_feature_values.
diabetes_dataset['feature_values'] = calculate_feature_values(
    balanced_data,
    list(balanced_data.columns),
    'Outcome',
    ['Pregnancies'],
    size=1000,
)

In [6]:
# Pull the entries the generator and the evaluation need out of the dataset dict.
# Note: this rebinds `target_name` to the dict's 'class_name' entry.
idx_features, discrete, continuous, target_name, feature_values = (
    diabetes_dataset[key]
    for key in ('idx_features', 'discrete', 'continuous', 'class_name', 'feature_values')
)

In [7]:
# Build the genetic-algorithm permutation generator from the dataset metadata,
# the surrogate wrapper and the scaler.
generator = GeneticAlgorithmPermutationGenerator(
    idx_features,
    discrete,
    continuous,
    target_name,
    surrogate_wrapper,
    scaler,
)

In [8]:
# Preview the first five rows of the loaded results table.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,orgin_Pregnancies,orgin_Glucose,orgin_BloodPressure,orgin_SkinThickness,orgin_Insulin,orgin_BMI,orgin_DiabetesPedigreeFunction,orgin_Age,cf_Pregnancies,cf_Glucose,cf_BloodPressure,cf_SkinThickness,cf_Insulin,cf_BMI,cf_DiabetesPedigreeFunction,cf_Age,time(sec),prediction_type
0,0,1.0,189.0,56.065574,23.0,846.0,30.1,0.391938,59.0,4.864439,77.610229,56.065575,23.0,713.106689,30.1,1.039824,59.0,7.610511,TRUE POSITIVE
1,0,1.0,180.0,0.0,0.0,0.0,43.3,0.275591,41.0,1.0,162.397217,0.0,0.0,0.0,22.18553,0.275591,41.0,8.429571,TRUE POSITIVE
2,0,8.0,179.0,67.278689,42.0,130.0,32.7,0.713901,36.0,8.0,89.772514,67.278687,45.410671,130.0,32.700001,0.713901,36.0,8.814658,TRUE POSITIVE
3,0,8.0,151.0,72.885246,32.0,210.0,42.9,0.510292,36.0,8.0,98.750587,72.885246,32.453152,210.0,42.900002,0.510292,36.0,8.336116,TRUE POSITIVE
4,0,11.0,138.0,69.147541,26.0,144.0,36.1,0.551415,50.0,10.591582,67.560814,72.238609,26.0,222.060013,36.099998,0.551415,50.0,7.659001,TRUE POSITIVE


In [9]:
# Time the surrogate evaluation over every row of `df`.
# NOTE(review): 0:42:15.884303 is presumably the duration printed by an earlier
# complete run of this cell -- expect a long wait.
start = datetime.now()
try:
    result_df = get_evaluation(df, feature_values, feature_names, surrogate_wrapper, scaler, generator)
finally:
    # Print the elapsed time even when the evaluation raises or is interrupted,
    # so a partial run still shows how long it took. If that happens,
    # `result_df` is NOT assigned by this cell.
    end = datetime.now()
    print(end - start)


| 0 




decision node 0 : (x [Glucose] = 189.0) > 115.32575607299805)
decision node 12 : (x [Age] = 59.0) > 18.119117736816406)
decision node 16 : (x [Glucose] = 189.0) > 125.2251091003418)
decision node 0 : (x [Glucose] = 77.6102294921875) <= 115.32575607299805)
decision node 1 : (x [SkinThickness] = 23.0) <= 77.69699287414551)
decision node 2 : (x [BMI] = 30.100000381469727) <= 39.42234420776367)
decision node 3 : (x [Glucose] = 77.6102294921875) <= 79.40485763549805)

| 1 




decision node 0 : (x [Age] = 41.0) > 39.7443733215332)
decision node 12 : (x [BloodPressure] = 0.0) <= 22.10234832763672)
decision node 13 : (x [SkinThickness] = 0.0) <= 38.20528221130371)
decision node 0 : (x [Age] = 41.0) > 39.7443733215332)
decision node 12 : (x [BloodPressure] = 0.0) <= 22.10234832763672)
decision node 13 : (x [SkinThickness] = 0.0) <= 38.20528221130371)

| 2 




KeyboardInterrupt: 

In [None]:
# Save the evaluation results. `index=False` keeps the row index out of the file;
# otherwise it is written as an unnamed first column and comes back as
# `Unnamed: 0` (then `Unnamed: 0.1`, ...) each time the CSV is read and re-saved.
result_df.to_csv('surrogate_proto.csv', index=False)