In [10]:
import tensorflow as tf
import pandas as pd
import numpy as np

from utils.df_loader import load_adult_df, load_compas_df, load_german_df, load_diabetes_df, load_breast_cancer_df
from sklearn.model_selection import train_test_split
from utils.preprocessing import preprocess_df

### Notebook-wide configuration.
# Seed handed explicitly to the train/test split further down.
seed = 123
# NOTE: the global TensorFlow / NumPy RNGs are not seeded in this cell
# (no tf.random.set_seed / np.random.seed call is made).

# Turn off pandas' chained-assignment check.
pd.options.mode.chained_assignment = None

### Disable TF2 behavior and enable TF1 behavior for alibi.
# Only emit log records at ERROR level (numeric level 40) or above.
tf.get_logger().setLevel('ERROR')
tf.compat.v1.disable_v2_behavior()
# Reset the Keras backend state before anything is built.
tf.keras.backend.clear_session()

print('TF version: ', tf.__version__)
# Expected to report False once v2 behavior has been disabled.
print('Eager execution enabled: ', tf.executing_eagerly())


TF version:  2.4.0-rc0
Eager execution enabled:  False


In [11]:
#### Select dataset ####
# Supported names: adult, german, compas, diabetes, breast_cancer
dataset_name = 'adult'

# Dispatch table: dataset name -> loader function imported from utils.df_loader.
dataset_loaders = {
    'adult': load_adult_df,
    'german': load_german_df,
    'compas': load_compas_df,
    'diabetes': load_diabetes_df,
    'breast_cancer': load_breast_cancer_df,
}

if dataset_name not in dataset_loaders:
    # ValueError is a subclass of Exception, so code that caught the previous
    # generic Exception keeps working; the message now names the bad value.
    raise ValueError(
        f"Unsupported dataset: {dataset_name!r}. "
        f"Choose one of {sorted(dataset_loaders)}."
    )

# Loader for the selected dataset; passed to preprocess_df in the next cell.
dataset_loading_fn = dataset_loaders[dataset_name]

In [12]:
#### Load dataframe info.
# `preprocess_df` is given the selected loader function. Later cells read the
# `dummy_df`, `ohe_feature_names`, `target_name` and `cat_to_ohe_cat`
# attributes of the returned `df_info` object.
df_info = preprocess_df(dataset_loading_fn)

In [13]:
### Separate into train and test sets.
# 80% of the rows of `df_info.dummy_df` go to the training set, the remainder
# to the test set; rows are shuffled using the fixed `seed`.
train_df, test_df = train_test_split(
    df_info.dummy_df,
    train_size=0.8,
    shuffle=True,
    random_state=seed,
)

In [14]:
### Get training and testing arrays.
def _features_and_target(frame):
    """Return ``(X, y)`` NumPy arrays extracted from ``frame``.

    ``X`` is built from the columns listed in ``df_info.ohe_feature_names``
    and ``y`` from the column(s) selected by ``df_info.target_name``.
    """
    features = np.array(frame[df_info.ohe_feature_names])
    target = np.array(frame[df_info.target_name])
    return features, target


X_train, y_train = _features_and_target(train_df)
X_test, y_test = _features_and_target(test_df)

In [15]:
# Wrap the test feature array in a DataFrame whose columns are labelled with
# the one-hot feature names, for use with the inverse-transform helpers below.
X_test_df = pd.DataFrame(X_test, columns= df_info.ohe_feature_names)

In [16]:
from utils.preprocessing import inverse_scaling_and_dummy, inverse_scaling, inverse_dummy

In [17]:
# As the output below shows: numeric columns are back on their original scale
# and the one-hot columns are collapsed into one categorical column per feature.
inverse_scaling_and_dummy(X_test_df, df_info)

Unnamed: 0,age,capital-gain,capital-loss,hours-per-week,workclass,education,marital-status,occupation,relationship,race,sex,native-country
0,55.0,0.0,0.0,15.0,State-gov,Bachelors,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States
1,65.0,0.0,0.0,20.0,Private,Bachelors,Married-civ-spouse,Transport-moving,Husband,White,Male,United-States
2,29.0,0.0,0.0,40.0,Private,HS-grad,Never-married,Craft-repair,Not-in-family,White,Male,Guatemala
3,53.0,0.0,0.0,40.0,State-gov,Doctorate,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States
4,32.0,0.0,0.0,40.0,Private,Bachelors,Never-married,Adm-clerical,Not-in-family,Asian-Pac-Islander,Male,United-States
...,...,...,...,...,...,...,...,...,...,...,...,...
6508,50.0,0.0,0.0,55.0,Federal-gov,Bachelors,Divorced,Exec-managerial,Not-in-family,White,Male,United-States
6509,54.0,0.0,0.0,44.0,Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,Male,United-States
6510,28.0,0.0,1740.0,30.0,Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,Male,United-States
6511,45.0,15024.0,0.0,40.0,Self-emp-not-inc,Doctorate,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States


In [19]:
# As the output below shows: numeric columns are back on their original scale
# while the one-hot encoded columns are left as they are.
inverse_scaling(X_test_df, df_info)

Unnamed: 0,age,capital-gain,capital-loss,hours-per-week,workclass_Federal-gov,workclass_Local-gov,workclass_Never-worked,workclass_Private,workclass_Self-emp-inc,workclass_Self-emp-not-inc,...,native-country_Portugal,native-country_Puerto-Rico,native-country_Scotland,native-country_South,native-country_Taiwan,native-country_Thailand,native-country_Trinadad&Tobago,native-country_United-States,native-country_Vietnam,native-country_Yugoslavia
0,55.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1,65.0,0.0,0.0,20.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2,29.0,0.0,0.0,40.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,53.0,0.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,32.0,0.0,0.0,40.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6508,50.0,0.0,0.0,55.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6509,54.0,0.0,0.0,44.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6510,28.0,0.0,1740.0,30.0,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
6511,45.0,15024.0,0.0,40.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [20]:
# As the output below shows: one-hot columns are collapsed into categorical
# columns while the numeric columns keep their scaled values.
inverse_dummy(X_test_df, df_info.cat_to_ohe_cat)

Unnamed: 0,age,capital-gain,capital-loss,hours-per-week,workclass,education,marital-status,occupation,relationship,race,sex,native-country
0,0.520548,0.000000,0.000000,0.142857,State-gov,Bachelors,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States
1,0.657534,0.000000,0.000000,0.193878,Private,Bachelors,Married-civ-spouse,Transport-moving,Husband,White,Male,United-States
2,0.164384,0.000000,0.000000,0.397959,Private,HS-grad,Never-married,Craft-repair,Not-in-family,White,Male,Guatemala
3,0.493151,0.000000,0.000000,0.397959,State-gov,Doctorate,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States
4,0.205479,0.000000,0.000000,0.397959,Private,Bachelors,Never-married,Adm-clerical,Not-in-family,Asian-Pac-Islander,Male,United-States
...,...,...,...,...,...,...,...,...,...,...,...,...
6508,0.452055,0.000000,0.000000,0.551020,Federal-gov,Bachelors,Divorced,Exec-managerial,Not-in-family,White,Male,United-States
6509,0.506849,0.000000,0.000000,0.438776,Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,Male,United-States
6510,0.150685,0.000000,0.399449,0.295918,Private,HS-grad,Married-civ-spouse,Craft-repair,Husband,White,Male,United-States
6511,0.383562,0.150242,0.000000,0.397959,Self-emp-not-inc,Doctorate,Married-civ-spouse,Prof-specialty,Husband,White,Male,United-States
