# Overview
This notebook provides code to reproduce FairFIS interpretability results for a tree-based surrogate of an MLP model, using the Adult dataset as an example.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
import math
from sklearn.tree import DecisionTreeRegressor
from scipy.special import expit
from scipy.stats import pearsonr
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from FairTreeFIS import fis_tree, fis_forest, fis_boosting
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier,GradientBoostingRegressor, RandomForestRegressor
from FairTreeFIS import util
import random
from scipy.special import expit
from sklearn.model_selection import train_test_split
from scipy.special import logit, expit
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.constraints import max_norm
from sklearn.metrics import r2_score

In [None]:
# Load the Adult dataset from "adult.csv" (path is relative to the working
# directory) into a DataFrame.
dat = pd.read_csv(filepath_or_buffer="adult.csv")

In [None]:
# Name of the label column and of the two protected-attribute columns.
target = "income-per-year"
target_sensitive_attribute = "sex"
sensitive_attribute2 = "race"

# Columns that must not appear in the feature matrix given to the models.
drop_features = [target_sensitive_attribute, sensitive_attribute2, target]

# Number of rows in the full dataset (before any train/test split).
nrow = len(dat.index)

In [None]:
# Feature names: every dataset column except the protected attributes and
# the target, kept in their original column order.
column_names = dat.columns.tolist()
for dropped in drop_features:
    # list.remove raises ValueError if a configured column is missing.
    column_names.remove(dropped)

# Number of input features; used as the MLP input dimension.
ncol = len(column_names)

In [None]:
# Hold out 30% of the rows; the fixed seed keeps the split itself repeatable.
train, test = train_test_split(dat, test_size=0.3, random_state=0, shuffle=True)

# --- Training partition ---------------------------------------------------
# Protected attribute values, aligned row-for-row with the features.
a_train = train[target_sensitive_attribute].to_numpy()
# Binary label: entries equal to 1 are kept, everything else becomes 0.
y_train = train[target].to_numpy()
y_train = np.where(y_train == 1, y_train, 0)
# Feature matrix without the protected attributes and the label.
train = train.drop(columns=drop_features).to_numpy()

# --- Test partition (prepared the same way) ---------------------------------
a_test = test[target_sensitive_attribute].to_numpy()
y_test = test[target].to_numpy()
y_test = np.where(y_test == 1, y_test, 0)
test = test.drop(columns=drop_features).to_numpy()

# Define the MLP: three ReLU hidden layers (28, 20 and 10 units), dropout of
# 0.2 after each of the first two, and a single sigmoid output unit.
model = Sequential([
    Dense(28, input_dim=ncol, activation='relu', kernel_initializer="uniform"),
    Dropout(0.2),
    Dense(20, activation='relu', kernel_constraint=max_norm(3), kernel_initializer="uniform"),
    Dropout(0.2),
    Dense(10, activation='relu', kernel_initializer="uniform"),
    Dense(1, activation='sigmoid', kernel_initializer="uniform"),
])

# Binary classification set-up: cross-entropy loss, Adam, accuracy reporting.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train on the training features and binary labels.
model.fit(train, y_train, batch_size=10, epochs=100)

# Sigmoid outputs of the trained MLP on the training features.
scores = model.predict(train)
# Turn the outputs into hard labels via the sign of the log-odds:
# +1 when the output is above 0.5, -1 when below (0 if exactly 0.5).
# NOTE(review): these labels use a -1/+1 coding while y_train uses 0/1, and
# the array presumably keeps the (n_samples, 1) shape of the single-unit
# output layer -- confirm that is what the downstream steps expect.
train_pred = np.sign(logit(scores))


# Fit the surrogate: a decision tree trained to reproduce the MLP's hard
# predictions (not the true labels) from the same training features.
# NOTE(review): no random_state is passed, so the fitted tree may differ
# between runs.
clf = DecisionTreeClassifier().fit(train, train_pred)


# Compute FairFIS scores for the surrogate tree.
# The scorer receives the fitted surrogate, the training features, the true
# training labels and the protected-attribute values.
# NOTE(review): the literal 0 is presumably the protected-group value of
# `a_train`, and `triangle = False` selects a library option -- confirm both
# against the FairTreeFIS documentation.
f_forest = fis_tree(clf,train,y_train,a_train,0, triangle = False)
# Run the computation; the results are read from the attributes below.
f_forest.calculate_fairness_importance_score()

# Per-feature scores read from the scorer object.
# NOTE(review): "dp" / "eqop" presumably stand for demographic parity and
# equality of opportunity; these are underscore-prefixed (non-public)
# attributes of the library object.
dp_scores = f_forest._fairness_importance_score_dp_root
eo_scores = f_forest._fairness_importance_score_eqop_root
# Feature importances of the classifier held by the scorer (presumably the
# same `clf` passed in above).
acc_scores = f_forest.fitted_clf.feature_importances_

In [None]:
# Gather the feature names and the three score vectors, one entry per
# feature, keyed by the column name each should get in the table.
data = dict(Feature=column_names, DP=dp_scores, ACC=acc_scores, EO=eo_scores)

# Tabulate the scores: one row per feature, one column per score type.
df = pd.DataFrame(data=data)