In [1]:
import sys
sys.path.append("../")

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn import feature_extraction
from sklearn import preprocessing

In [3]:
from influence.influence_model import InfluenceModel

## Binary MNIST Digits

In [4]:
mnist_dataset = tf.keras.datasets.mnist
(full_train_images, full_train_labels), (full_test_images, full_test_labels) = mnist_dataset.load_data()

train_images = full_train_images[(full_train_labels == 1) | (full_train_labels == 7)]
train_labels = full_train_labels[(full_train_labels == 1) | (full_train_labels == 7)]

test_images = full_test_images[(full_test_labels == 1) | (full_test_labels == 7)]
test_labels = full_test_labels[(full_test_labels == 1) | (full_test_labels == 7)]

train_images = train_images / 255.0
test_images = test_images / 255.0

categorical_train_labels = ((train_labels == 1).astype(np.float64).reshape((-1, 1)))
categorical_test_labels = (test_labels == 1).astype(np.float64).reshape((-1, 1))

tf.keras.backend.set_floatx("float64")

In [5]:
model = tf.keras.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(1, use_bias=False),
    ]
)

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

model.fit(
    train_images, categorical_train_labels, epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1996788fac8>

In [6]:
old_acc = model.evaluate(test_images, categorical_test_labels)[1]
print("Old Accuracy: ", old_acc)

Old Accuracy:  0.9944521497919556


In [7]:
influence_model = InfluenceModel(
    model,
    train_images,
    categorical_train_labels,
    test_images,
    categorical_test_labels,
    model.loss,
    dtype=np.float64,
    damping=0.2
)

In [8]:
old_weights = model.get_layer(index=1).get_weights()
print("Old weights:")
print(old_weights)

Old weights:
[array([[ 3.77276356e-02],
       [-5.55871050e-02],
       [ 2.85683995e-02],
       [ 5.78550685e-02],
       [ 5.92643348e-02],
       [-3.33398908e-02],
       [-1.48558175e-02],
       [-5.27513401e-02],
       [-5.66434159e-02],
       [-6.56164684e-02],
       [ 5.02333339e-02],
       [-4.65472741e-03],
       [ 7.78191568e-02],
       [ 6.17931255e-02],
       [ 5.88382651e-04],
       [-2.23482368e-02],
       [ 3.97003513e-02],
       [ 6.09197971e-02],
       [ 1.76735522e-02],
       [-6.31467902e-02],
       [ 4.51511671e-02],
       [-6.89897631e-02],
       [ 4.97802903e-02],
       [-8.28229778e-02],
       [-8.32911170e-02],
       [ 5.32605253e-02],
       [ 6.94888635e-03],
       [ 3.71583842e-02],
       [-6.93735193e-02],
       [ 4.87229290e-02],
       [-1.95783176e-02],
       [-2.77352813e-03],
       [ 2.43288577e-02],
       [ 4.17723203e-02],
       [ 4.29271947e-02],
       [ 1.34861783e-02],
       [-3.44116862e-02],
       [-3.83544256e-02]

In [9]:
new_weights = influence_model.get_new_parameters(8903, -0.01) # Epsilon is roughly 100 times that of leave-one-out.
model.get_layer(index=1).set_weights(new_weights)
print("New weights: ")
print(new_weights)

New weights: 
[<tf.Tensor: shape=(784, 1), dtype=float64, numpy=
array([[ 3.77276356e-02],
       [-5.55871050e-02],
       [ 2.85683995e-02],
       [ 5.78550685e-02],
       [ 5.92643348e-02],
       [-3.33398908e-02],
       [-1.48558175e-02],
       [-5.27513401e-02],
       [-5.66434159e-02],
       [-6.56164684e-02],
       [ 5.02333339e-02],
       [-4.65472741e-03],
       [ 7.78191568e-02],
       [ 6.17931255e-02],
       [ 5.88382651e-04],
       [-2.23482368e-02],
       [ 3.97003513e-02],
       [ 6.09197971e-02],
       [ 1.76735522e-02],
       [-6.31467902e-02],
       [ 4.51511671e-02],
       [-6.89897631e-02],
       [ 4.97802903e-02],
       [-8.28229778e-02],
       [-8.32911170e-02],
       [ 5.32605253e-02],
       [ 6.94888635e-03],
       [ 3.71583842e-02],
       [-6.93735193e-02],
       [ 4.87229290e-02],
       [-1.95783176e-02],
       [-2.77352813e-03],
       [ 2.43288577e-02],
       [ 4.17723203e-02],
       [ 4.29271947e-02],
       [ 1.34861783e-02],

In [10]:
print("Difference in weights: ")
print(np.subtract(new_weights, old_weights))

Difference in weights: 
[[[ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [-2.06678576e-08]
  [-3.65573722e-08]
  [-5.02313335e-09]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+

In [11]:
new_acc = model.evaluate(test_images, categorical_test_labels)[1]
print("New accuracy: ", new_acc)
print("Difference in accuracy: ", new_acc-old_acc)

New accuracy:  0.9921405455386038
Difference in accuracy:  -0.002311604253351862


## COMPAS Dataset

In [12]:
df = pd.read_csv("../data/compas-scores-two-years.csv")
df

Unnamed: 0,id,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,...,v_decile_score,v_score_text,v_screening_date,in_custody,out_custody,priors_count.1,start,end,event,two_year_recid
0,1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,...,1,Low,2013-08-14,2014-07-07,2014-07-14,0,0,327,0,0
1,3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,...,1,Low,2013-01-27,2013-01-26,2013-02-05,0,9,159,1,1
2,4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,...,3,Low,2013-04-14,2013-06-16,2013-06-16,4,0,63,0,1
3,5,marcu brown,marcu,brown,2013-01-13,Male,1993-01-21,23,Less than 25,African-American,...,6,Medium,2013-01-13,,,1,0,1174,0,0
4,6,bouthy pierrelouis,bouthy,pierrelouis,2013-03-26,Male,1973-01-22,43,25 - 45,Other,...,1,Low,2013-03-26,,,2,0,1102,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7209,10996,steven butler,steven,butler,2013-11-23,Male,1992-07-17,23,Less than 25,African-American,...,5,Medium,2013-11-23,2013-11-22,2013-11-24,0,1,860,0,0
7210,10997,malcolm simmons,malcolm,simmons,2014-02-01,Male,1993-03-25,23,Less than 25,African-American,...,5,Medium,2014-02-01,2014-01-31,2014-02-02,0,1,790,0,0
7211,10999,winston gregory,winston,gregory,2014-01-14,Male,1958-10-01,57,Greater than 45,Other,...,1,Low,2014-01-14,2014-01-13,2014-01-14,0,0,808,0,0
7212,11000,farrah jean,farrah,jean,2014-03-09,Female,1982-11-17,33,25 - 45,African-American,...,2,Low,2014-03-09,2014-03-08,2014-03-09,3,0,754,0,0


In [13]:
# Filters from mbilalzafar/fair-classification.
df = df.dropna(subset=["days_b_screening_arrest"]) # Dropping missing values.
idx = np.logical_and(df["days_b_screening_arrest"]<=30, df["days_b_screening_arrest"]>=-30)
idx = np.logical_and(idx, df["is_recid"] != -1)
idx = np.logical_and(idx, df["c_charge_degree"] != "O") # F: felony, M: misconduct
idx = np.logical_and(idx, df["score_text"] != "NA")
idx = np.logical_and(idx, np.logical_or(df["race"] == "African-American", df["race"] == "Caucasian"))
df = df[idx]

In [14]:
priors_count = np.reshape(preprocessing.scale(df["priors_count"]), (-1, 1))
age_cat = preprocessing.LabelBinarizer().fit(df["age_cat"]).transform(df["age_cat"])
race = preprocessing.LabelBinarizer().fit(df["race"]).transform(df["race"])
sex = preprocessing.LabelBinarizer().fit(df["sex"]).transform(df["sex"])
c_charge_degree = preprocessing.LabelBinarizer().fit(df["c_charge_degree"]).transform(df["c_charge_degree"])

In [15]:
feature_data = np.hstack((
    priors_count,
    age_cat,
    sex,
    c_charge_degree
))

In [16]:
target_data = race

In [17]:
train_idxs = range(0, 4278)
test_idxs = range(4278, 5278)

In [41]:
compas_model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(6,)),
        tf.keras.layers.Dense(1, use_bias=False),
    ]
)

compas_model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

compas_model.fit(
    feature_data[train_idxs], target_data[train_idxs], epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x19970ee9b08>

In [42]:
old_compas_acc = compas_model.evaluate(feature_data[test_idxs], target_data[test_idxs])[1]
print("Old Accuracy: ", old_compas_acc)

Old Accuracy:  0.616


In [43]:
compas_influence_model = InfluenceModel(
    compas_model,
    feature_data[train_idxs],
    target_data[train_idxs],
    feature_data[test_idxs],
    target_data[test_idxs],
    compas_model.loss,
    dtype=np.float64,
    damping=0.01
)

In [44]:
old_compas_weights = compas_model.get_layer(index=0).get_weights()
print("Old weights:")
print(old_compas_weights)

Old weights:
[array([[-0.46991552],
       [-0.25010939],
       [ 0.1031139 ],
       [-0.15648047],
       [-0.01136604],
       [-0.36742988]])]


In [45]:
new_compas_weights = compas_influence_model.get_new_parameters(0, -0.1) # Epsilon is roughly 400 times that of leave-one-out.
compas_model.get_layer(index=0).set_weights(new_compas_weights)
print("New weights: ")
print(new_compas_weights)

New weights: 
[<tf.Tensor: shape=(6, 1), dtype=float64, numpy=
array([[-0.20686398],
       [-0.52716811],
       [ 0.15942617],
       [ 0.04117734],
       [-0.26664875],
       [-0.03147678]])>]


In [46]:
print("Difference in weights: ")
print(np.subtract(new_compas_weights, old_compas_weights))

Difference in weights: 
[[[ 0.26305155]
  [-0.27705871]
  [ 0.05631227]
  [ 0.19765781]
  [-0.25528271]
  [ 0.33595309]]]


In [47]:
new_compas_acc = compas_model.evaluate(feature_data[test_idxs], target_data[test_idxs])[1]
print("New accuracy: ", new_compas_acc)
print("Difference in accuracy: ", new_compas_acc-old_compas_acc)

New accuracy:  0.616
Difference in accuracy:  0.0
