In [1]:
import sys
sys.path.append("../")

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn import feature_extraction
from sklearn import preprocessing

In [3]:
from influence.influence_model import InfluenceModel

## Binary MNIST Digits

In [4]:
mnist_dataset = tf.keras.datasets.mnist
(full_train_images, full_train_labels), (full_test_images, full_test_labels) = mnist_dataset.load_data()

train_images = full_train_images[(full_train_labels == 1) | (full_train_labels == 7)]
train_labels = full_train_labels[(full_train_labels == 1) | (full_train_labels == 7)]

test_images = full_test_images[(full_test_labels == 1) | (full_test_labels == 7)]
test_labels = full_test_labels[(full_test_labels == 1) | (full_test_labels == 7)]

train_images = train_images / 255.0
test_images = test_images / 255.0

categorical_train_labels = ((train_labels == 1).astype(np.float64).reshape((-1, 1)))
categorical_test_labels = (test_labels == 1).astype(np.float64).reshape((-1, 1))

tf.keras.backend.set_floatx("float64")

In [5]:
model = tf.keras.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(1, use_bias=False),
    ]
)

model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

model.fit(
    train_images, categorical_train_labels, epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x186d5b1b888>

In [6]:
old_acc = model.evaluate(test_images, categorical_test_labels)[1]
print("Old Accuracy: ", old_acc)

Old Accuracy:  0.9939898289412853


In [7]:
influence_model = InfluenceModel(
    model,
    train_images,
    categorical_train_labels,
    test_images,
    categorical_test_labels,
    model.loss,
    dtype=np.float64,
    damping=0.2
)

In [8]:
old_weights = model.get_layer(index=1).get_weights()
print("Old weights:")
print(old_weights)

Old weights:
[array([[ 7.93116912e-02],
       [-6.03703186e-02],
       [-1.39434146e-02],
       [ 6.45550402e-02],
       [ 1.92334945e-02],
       [ 7.48109905e-02],
       [ 4.86425006e-02],
       [-2.42976992e-02],
       [ 2.18553120e-02],
       [ 6.09921668e-02],
       [-5.03372392e-02],
       [ 7.76481861e-02],
       [-2.18303834e-02],
       [-2.34118447e-02],
       [-3.13066487e-02],
       [ 5.62807617e-02],
       [ 3.85785139e-02],
       [ 3.33911942e-02],
       [-6.90603033e-03],
       [ 8.89392779e-03],
       [ 5.10239509e-02],
       [-4.72590179e-02],
       [-6.34030671e-02],
       [-8.37583660e-02],
       [-7.26081789e-02],
       [-5.54650742e-02],
       [-8.13131411e-02],
       [ 8.46516529e-02],
       [ 1.63691437e-02],
       [-2.12698013e-02],
       [-4.90964474e-02],
       [ 6.48148994e-02],
       [ 2.10795110e-02],
       [ 5.01452563e-02],
       [ 1.91645517e-02],
       [-1.29695260e-02],
       [ 5.81218744e-02],
       [ 5.37833185e-02]

In [9]:
new_weights = influence_model.get_new_parameters(8903, -0.01) # Epsilon is roughly 100 times that of leave-one-out.
model.get_layer(index=1).set_weights(new_weights)
print("New weights: ")
print(new_weights)

New weights: 
[<tf.Tensor: shape=(784, 1), dtype=float64, numpy=
array([[ 7.93116912e-02],
       [-6.03703186e-02],
       [-1.39434146e-02],
       [ 6.45550402e-02],
       [ 1.92334945e-02],
       [ 7.48109905e-02],
       [ 4.86425006e-02],
       [-2.42976992e-02],
       [ 2.18553120e-02],
       [ 6.09921668e-02],
       [-5.03372392e-02],
       [ 7.76481861e-02],
       [-2.18303834e-02],
       [-2.34118447e-02],
       [-3.13066487e-02],
       [ 5.62807617e-02],
       [ 3.85785139e-02],
       [ 3.33911942e-02],
       [-6.90603033e-03],
       [ 8.89392779e-03],
       [ 5.10239509e-02],
       [-4.72590179e-02],
       [-6.34030671e-02],
       [-8.37583660e-02],
       [-7.26081789e-02],
       [-5.54650742e-02],
       [-8.13131411e-02],
       [ 8.46516529e-02],
       [ 1.63691437e-02],
       [-2.12698013e-02],
       [-4.90964474e-02],
       [ 6.48148994e-02],
       [ 2.10795110e-02],
       [ 5.01452563e-02],
       [ 1.91645517e-02],
       [-1.29695260e-02],

In [10]:
print("Difference in weights: ")
print(np.subtract(new_weights, old_weights))

Difference in weights: 
[[[ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [-3.38107463e-08]
  [-5.74032814e-08]
  [-7.95306489e-09]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+00]
  [ 0.00000000e+

In [11]:
new_acc = model.evaluate(test_images, categorical_test_labels)[1]
print("New accuracy: ", new_acc)
print("Difference in accuracy: ", new_acc-old_acc)

New accuracy:  0.9921405455386038
Difference in accuracy:  -0.0018492834026815341


## COMPAS Dataset

In [12]:
df = pd.read_csv("../data/compas-scores-two-years.csv")
df

Unnamed: 0,id,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,...,v_decile_score,v_score_text,v_screening_date,in_custody,out_custody,priors_count.1,start,end,event,two_year_recid
0,1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,...,1,Low,2013-08-14,2014-07-07,2014-07-14,0,0,327,0,0
1,3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,...,1,Low,2013-01-27,2013-01-26,2013-02-05,0,9,159,1,1
2,4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,...,3,Low,2013-04-14,2013-06-16,2013-06-16,4,0,63,0,1
3,5,marcu brown,marcu,brown,2013-01-13,Male,1993-01-21,23,Less than 25,African-American,...,6,Medium,2013-01-13,,,1,0,1174,0,0
4,6,bouthy pierrelouis,bouthy,pierrelouis,2013-03-26,Male,1973-01-22,43,25 - 45,Other,...,1,Low,2013-03-26,,,2,0,1102,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7209,10996,steven butler,steven,butler,2013-11-23,Male,1992-07-17,23,Less than 25,African-American,...,5,Medium,2013-11-23,2013-11-22,2013-11-24,0,1,860,0,0
7210,10997,malcolm simmons,malcolm,simmons,2014-02-01,Male,1993-03-25,23,Less than 25,African-American,...,5,Medium,2014-02-01,2014-01-31,2014-02-02,0,1,790,0,0
7211,10999,winston gregory,winston,gregory,2014-01-14,Male,1958-10-01,57,Greater than 45,Other,...,1,Low,2014-01-14,2014-01-13,2014-01-14,0,0,808,0,0
7212,11000,farrah jean,farrah,jean,2014-03-09,Female,1982-11-17,33,25 - 45,African-American,...,2,Low,2014-03-09,2014-03-08,2014-03-09,3,0,754,0,0


In [13]:
# Filters from mbilalzafar/fair-classification.
df = df.dropna(subset=["days_b_screening_arrest"]) # Dropping missing values.
idx = np.logical_and(df["days_b_screening_arrest"]<=30, df["days_b_screening_arrest"]>=-30)
idx = np.logical_and(idx, df["is_recid"] != -1)
idx = np.logical_and(idx, df["c_charge_degree"] != "O") # F: felony, M: misconduct
idx = np.logical_and(idx, df["score_text"] != "NA")
idx = np.logical_and(idx, np.logical_or(df["race"] == "African-American", df["race"] == "Caucasian"))
df = df[idx]

In [14]:
priors_count = np.reshape(preprocessing.scale(df["priors_count"]), (-1, 1))
age_cat = preprocessing.LabelBinarizer().fit(df["age_cat"]).transform(df["age_cat"])
race = preprocessing.LabelBinarizer().fit(df["race"]).transform(df["race"])
sex = preprocessing.LabelBinarizer().fit(df["sex"]).transform(df["sex"])
c_charge_degree = preprocessing.LabelBinarizer().fit(df["c_charge_degree"]).transform(df["c_charge_degree"])

In [15]:
feature_data = np.hstack((
    priors_count,
    age_cat,
    race,
    sex,
    c_charge_degree
))

In [16]:
target_data = np.reshape(np.array(df["two_year_recid"]), (-1, 1))

In [17]:
train_idxs = range(0, 4278)
test_idxs = range(4278, 5278)

In [18]:
compas_model = tf.keras.Sequential(
    [
        tf.keras.layers.Input(shape=(7,)),
        tf.keras.layers.Dense(1, use_bias=False, kernel_regularizer="l2"),
    ]
)

compas_model.compile(
    optimizer="adam",
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

compas_model.fit(
    feature_data[train_idxs], target_data[train_idxs], epochs=30, shuffle=False
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x186d5b45ec8>

In [19]:
old_compas_acc = compas_model.evaluate(feature_data[test_idxs], target_data[test_idxs])[1]
print("Old Accuracy: ", old_compas_acc)

Old Accuracy:  0.607


In [20]:
compas_influence_model = InfluenceModel(
    compas_model,
    feature_data[train_idxs],
    target_data[train_idxs],
    feature_data[test_idxs],
    target_data[test_idxs],
    compas_model.loss,
    dtype=np.float64,
    damping=0.01
)

In [21]:
old_compas_weights = compas_model.get_layer(index=0).get_weights()
print("Old weights:")
print(old_compas_weights)

Old weights:
[array([[ 0.65683818],
       [-0.12535728],
       [-0.50870799],
       [ 0.40755013],
       [-0.16006805],
       [ 0.18326684],
       [-0.17623822]])]


In [22]:
new_compas_weights = compas_influence_model.get_new_parameters(0, -0.1) # Epsilon is roughly 400 times that of leave-one-out.
compas_model.get_layer(index=0).set_weights(new_compas_weights)
print("New weights: ")
print(new_compas_weights)

New weights: 
[<tf.Tensor: shape=(7, 1), dtype=float64, numpy=
array([[ 0.28626672],
       [ 0.3061418 ],
       [-0.42365726],
       [ 0.2184905 ],
       [-0.56202403],
       [ 0.52393232],
       [-0.53076174]])>]


In [23]:
print("Difference in weights: ")
print(np.subtract(new_compas_weights, old_compas_weights))

Difference in weights: 
[[[-0.37057146]
  [ 0.43149908]
  [ 0.08505073]
  [-0.18905963]
  [-0.40195598]
  [ 0.34066548]
  [-0.35452352]]]


In [24]:
new_compas_acc = compas_model.evaluate(feature_data[test_idxs], target_data[test_idxs])[1]
print("New accuracy: ", new_compas_acc)
print("Difference in accuracy: ", new_compas_acc-old_compas_acc)

New accuracy:  0.62
Difference in accuracy:  0.013000000000000012
