# # A1 Learning Fair Representations (LFR)

## Loading Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
from sklearn.model_selection import train_test_split
import scipy.optimize as optim
from sklearn.model_selection import KFold
import time
import sys

## Cleaning Data

In [2]:
df_raw = pd.read_csv("../data/compas-scores-two-years.csv")

In [3]:
df = df_raw[['age', 'c_charge_degree', 'race', 'age_cat',
                    'score_text', 'sex', 'priors_count', 'days_b_screening_arrest',
                    'decile_score', 'is_recid', 'c_jail_in',
                    'c_jail_out', 'two_year_recid']]\
                    .query('days_b_screening_arrest <= 30')\
                    .query('days_b_screening_arrest >= -30')\
                    .query('is_recid != -1')\
                    .query('c_charge_degree != "O"')\
                    .query('score_text != "N/A"')
df

Unnamed: 0,age,c_charge_degree,race,age_cat,score_text,sex,priors_count,days_b_screening_arrest,decile_score,is_recid,c_jail_in,c_jail_out,two_year_recid
0,69,F,Other,Greater than 45,Low,Male,0,-1.0,1,0,2013-08-13 06:03:42,2013-08-14 05:41:20,0
1,34,F,African-American,25 - 45,Low,Male,0,-1.0,3,1,2013-01-26 03:45:27,2013-02-05 05:36:53,1
2,24,F,African-American,Less than 25,Low,Male,4,-1.0,4,1,2013-04-13 04:58:34,2013-04-14 07:02:04,1
5,44,M,Other,25 - 45,Low,Male,0,0.0,1,0,2013-11-30 04:50:18,2013-12-01 12:28:56,0
6,41,F,Caucasian,25 - 45,Medium,Male,14,-1.0,6,1,2014-02-18 05:08:24,2014-02-24 12:18:30,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7209,23,F,African-American,Less than 25,Medium,Male,0,-1.0,7,0,2013-11-22 05:18:27,2013-11-24 02:59:20,0
7210,23,F,African-American,Less than 25,Low,Male,0,-1.0,3,0,2014-01-31 07:13:54,2014-02-02 04:03:52,0
7211,57,F,Other,Greater than 45,Low,Male,0,-1.0,1,0,2014-01-13 05:48:01,2014-01-14 07:49:46,0
7212,33,M,African-American,25 - 45,Low,Female,3,-1.0,2,0,2014-03-08 08:06:02,2014-03-09 12:18:04,0


In [4]:
# Drop unrelated columns
df = df[(df.race=='African-American') | (df.race=='Caucasian')]
df = df.dropna()

df.shape

(5278, 13)

In [5]:
from datetime import datetime


dt1 = list(map(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S').date(), df['c_jail_out']))
dt2 = list(map(lambda x: datetime.strptime(x,'%Y-%m-%d %H:%M:%S').date(), df['c_jail_in']))

len_stay = [(a-b).days for a,b in zip(dt1,dt2)]

df['length_of_stay'] = len_stay
df = df.drop(['c_jail_out', 'c_jail_in'], axis=1)


In [6]:
#rearrange columns so y is the last column
cols = df.columns.tolist()
cols = cols[:-2] + cols[-1:] + cols[-2:-1]
df = df[cols]

## Encoding features

In [7]:
label_column = ['two_year_recid']
catogory_features = []
numeric_features = []

for col in df.columns.values:
    if col in label_column:
        continue
    elif df[col].dtypes in ('int64', 'float64') :
        numeric_features += [col]
    else:
        catogory_features += [col]
        
print("categorical:", catogory_features)
print("numerical:", numeric_features)

categorical: ['c_charge_degree', 'race', 'age_cat', 'score_text', 'sex']
numerical: ['age', 'priors_count', 'days_b_screening_arrest', 'decile_score', 'is_recid', 'length_of_stay']


In [8]:
# Now we replace categorical columns with numeric values
df_num = df.copy()
feat2name = {}
encoders = {}

# Use Label Encoder for categorical columns (including target column)
for feature in catogory_features:
    encoder = LabelEncoder()
    encoder.fit(df_num[feature])
    
    df_num[feature] = encoder.transform(df_num[feature])
    
    feat2name[feature] = encoder.classes_
    encoders[feature] = encoder

# Use MinMaxScaler for numerical columns     
for feature in numeric_features:
    val = df_num[feature].values[:, np.newaxis]
    mms = MinMaxScaler().fit(val)
    df_num[feature] = mms.transform(val)
    encoders[feature] = mms
    
df_num = df_num.astype(float)




In [9]:
encoders['race'].classes_

array(['African-American', 'Caucasian'], dtype=object)

In [10]:
df_num.shape

(5278, 12)

In [11]:
df_num.head()

Unnamed: 0,age,c_charge_degree,race,age_cat,score_text,sex,priors_count,days_b_screening_arrest,decile_score,is_recid,length_of_stay,two_year_recid
1,0.258065,0.0,0.0,0.0,1.0,1.0,0.0,0.483333,0.222222,1.0,0.0125,1.0
2,0.096774,0.0,0.0,2.0,1.0,1.0,0.105263,0.483333,0.333333,1.0,0.00125,1.0
6,0.370968,0.0,1.0,0.0,2.0,1.0,0.368421,0.483333,0.555556,1.0,0.0075,1.0
8,0.33871,1.0,1.0,0.0,1.0,0.0,0.0,0.483333,0.0,0.0,0.00375,0.0
10,0.145161,0.0,1.0,0.0,1.0,1.0,0.0,0.483333,0.333333,0.0,0.00125,0.0


In [12]:
encoders['race'].classes_

array(['African-American', 'Caucasian'], dtype=object)

## Spliting Data

In [13]:
data_train, data_test = train_test_split(df_num, test_size=0.2)
data_train, data_val= train_test_split(data_train, test_size=0.2)

In [14]:
# First, we will define some of the constants and functions mentioned in the paper
N = df.shape[0]  # number of samples in X
D = df.shape[1]  # Dimension of x vector
K = 10  # Number of prototypes represented in Z


## LFR Model

The goal of LFR is to learn a good prototype set $Z$ such that:
1. the mapping from $X_0$ to $Z$ satisfies statistical parity;
2. the mapping to $Z$-space retains information in $X$ (except for membership in the protected set); and
3. the induced mapping from $X$ to $Y$ (by first mapping each $x$ probabilistically to $Z$-space, and then mapping $Z$ to $Y$) is close to f.

Each of these aims corresponds to a term in the objective function.



## (1) Define 

We define $M_{nk}$ as the probability that $x_n$ maps to $v_k$.

So,
$$M_{nk} = P(Z=k|x_n) \space\space 
=\frac{exp(-d(x_n, v_k))}{\sum_{k=1}^K exp(-d(x_n, v_k))}$$

###  Calculates the euclidean distance

$$d(x_n, v_k, \alpha) = \sum^D_{d=1} \alpha_d (x_{nd} - v_{kd})^2$$

In [15]:
def d(x1, x2, alpha):
    """
        Calculates the euclidean distance between x1 and x2 with feature weights alpha
        x1: First vector in X vector space (D, 1)
        x2: Second vector in X vector space (D, 1)
        alpha: weight vector for each of the features (D, 1)
    """
    x1 = np.matrix(x1)
    x2 = np.matrix(x2)
    alpha = np.matrix(alpha)
#     print(x1, x2, alpha)
#     print(np.multiply(np.multiply((x1 - x2), (x1 - x2)),alpha))
    return sum(np.multiply(np.multiply((x1 - x2), (x1 - x2)), alpha))[0, 0]

In [16]:
# Test 
d(np.matrix([1,2,3]).T, np.matrix([0,0,0]).T, np.matrix([1,1,2]).T)

23

In [17]:
# To save time for later, we will cache the distance map between all inputs X_i 
# and current prototypes V_k
def d_map(X, V, alpha):
    """
        Returns a 2D matrix with shape (N, K) with each cell (i, j) 
            distance from input x_i to prototype v_j with weighted features
        X: Input matrix (N, D)
        V: Prototype matrix (K, D)
        alpha: weight vector for each of the features (D, 1)
    """
    distance_map = np.zeros((X.shape[0], V.shape[0]))
    for i in range(X.shape[0]):
        for j in range(V.shape[0]):
            distance_map[i, j] = d(X[i, :], V[j, :], alpha)
            
    return distance_map

In [18]:
# Test
d_map(np.matrix([[1,2],[3,4],[6,7]]), np.matrix([[10,2],[3,40]]), np.matrix([[1.0],[1.0]]))

array([[162.,   8.],
       [ 98.,   0.],
       [ 32.,  18.]])

In [19]:
def M_nk(X, n, V, k, alpha, dist_map, summation):
    """
        Calculate the prob of X_n is classified to kth prototype using softmax
        X: Input matrix (N, D)
        n: the nth input to calculate the prob for
        V: prototype matrix (K, D)
        k: the kth prototype to classify for
        alpha: weight vector for each of the features (D, 1)
    """
    p = 0
    exponent = np.exp(-1 * dist_map[n, k])
    p = exponent / summation
    return p

In [20]:
# To save time later, we will cache the probs of each x mapped to k
def M_map(X, V, alpha):
    """
        Return the prob of each x mapping to a prototype v (N, K)
        X: Input matrix (N, D)
        V: Prototype matrix (K, D)
        alpha: weight vector for each of the features (D, 1)
    """
    M = np.zeros((X.shape[0], V.shape[0]))
    
    dist_map = d_map(X, V, alpha)
    
    for i in range(X.shape[0]):
        for j in range(V.shape[0]):
            summation = 0
            for k_idx in range(V.shape[0]):
                summation += np.exp(-1 * dist_map[i, k_idx])
            # To avoid value error
            if (summation == 0): 
                summation = 0.000001
            M[i, j] = M_nk(X, i, V, j, alpha, dist_map, summation)
    return M
    

$$M_k =\mathop{\mathbb{E}}_{x \in X} P(Z=k|x)= \frac{1}{|X|} \sum_{n \in X} M_{nk}$$



In [21]:
def M_sub_k(M_sub_map):
    """
        Calculate estimated prob of mapping to k for a subset M_map. (K,)
        M_sub_map: prob of each x mapping to a prototype (N0, K)
    """
    Ms = np.zeros(M_sub_map.shape[1])
    
    for k in range(M_sub_map.shape[1]):
        for n in range(M_sub_map.shape[0]):
            Ms[k] += M_sub_map[n, k]
        Ms[k] /= M_sub_map.shape[0]
    return Ms

## (2) Objective Function

## 
<h1 align = "center">$Total \space Loss = A_x * L_x + A_y * L_y + A_z * L_z 􏰂􏰀􏰀$<h1>

where $A_x, A_y, A_z$ are hyper-parameters governing the trade-off between the system desiderata.

## <div align='center' ><font size='5'>$L_x = \sum_{n=1}^N (x_n - \hat{x}_n)^2$</font></div>

where $$\hat{x}_n = \sum^K_{k=1}M_{nk}v_k$$

In [22]:
def L_x(X, x_hats):
    """
        Loss term for goodness of the prototype.
        X: input matrix (N, D)
        x_hats: x estimates (N, D)
    """
    Lx = 0
    for n in range(X.shape[0]):
        for d in range(X.shape[1]):
            Lx += (X[n, d] - x_hats[n, d]) * (X[n, d] - x_hats[n, d])
    return Lx

In [23]:
def x_hats(M, V):
    """
        Return a matrix of reconstructed x through M 
            using each of the prototypes. (N, D)
        M: M_map output (N, K)
        V: Prototy$$\hat{x}_n = \sum^K_{k=1}M_{nk}v_k$$pe matrix (K, D)
    """
    return np.matmul(M, V)

## <div align='center' ><font size='5'>$L_y = \sum_{n=1}^N -y_n log \hat{y}_n - (1-y_n)log(1- \hat{y}_n)$</font></div> 


$$\hat{y}_n = \sum^K_{k=1} M_{nk}w_k \\
0< w_k <1
$$

In [24]:
def L_y(ys, y_hats):
    """
        Loss term for accuracy of the model
        ys: Gound-truth ## $L_y = \sum_{n=1}^N -y_n log \hat{y}_n - (1-y_n)log(1- \hat{y}_n)$label of X (N, 1)
        y_hats: y estimates (N, 1)
    """
    Ly = 0
    for n in range(ys.shape[0]): 
        Ly += (-1 * ys[n] * np.log(y_hats[n]) - (1 - ys[n]) * (np.log(1 - y_hats[n])))
    return Ly[0,0]

In [25]:
def y_hats(M, w):
    """
        Return matrix of final estimates of each input through M and trained w.
        M: M_map output (N, K)
        w: Model weight between 0 and 1 (K, 1)
    """
    y_hat = np.zeros(M.shape[0])
    for n in range(M.shape[0]):
        for k in range(M.shape[1]):
            y_hat[n] += (M[n, k] * w[k])
        # Clipping estimates to (0, 1)
        y_hat[n] = 0.000001 if y_hat[n] <= 0 else y_hat[n]
        y_hat[n] = 0.999999 if y_hat[n] >= 1 else y_hat[n]
    return y_hat

## <div align='center' ><font size='5'>$L_z􏰂􏰀= 􏰀\sum_{k=1}^K|M_k^+-M_k^-|$􏰀</font></div> 



In order to achieve statistical parity, we want to ensure, which can be estimated using the training data as:

$$M_k^+ = M_K^-   \space   \space \space \forall k$$

where

$$M_k^+ =\mathop{\mathbb{E}}_{x \in X^+} P(Z=k|x)= \frac{1}{|X^+|} \sum_{n \in X^+} M_{nk}$$




In [26]:
def L_z(M_sens, M_nonsens):
    """
        Loss term for fairness.
        M_sens: M_sub_k for sensitive data (1, K)
        M_nonsens: M_sub_k for non-sensitive data (1, K)
    """
    Lz= 0.0
    
    for k in range(M_sens.shape[0]):
          Lz += abs(M_sens[k] - M_nonsens[k])
    return Lz

# Implementation

In [27]:
# Compute classification
def compute_error(y_hat, y):
    # we will split y_hat by 0.5
    clipped = np.clip(y_hat, 0, 1)
    rounded = np.around(clipped)
    return np.abs(rounded - y).mean()


In [28]:
compute_error(np.array([0.4, 0.6, 0.7]), np.array([1.0, 1.0, 1.0]))

0.3333333333333333

In [29]:
class LFR():
    def __init__(
        self,
        train_data,
        test_data,
        label_column,
        sensitive_column,
        privileged_group,
        k,
        A_x,
        A_y,
        A_z
    ):
        self.k = k
        self.A_x = A_x
        self.A_y = A_y
        self.A_z = A_z
        
        self.__name__ = str(k) + " " + str(A_x) + " " + str(A_y) + " " + str(A_z) 
        
        self.curr_iters = 0
        
        self.train_data = train_data
        self.test_data = test_data
        self.label_column = label_column
        self.sensitive_column = sensitive_column
        self.privileged_group = privileged_group
        
        train_copy = train_data.copy()
        train_copy.drop(columns=label_column)
        self.X = np.matrix(train_copy.to_numpy())
        self.y = np.matrix(train_data[label_column].to_numpy()).T
        
        sens = train_data[sensitive_column]
        priv_idx = np.array(np.where(sens==privileged_group))[0].flatten()
        nonpriv_idx = np.array(np.where(sens!=privileged_group))[0].flatten()
        self.X_plus = self.X[priv_idx,:]
        self.y_plus = self.y[priv_idx,:]
        self.X_minus = self.X[nonpriv_idx,:]
        self.y_minus = self.y[nonpriv_idx,:]
        
    def fit(self, init_params, maxiters=100):
        bnd = []
        for i, k2 in enumerate(init_params):
            if i < self.X.shape[1] * 2 or i >= self.X.shape[1] * 2 + self.k:
                bnd.append((None, None))
            else:
                bnd.append((0, 1))
        self.curr_param = init_params
#         return
        return optim.fmin_l_bfgs_b(self.forward, x0=init_params, epsilon=1e-5, 
                          bounds = bnd, approx_grad=True, maxfun=maxiters, maxiter=maxiters)
        
    def forward(self, params, return_params=False):
        """
            
        """
        self.curr_iters += 1
        
#         print("N_priv")

        N_priv, D = self.X_plus.shape
        N_nonpriv, _ = self.X_minus.shape

#         print("Extract")
        # Extract all params
        alpha_priv, alpha_nonpriv, w, V = self.extract_param(params)

#         print("Ms")
        M_k_p = M_map(self.X_plus, V, alpha_priv)
        M_k_n = M_map(self.X_minus, V, alpha_nonpriv)

#         print("Lz")
        Lz = L_z(M_sub_k(M_k_p), M_sub_k(M_k_n))

#         print("Xhats")
        # To save time, we will just sum the two groups up
        x_hats_p = x_hats(M_k_p, V)
        x_hats_n = x_hats(M_k_n, V)
#         print("Lx")
        L_x_p = L_x(self.X_plus, x_hats_p)
        L_x_n = L_x(self.X_minus, x_hats_n)

        Lx = L_x_p + L_x_n

#         print("Yhats")
        y_hats_p = y_hats(M_k_p, w)
        y_hats_n = y_hats(M_k_n, w)
#         print("Ly")
        L_y_p = L_y(self.y_plus, y_hats_p)
        L_y_n = L_y(self.y_minus, y_hats_n)

        Ly = L_y_p + L_y_n

#         print("Loss", Lx, Ly, Lz)
        loss = (self.A_x * Lx) + (self.A_y * Ly) + (self.A_z * Lz)

        self.curr_param = params
        if self.curr_iters % 50 == 0:
            print(
                "model:", self.__name__,
                "step:", self.curr_iters, 
                "loss:", loss, 
                "Lx:", Lx, 
                "Ly:", Ly, 
                "Lz:", Lz)
#             print("params y_hats_p, y_hats_n, M_k_p, M_k_n, loss:",
#                  y_hats_p, y_hats_n, M_k_p, M_k_n, loss)
            # Predict
            test_copy = self.test_data.copy()
            test_copy.drop(columns=[label_column])

            X_test = np.matrix(test_copy.to_numpy())
            y_test = np.matrix(self.test_data[label_column].to_numpy()).T
        
            M_k_p_val = M_map(X_test, V, alpha_priv)
            
            y_hat = y_hats(M_k_p_val, w)
            print("current error:", compute_error(y_hat, y_test))


        if return_params:
            return y_hats_p, y_hats_n, M_k_p, M_k_n, loss
        else:
            return loss
        
    def extract_param(self, params):
        
        _, D = self.X_plus.shape
        # Extract all params
        alpha_priv = params[:D].T
        alpha_nonpriv = params[D:2*D].T

        w = params[2*D:2*D+self.k]
        V = np.matrix(params[(2*D)+self.k:]).reshape((self.k, D))
        return alpha_priv, alpha_nonpriv, w, V
        
    def predict(self, X_test, priv=True):
        alpha_priv, alpha_nonpriv, w, V = self.extract_param(self.curr_param)
        
        if (priv):
            M_k_p = M_map(X_test, V, alpha_priv)
        else:
            M_k_p = M_map(X_test, V, alpha_nonpriv)
        
        return y_hats(M_k_p, w)
    
    def predict_with_param(self, X_test, param, priv=True):
        alpha_priv, alpha_nonpriv, w, V = self.extract_param(param)
        
        if (priv):
            M_k_p = M_map(X_test, V, alpha_priv)
        else:
            M_k_p = M_map(X_test, V, alpha_nonpriv)
        
        return y_hats(M_k_p, w)

In [30]:
# model.fit(init_param, maxiters=100)

In [31]:
# We see the model has reached minima at step 1000

In [32]:
label_column="two_year_recid"


In [33]:

# # Vars to store results
# cval_errs = {} # Mean validation errors
# train_time = {} # Training time
# # Best Model
# best_model = None
# # Best Validation Error
# best_err = sys.maxsize

# # Model selection 
# KS = [5]#, 10]
# Axs = [0.00000001]#, 1, 1000000]
# Ays = [0.01]#, 1, 1000]
# Azs = [1000]#, 10000, 1000000]

# train_data = data_train.copy()

# for K in KS:
#     init_param = np.random.uniform(size=df.shape[1] * 2 + K + df.shape[1] * K)
#     for Ax in Axs:
#         for Ay in Ays:    
#             for Az in Azs:
#                 kf = KFold(n_splits=5, random_state=None, shuffle=False)
#                 y_err = []

#                 start = time.time()

#                 # Cross Validaiton
#                 for train_index, val_index in kf.split(train_data):
#                 #     print("TRAIN:", train_index, "VAL:", val_index)
#                     train_copy = train_data.copy()
#                     train_copy.drop(columns=[label_column])
#                     train_df = train_copy.iloc[train_index]
                    
#                     X_val = np.matrix(train_copy.iloc[val_index].to_numpy())
#                     y_val = np.matrix(train_data.iloc[val_index][label_column].to_numpy()).T

#                     model = LFR(
#                         train_df,
#                         data_val,
#                         "two_year_recid",
#                         "race",
#                         1,
#                         K,
#                         Ax,
#                         Ay,
#                         Az
#                     )
#                     model.fit(init_param, maxiters=500)
                    
#                     y_hat = model.predict(X_val)
# #                     print(y_hat)
#                     y_err.append(compute_error(y_hat, y_val))

#                 end = time.time()

#                 print(str(K), str(Ax), str(Ay), str(Az), "mean val MAE:", np.mean(y_err))
#                 print("Time lapsed", str((end - start)*1000))

#                 # add to dict
#                 cval_errs[model.__name__] = np.mean(y_err)
#                 train_time[model.__name__] = (end - start)*1000
#                 if np.mean(y_err) < best_err:
#                     best_model = model
#                     best_err = np.mean(y_err)
#                     best_errs = y_err
                
# print(best_model.__name__, best_err)

In [34]:
# Best model

In [35]:
K = 5
label_column = "two_year_recid"
model = LFR(
    data_train,
    data_val,
    label_column,
    "race",
    1,
    K,
    0.00000001,
    0.01,
    1000
)
model.__name__

'5 1e-08 0.01 1000'

In [36]:

init_param = np.random.uniform(size=df.shape[1] * 2 + K + df.shape[1] * K)

In [37]:
model.forward(init_param)
test_copy = data_test.copy()
X_test = np.matrix(test_copy.to_numpy())
y_test = np.matrix(data_test[label_column].to_numpy()).T
y_hat = model.predict(X_test)
print("error:", compute_error(y_hat, y_test))

error: 0.5388257575757576


In [38]:
op = model.fit(init_param, maxiters=15000)

model: 5 1e-08 0.01 1000 step: 50 loss: 51.71868568473771 Lx: 12282.67876256688 Ly: 2708.8774915032964 Lz: 0.02462978794291712
current error: 0.5372781065088758
model: 5 1e-08 0.01 1000 step: 100 loss: 41.21464650695299 Lx: 12296.75780309468 Ly: 2681.1014009324526 Lz: 0.014403509530050429
current error: 0.5372781065088758
model: 5 1e-08 0.01 1000 step: 150 loss: 41.21464650685327 Lx: 12296.747831476372 Ly: 2681.1014009324526 Lz: 0.014403509530050429
current error: 0.5372781065088758
model: 5 1e-08 0.01 1000 step: 200 loss: 42.794464103048696 Lx: 12318.61055110675 Ly: 2562.600275182554 Lz: 0.017168338165117647
current error: 0.5372781065088758
model: 5 1e-08 0.01 1000 step: 250 loss: 42.79446410304962 Lx: 12318.610643361735 Ly: 2562.600275182554 Lz: 0.017168338165117647
current error: 0.5372781065088758
model: 5 1e-08 0.01 1000 step: 300 loss: 35.53218167132115 Lx: 12306.010676180164 Ly: 2627.5815812224096 Lz: 0.009256242798990288
current error: 0.5372781065088758
model: 5 1e-08 0.01 10

model: 5 1e-08 0.01 1000 step: 2550 loss: 23.36801231847854 Lx: 12269.520378797206 Ly: 2336.611392292585 Lz: 1.7757003488994005e-06
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 2600 loss: 23.368006894155094 Lx: 12269.519067941103 Ly: 2336.6114117468815 Lz: 1.7700814956000954e-06
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 2650 loss: 23.366557973743163 Lx: 12269.57804181509 Ly: 2336.6229675762866 Lz: 2.0560219987642014e-07
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 2700 loss: 23.366557973835953 Lx: 12269.587320799681 Ly: 2336.6229675762866 Lz: 2.0560219987642014e-07
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 2750 loss: 23.366556860158816 Lx: 12269.589722082117 Ly: 2336.6241467008717 Lz: 1.9269725287651696e-07
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 2800 loss: 23.366523782551635 Lx: 12269.58201621593 Ly: 2336.6235284646355 Lz: 1.658020851169617e-07
current error: 0.462721893

model: 5 1e-08 0.01 1000 step: 5000 loss: 23.366371999328212 Lx: 12269.594042972953 Ly: 2336.6239426884194 Lz: 9.87650358719172e-09
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 5050 loss: 23.36637199927373 Lx: 12269.589482103067 Ly: 2336.623942688798 Lz: 9.87649093064924e-09
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 5100 loss: 23.36637199917762 Lx: 12269.579870752106 Ly: 2336.623942688798 Lz: 9.87649093064924e-09
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 5150 loss: 23.366371999282002 Lx: 12269.589482111442 Ly: 2336.623942688551 Lz: 9.876501672057003e-09
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 5200 loss: 23.36637199928168 Lx: 12269.589449663777 Ly: 2336.623942688551 Lz: 9.876501672057003e-09
current error: 0.46272189349112425
model: 5 1e-08 0.01 1000 step: 5250 loss: 23.366372122934877 Lx: 12269.589482104282 Ly: 2336.6239550547652 Lz: 9.876492401694748e-09
current error: 0.46272189349112425
mo

In [39]:
test_copy = data_test.copy()
test_copy.drop(columns=label_column)
test_X = np.matrix(test_copy.to_numpy())
test_y = np.matrix(data_test[label_column].to_numpy()).T
privileged_group = 1
sens = data_test["race"]
priv_idx = np.array(np.where(sens==privileged_group))[0].flatten()
nonpriv_idx = np.array(np.where(sens!=privileged_group))[0].flatten()
test_X_plus = test_X[priv_idx,:]
test_y_plus = test_y[priv_idx,:]
test_X_minus = test_X[nonpriv_idx,:]
test_y_minus = test_y[nonpriv_idx,:]

In [40]:
# Predict

y_hat_priv = model.predict(test_X_plus)
y_hat_nonpriv = model.predict(test_X_minus)
print("priv error:", compute_error(y_hat_priv, test_y_plus))
print("non priv error:", compute_error(y_hat_nonpriv, test_y_minus))
print("overall error:", compute_error(np.concatenate((y_hat_nonpriv, y_hat_priv)), 
                                      np.concatenate((test_y_minus, test_y_plus))))

priv error: 0.3752913752913753
non priv error: 0.5199362041467305
overall error: 0.46117424242424243


In [41]:
op

(array([0.0860446 , 0.48391339, 0.14352093, 0.2260606 , 0.06334933,
        0.62182417, 0.17559923, 0.5987066 , 0.18218406, 0.6286742 ,
        0.04404399, 0.69881137, 0.21485092, 0.46821003, 0.90281652,
        0.02003778, 0.0995888 , 0.46292028, 0.9506006 , 0.32273576,
        0.39858341, 0.62061651, 0.95609486, 0.51230091, 0.11617536,
        0.70181324, 0.21057602, 0.58888266, 0.76104689, 0.18014743,
        0.08054532, 0.00893841, 0.63332043, 0.37238256, 0.10919819,
        0.36424165, 0.44532985, 0.66402941, 0.85402702, 0.95541148,
        0.56231683, 0.18015636, 0.65636991, 0.39717764, 0.64549661,
        0.3397862 , 0.80643798, 0.16736284, 0.47670641, 0.04396053,
        0.81379983, 0.32009883, 0.51548235, 0.16167711, 0.27604056,
        0.64820925, 0.19231329, 0.20385995, 0.38391196, 0.78971249,
        0.58531676, 0.36953208, 0.31819169, 0.47968703, 0.78070219,
        0.18015894, 0.61398203, 0.88346662, 0.69913591, 0.08984385,
        0.6508417 , 0.72624455, 0.78769812, 0.18

In [42]:
out = np.array([0.76944432, 0.44201403, 0.75157162, 0.93536497, 0.47968939,
        0.96171022, 0.43356675, 0.94331112, 0.25556224, 0.77026722,
        0.7024049 , 0.38050666, 0.50203774, 0.28388108, 0.96967332,
        0.61517901, 0.35110927, 0.72631221, 0.18616932, 0.72570045,
        0.31150333, 0.39246312, 0.47864698, 0.97388464, 0.9097853 ,
        0.90803786, 0.9396043 , 0.59208026, 0.75504674, 0.69620556,
        0.6140349 , 0.14928146, 0.96429274, 0.73807815, 0.78801899,
        0.54089478, 0.76872627, 0.97631913, 0.88428702, 0.1670892 ,
        0.3597392 , 0.73960804, 0.11786876, 0.29632785, 0.90119911,
        0.93501812, 0.05906133, 0.52597812, 0.698342  , 0.1376443 ,
        0.62903134, 0.25388978, 0.8276283 , 0.63269536, 0.72433145,
        0.90955815, 0.24890163, 0.69352591, 0.04085464, 0.12961094,
        0.93352539, 0.56594276, 0.58493874, 0.79196367, 0.81338372,
        0.51545163, 0.43163521, 0.22640088, 0.95132431, 0.63158748,
        0.55031502, 0.03016215, 0.27204168, 0.05419203, 0.3846093 ,
        0.28693374, 0.23654671, 0.28052912, 0.99309054, 0.82368537,
        0.22821508, 0.26639806, 0.69196584, 0.82223622, 0.52312309,
        0.66008233, 0.69931073, 0.45596373, 0.79808916])

In [43]:
# Predict
test_copy = data_test.copy()
test_copy.drop(columns=[label_column])

X_test = np.matrix(test_copy.to_numpy())
y_test = np.matrix(data_test[label_column].to_numpy()).T
y_hat = model.predict_with_param(X_test, out)
print("error:", compute_error(y_hat, y_test))

error: 0.5388257575757576
