## Imports

In [1]:
%pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [2]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.tree import DecisionTreeClassifier, export_text
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler, OneHotEncoder
import warnings
warnings.filterwarnings('ignore')

## Data Preprocessing

In [3]:
# Adult dataset
adult = fetch_ucirepo(id=2)

X = adult.data.features
y = adult.data.targets

y['income'] = y['income'].apply(lambda y: y.replace(".", ""))

df = pd.concat([X, y], axis=1)
df = df.drop(['fnlwgt', 'education'], axis = "columns")

In [4]:
df

Unnamed: 0,age,workclass,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income
0,39,State-gov,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,39,Private,13,Divorced,Prof-specialty,Not-in-family,White,Female,0,0,36,United-States,<=50K
48838,64,,9,Widowed,,Other-relative,Black,Male,0,0,40,United-States,<=50K
48839,38,Private,13,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,50,United-States,<=50K
48840,44,Private,13,Divorced,Adm-clerical,Own-child,Asian-Pac-Islander,Male,5455,0,40,United-States,<=50K


In [5]:
# Grouping into 2 label groups <=50k (negative) and >50k (positive)
y['income'].unique()

array(['<=50K', '>50K'], dtype=object)

In [6]:
# Assuming all are protected except White
X['race'].unique()

array(['White', 'Black', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo',
       'Other'], dtype=object)

In [8]:
# Handle missing values (replace with most frequent value for categorical, mean for numerical)
for column in df.columns:
    if df[column].dtype == object:
        df[column] = df[column].fillna(df[column].mode()[0])
    else:
        df[column] = df[column].fillna(df[column].mean())

# Encode categorical variables
ohe = OneHotEncoder()
df = pd.concat([df.drop(['workclass', 'marital-status', 'occupation', 'relationship', 'native-country', 'education-num'], axis = "columns"), pd.DataFrame(ohe.fit_transform(df[['workclass', 'marital-status', 'occupation', 'relationship', 'native-country', 'education-num']]).toarray())], axis = "columns")
le = LabelEncoder()
income_class_mapping = dict()
race_class_mapping = dict()
for column in df.columns:
    if df[column].dtype == object:
        df[column] = le.fit_transform(df[column])
        if(column == "income"):
          income_class_mapping = {class_label: index for index, class_label in enumerate(le.classes_)}
        if(column == "race"):
          race_class_mapping = {class_label: index for index, class_label in enumerate(le.classes_)}


# Standardize numerical features
numerical_features = ['age', 'hours-per-week', 'capital-gain', 'capital-loss']
scaler = StandardScaler()
df[numerical_features] = scaler.fit_transform(df[numerical_features])

# # Min-Max normalize education-num
# minmax_scaler = MinMaxScaler()
# df['education-num'] = minmax_scaler.fit_transform(df[['education-num']])

df.columns = df.columns.astype(str)

# Separate features and target again
X = df.drop('income', axis=1)
y = df['income']

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=212)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (32724, 101)
X_test shape: (16118, 101)
y_train shape: (32724,)
y_test shape: (16118,)


## KNN with Custom Distance Functions

In [22]:
def custom_distance(x1, x2, X_train, means, mads, numeric_columns, ordinal_columns):
    distance = 0
    for i in range(len(x1)):
        column_name = X_train.columns[i]

        # Interval-scaled features (numeric)
        if column_name in numeric_columns:
            mean = means[column_name]
            mad = mads[column_name]

            if mad == 0:
                z1 = z2 = 0
            else:
                z1 = (x1[i] - mean) / mad
                z2 = (x2[i] - mean) / mad

            distance += abs(z1 - z2)

        # Ordinal features (numeric but ordered)
        elif column_name in ordinal_columns:
            min_value = X_train[column_name].min()
            max_value = X_train[column_name].max()

            if max_value == min_value:
                z1 = z2 = 0
            else:
                z1 = (x1[i] - min_value) / (max_value - min_value)
                z2 = (x2[i] - min_value) / (max_value - min_value)

            distance += abs(z1 - z2)

        # Nominal (categorical/string) features
        else:
            if x1[i] == x2[i]:
                distance += 0
            else:
                distance += 1

    return distance

# Wrapper function for scikit-learn's KNN to work with custom distances
class CustomKNN:
    def __init__(self, k=16):
        self.k = k
        self.knn = NearestNeighbors(n_neighbors=k, metric=self.custom_metric)

    def fit(self, X_train, y_train):
        """Store the training data"""
        self.X_train = X_train
        self.y_train = y_train

        # Calculate means and MADs for numeric columns
        self.numeric_columns = X_train.select_dtypes(include=[np.number]).columns
        self.ordinal_columns = ['satisfaction_level']  # Example of ordinal columns
        self.means = X_train[self.numeric_columns].mean()
        self.mads = X_train[self.numeric_columns].apply(lambda x: (x - x.mean()).abs().mean())

        # Fit the KNN model using the training data
        self.knn.fit(X_train, y_train)

    def custom_metric(self, x1, x2):
        """Custom distance metric passed to sklearn's KNN"""
        return custom_distance(x1, x2, self.X_train, self.means, self.mads, self.numeric_columns, self.ordinal_columns)

    def predict(self, X_test):
        """Predict using the trained KNN model"""
        return self.knn.predict(X_test)

## Compute Discrimination

In [23]:
def compute_discrimination(y_pred, X_test, sensitive_attribute, protected_values, negative_labels):
    """
    Computes discrimination in model predictions.

    Args:
        y_pred: The labels.
        X_test: The test dataset features.
        sensitive_attribute: The name of the sensitive attribute (e.g., 'race').
        protected_values: List of values of the sensitive attribute representing the protected group.
        negative_labels: Negative Labels in the dataset

    Returns:
        The discrimination score (difference in positive label probability).
    """

    # Create masks for protected and unprotected groups based on the sensitive attribute
    protected_mask = X_test[sensitive_attribute].isin(protected_values)
    unprotected_mask = ~protected_mask

    # Ensure neither group is empty
    if sum(protected_mask) == 0 or sum(unprotected_mask) == 0:
        return 0  # Avoid division by zero if either group is empty

    # Calculate the positive rate for both groups
    p2 = 1 - (sum(pd.Series(y_pred[protected_mask]).isin(negative_labels)) / sum(protected_mask))
    p1 = 1 - (sum(pd.Series(y_pred[unprotected_mask]).isin(negative_labels)) / sum(unprotected_mask))

    # Calculate the discrimination score
    return p1 - p2

In [None]:
def calculate_t_discrimination_percentage(R, t, k, sensitive_attr, protected_value, label_attr, positive_label):
    """
    Calculates the percentage of t-discriminated instances in the dataset R.

    Parameters:
    - R: pandas DataFrame containing the dataset.
    - t: Threshold value for diff(r).
    - k: Number of nearest neighbors.
    - sensitive_attr: Name of the sensitive attribute in R.
    - protected_value: Value of the sensitive attribute that identifies the protected group.
    - label_attr: Name of the label attribute in R.
    - positive_label: Value of the label that represents the positive class (⊕).

    Returns:
    - percentage: The percentage of t-discriminated instances among the protected group.
    - num_t_discriminated: Number of t-discriminated instances.
    - total_protected: Total number of instances in the protected group.
    """
    # Split R into protected group P(R) and unprotected group U(R)
    P_R = R[R[sensitive_attr] == protected_value].reset_index(drop=True)
    U_R = R[R[sensitive_attr] != protected_value].reset_index(drop=True)

    # Feature columns (exclude sensitive attribute and label)
    feature_cols = [col for col in R.columns if col not in [sensitive_attr, label_attr]]

    # Initialize variables
    num_t_discriminated = 0
    total_protected = len(P_R)

    # Precompute NearestNeighbors models
    if len(P_R) > 1:
        nbrs_P = NearestNeighbors(n_neighbors=min(k+1, len(P_R))).fit(P_R[feature_cols])
    else:
        nbrs_P = None  # Not enough data for neighbors
    if len(U_R) >= 1:
        nbrs_U = NearestNeighbors(n_neighbors=min(k, len(U_R))).fit(U_R[feature_cols])
    else:
        nbrs_U = None  # No unprotected instances

    # Iterate over each protected instance
    for idx, r in P_R.iterrows():
        x_r = r[feature_cols].values.reshape(1, -1)
        dec_r = r[label_attr]

        # Compute p1 (protected neighbors)
        if nbrs_P and len(P_R) > 1:
            distances_P, indices_P = nbrs_P.kneighbors(x_r)
            indices_P = indices_P[0]
            # Exclude the instance itself if present
            indices_P = indices_P[indices_P != idx]
            k_P = min(k, len(indices_P))
            if k_P > 0:
                neighbor_labels_P = P_R.iloc[indices_P[:k_P]][label_attr].values
                p1 = np.sum(neighbor_labels_P == dec_r) / k_P
            else:
                p1 = 0
        else:
            p1 = 0

        # Compute p2 (unprotected neighbors)
        if nbrs_U and len(U_R) >= 1:
            distances_U, indices_U = nbrs_U.kneighbors(x_r)
            indices_U = indices_U[0]
            k_U = min(k, len(indices_U))
            neighbor_labels_U = U_R.iloc[indices_U[:k_U]][label_attr].values
            p2 = np.sum(neighbor_labels_U == dec_r) / k_U
        else:
            p2 = 0

        diff_r = p1 - p2

        # Check t-discrimination conditions
        if (dec_r == positive_label) and (diff_r >= t):
            num_t_discriminated += 1

    # Calculate percentage
    if total_protected > 0:
        percentage = (num_t_discriminated / total_protected) * 100
    else:
        percentage = 0

    return percentage, num_t_discriminated, total_protected

## Discrimination Discovery

In [24]:
def DiscoveryN(R, t, k, sensitive_attr, protected_values, label_attr, negative_labels):
    """
    Implements the DiscoveryN method for discrimination discovery.

    Parameters:
    - R: pandas DataFrame containing the dataset.
    - t: Threshold value for diff(r).
    - k: Number of nearest neighbors.
    - sensitive_attr: Name of the sensitive attribute in R.
    - protected_values: List of values of the sensitive attribute that identify the protected group.
    - label_attr: Name of the label attribute in R.
    - negative_labels: List of values of the label that represents the negative class.

    Returns:
    - L: DataFrame containing records from the protected group with 'disc' attribute.
    """
    # Split R into protected group P(R) and unprotected group U(R)
    P_R = R[R[sensitive_attr].isin(protected_values)].reset_index(drop=True)
    U_R = R[~R[sensitive_attr].isin(protected_values)].reset_index(drop=True)

    L_records = []

    # Feature columns (exclude sensitive attribute, label, and 'disc' if present)
    feature_cols = [col for col in R.columns if col not in [sensitive_attr, label_attr, 'disc']]

    # Precompute NearestNeighbors models for U(R)
    if len(U_R) >= k:
        nbrs_U = NearestNeighbors(n_neighbors=k).fit(U_R[feature_cols])
    else:
        nbrs_U = NearestNeighbors(n_neighbors=len(U_R)).fit(U_R[feature_cols])

    # Iterate over each record in P(R)
    for idx, r in P_R.iterrows():
        # Prepare P(R) excluding r
        P_R_excl_r = P_R.drop(idx).reset_index(drop=True)

        x_r = r[feature_cols].values.reshape(1, -1)

        # Neighbors in P(R)\{r}
        k_P = min(k, len(P_R_excl_r))
        if k_P > 0:
            nbrs_P = NearestNeighbors(n_neighbors=k_P).fit(P_R_excl_r[feature_cols])
            distances_P, indices_P = nbrs_P.kneighbors(x_r)
            ksetP_labels = P_R_excl_r.iloc[indices_P[0]][label_attr].values
            p1 = np.sum(ksetP_labels == r[label_attr]) / k_P
        else:
            p1 = 0

        # Neighbors in U(R)
        k_U = min(k, len(U_R))
        if k_U > 0:
            distances_U, indices_U = nbrs_U.kneighbors(x_r)
            ksetU_labels = U_R.iloc[indices_U[0]][label_attr].values
            p2 = np.sum(ksetU_labels == r[label_attr]) / k_U
        else:
            p2 = 0

        diff = p1 - p2

        # Determine 'disc' attribute based on conditions
        if (r[label_attr] in negative_labels) and diff >= t:
            r['disc'] = 'yes'
        else:
            r['disc'] = 'no'

        L_records.append(r)

    # Create DataFrame L from records
    L = pd.DataFrame(L_records)
    return L

In [184]:
# Parameters
t = 0.1
k = 16
sensitive_attr = 'race'
protected_values = [race_class_mapping[v] for v in ['Black', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other']]
label_attr = 'income'
negative_labels = [income_class_mapping[v] for v in ['<=50K']]

# Run DiscoveryN
L = DiscoveryN(df, t, k, sensitive_attr, protected_values, label_attr, negative_labels)

In [185]:
X_L = L.drop(['disc'], axis=1)
y_L = L['disc']


# Split L into train and test sets
X_L_train, X_L_test, y_L_train, y_L_test = train_test_split(X_L, y_L, test_size=0.2, random_state=42)

# Train a DecisionTreeClassifier on L
clf = DecisionTreeClassifier()
clf.fit(X_L_train, y_L_train)

# Make predictions on the test set
y_L_pred = clf.predict(X_L_test)

# Evaluate the classifier
accuracy = accuracy_score(y_L_test, y_L_pred)
precision = precision_score(y_L_test, y_L_pred, pos_label='yes')
recall = recall_score(y_L_test, y_L_pred, pos_label='yes')
f1 = f1_score(y_L_test, y_L_pred, pos_label='yes')

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")

Accuracy: 0.8912429378531074
Precision: 0.4873417721518987
Recall: 0.5133333333333333
F1-score: 0.5


## Discrimation Prevention

In [158]:
def PreventionN(T, V, t, k, sensitive_attr, protected_values, label_attr, negative_labels, classifierName = "DT"):
    """
    Implements the PreventionN method.

    Parameters:
    - T: pandas DataFrame representing the training dataset.
    - V: pandas DataFrame representing the validation dataset.
    - t: Threshold value for diff(r).
    - k: Number of nearest neighbors.
    - sensitive_attr: Name of the sensitive attribute in T.
    - protected_values: Value of the sensitive attribute that identifies the protected group.
    - label_attr: Name of the label attribute in T.
    - negative_labels: List of the labels that represents the negative class.

    Returns:
    - classifier_T: Classifier trained on the original training dataset T.
    - classifier_T_prime: Classifier trained on the modified training dataset T'.
    - performance: A dictionary containing accuracy scores on V for both classifiers.
    """
    # Copy of the training data for T'
    T_prime = T.copy()

    # Split T into protected and unprotected groups
    P_T = T[T[sensitive_attr].isin(protected_values)].reset_index(drop=True)
    U_T = T[~T[sensitive_attr].isin(protected_values)].reset_index(drop=True)

    # Feature columns (exclude sensitive attribute and label)
    feature_cols = [col for col in T.columns if col not in [sensitive_attr, label_attr]]

    # Precompute NearestNeighbors models
    if len(P_T) > 1:
        nbrs_P = NearestNeighbors(n_neighbors=min(k, len(P_T)-1)).fit(P_T[feature_cols])
    else:
        nbrs_P = None  # Not enough data
    if len(U_T) >= k:
        nbrs_U = NearestNeighbors(n_neighbors=k).fit(U_T[feature_cols])
    else:
        nbrs_U = NearestNeighbors(n_neighbors=len(U_T)).fit(U_T[feature_cols])

    # Iterate over each record in T
    for idx, r in T.iterrows():
        # Check if record is protected
        is_protected = r[sensitive_attr] in protected_values

        x_r = r[feature_cols].values.reshape(1, -1)
        dec_r = r[label_attr]

        # Compute diff(r) only if necessary
        if is_protected:
            # Neighbors in P_T excluding r
            if nbrs_P and len(P_T) > 1:
                P_T_excl_r = P_T.drop(P_T.index[idx if idx < len(P_T) else -1]).reset_index(drop=True)
                k_P = min(k, len(P_T_excl_r))
                if k_P > 0:
                    nbrs_P_excl = NearestNeighbors(n_neighbors=k_P).fit(P_T_excl_r[feature_cols])
                    distances_P, indices_P = nbrs_P_excl.kneighbors(x_r)
                    ksetP_labels = P_T_excl_r.iloc[indices_P[0]][label_attr].values
                    p1 = np.sum(ksetP_labels == dec_r) / k_P
                else:
                    p1 = 0
            else:
                p1 = 0

            # Neighbors in U_T
            k_U = min(k, len(U_T))
            if k_U > 0:
                distances_U, indices_U = nbrs_U.kneighbors(x_r)
                ksetU_labels = U_T.iloc[indices_U[0]][label_attr].values
                p2 = np.sum(ksetU_labels == dec_r) / k_U
            else:
                p2 = 0

            diff = p1 - p2
        else:
            diff = 0  # For unprotected, diff is not computed or used

        # Conditions to modify the decision attribute in T_prime
        if (dec_r in negative_labels) and is_protected and (diff >= t):
            T_prime.at[idx, label_attr] = income_class_mapping[">50K"]

    # Build classifiers on T and T_prime
    X_T = T[feature_cols]
    y_T = T[label_attr]

    X_T_prime = T_prime[feature_cols]
    y_T_prime = T_prime[label_attr]

    if(classifierName == "DT"):
      classifier_T = DecisionTreeClassifier()
    elif(classifierName == "NB"):
      classifier_T = GaussianNB()
    elif(classifierName == "LR"):
      classifier_T = LogisticRegression()
    classifier_T.fit(X_T, y_T)

    if(classifierName == "DT"):
      classifier_T_prime = DecisionTreeClassifier()
    elif(classifierName == "NB"):
      classifier_T_prime = GaussianNB()
    elif(classifierName == "LR"):
      classifier_T_prime = LogisticRegression()
    classifier_T_prime.fit(X_T_prime, y_T_prime)

    # Evaluate classifiers on validation set V
    X_V = V[feature_cols]
    y_V = V[label_attr]

    y_pred_T = classifier_T.predict(X_V)
    y_pred_T_prime = classifier_T_prime.predict(X_V)

    performance = {
        'accuracy_T': accuracy_score(y_V, y_pred_T),
        'discrimination_T': compute_discrimination(y_pred_T, pd.concat([X_V, V[["race"]]], axis = 1), sensitive_attr, protected_values, negative_labels),
        'accuracy_T_prime': accuracy_score(y_V, y_pred_T_prime),
        'discrimination_T_prime': compute_discrimination(y_pred_T_prime, pd.concat([X_V, V[["race"]]], axis = 1), sensitive_attr, protected_values, negative_labels),
    }

    return classifier_T, classifier_T_prime, performance

### Experiments

In [159]:
# Parameters
t = 0.1
k = 16
sensitive_attr = 'race'
protected_values = [race_class_mapping[v] for v in ['Black', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other']]
label_attr = 'income'
negative_labels = [income_class_mapping[v] for v in ['<=50K']]

In [160]:
# 0.1
classifier_T, classifier_T_prime, performance = PreventionN(pd.concat([X_train, y_train], axis = 1), pd.concat([X_test, y_test], axis = 1), t, k, sensitive_attr, protected_values, label_attr, negative_labels, "DT")
performance

{'accuracy_T': 0.8190842536294826,
 'discrimination_T': 0.07828135382990264,
 'accuracy_T_prime': 0.8121355006824668,
 'discrimination_T_prime': 0.06827703697773779}

In [149]:
# 0.05
classifier_T, classifier_T_prime, performance = PreventionN(pd.concat([X_train, y_train], axis = 1), pd.concat([X_test, y_test], axis = 1), t, k, sensitive_attr, protected_values, label_attr, negative_labels, "DT")
performance

{'accuracy_T': 0.8191462960665095,
 'discrimination_T': 0.07753308882333965,
 'accuracy_T_prime': 0.797555527981139,
 'discrimination_T_prime': 0.03853929970037187}

In [163]:
# 0.1
classifier_T, classifier_T_prime, performance = PreventionN(pd.concat([X_train, y_train], axis = 1), pd.concat([X_test, y_test], axis = 1), t, k, sensitive_attr, protected_values, label_attr, negative_labels, "NB")
performance

{'accuracy_T': 0.7931877404144434,
 'discrimination_T': 0.15347792050517528,
 'accuracy_T_prime': 0.773188779004839,
 'discrimination_T_prime': 0.12583982867821875}


In [152]:
# 0.05
classifier_T, classifier_T_prime, performance = PreventionN(pd.concat([X_train, y_train], axis = 1), pd.concat([X_test, y_test], axis = 1), t, k, sensitive_attr, protected_values, label_attr, negative_labels, "NB")
performance

{'accuracy_T': 0.7931877404144434,
 'discrimination_T': 0.15347792050517534,
 'accuracy_T_prime': 0.7515758779004839,
 'discrimination_T_prime': 0.0759279597587092}


In [162]:
# 0.1
classifier_T, classifier_T_prime, performance = PreventionN(pd.concat([X_train, y_train], axis = 1), pd.concat([X_test, y_test], axis = 1), t, k, sensitive_attr, protected_values, label_attr, negative_labels, "LR")
performance

{'accuracy_T': 0.8520908301278074,
 'discrimination_T': 0.08467820474445498,
 'accuracy_T_prime': 0.8503536418910534,
 'discrimination_T_prime': 0.06655782309453706}

In [151]:
# 0.05
classifier_T, classifier_T_prime, performance = PreventionN(pd.concat([X_train, y_train], axis = 1), pd.concat([X_test, y_test], axis = 1), t, k, sensitive_attr, protected_values, label_attr, negative_labels, "LR")
performance

{'accuracy_T': 0.8520908301278074,
 'discrimination_T': 0.08467820474445495,
 'accuracy_T_prime': 0.8471894776026803,
 'discrimination_T_prime': 0.04200343402639514}