**Note: Run code cells in sequential order.**

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
raw_data = pd.read_csv("Data/bank.csv", delimiter=";")

# Step 3 (1)
Based on your dataset, identify the privileged/unprivileged groups associated with each of your protected class variables.

In [2]:
# print('Protected classes: age and marital')
# print('Privileged groups: age -> old (age>45); marital -> married')
# print('Unprivileged groups: age -> young (age<=45); marital -> not married')

fairness_data = raw_data[['age', 'marital', 'y']]
fairness_data['age'] = fairness_data['age'].apply(lambda x: 'old' if x > 45 else 'young')
fairness_data['marital'] = fairness_data['marital'].apply(lambda x: 'married' if x == 'married' else 'not married')
print(fairness_data.head())

     age      marital   y
0  young      married  no
1  young      married  no
2  young  not married  no
3  young      married  no
4    old      married  no


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fairness_data['age'] = fairness_data['age'].apply(lambda x: 'old' if x > 45 else 'young')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fairness_data['marital'] = fairness_data['marital'].apply(lambda x: 'married' if x == 'married' else 'not married')


# Step 3 (2)
For each protected class variable, select two fairness metrics and compute the fairness metrics associated with your privileged/unprivileged groups as a function of each of your two dependent variables.

In [3]:
# print('Fairness metrics selected: Disparate Impact and Equal Opportunity Difference.')
# create a confusion matrix
confusion_matrix_age = pd.crosstab(fairness_data['age'], fairness_data['y'])

# calculate the number of individuals in each group
n_old = confusion_matrix_age.loc['old'].sum()
n_young = confusion_matrix_age.loc['young'].sum()

n_old_yes = confusion_matrix_age.loc['old', 'yes']
n_young_yes = confusion_matrix_age.loc['young', 'yes']

n_old_no = confusion_matrix_age.loc['old', 'no']
n_young_no = confusion_matrix_age.loc['young', 'no']

# calculate the overall proportion of individuals that were classified as 'yes'
p_yes = (n_old_yes + n_young_yes) / (n_old + n_young)

# calculate the proportion of individuals in each group that were classified as 'yes'
p_old_yes = n_old_yes / n_old
p_young_yes = n_young_yes / n_young

# calculate metrics
disparate_impact_age = p_old_yes / p_young_yes
equal_opportunity_difference_age = abs(p_old_yes - p_young_yes)
# print('Age group Disparate Impact:', disparate_impact_age)
# print('Age group Equal Opportunity Difference:', equal_opportunity_difference_age)

# create a confusion matrix
confusion_matrix_marital = pd.crosstab(fairness_data['marital'], fairness_data['y'])

# calculate the number of individuals in each group
n_married = confusion_matrix_marital.loc['married'].sum()
n_not_married = confusion_matrix_marital.loc['not married'].sum()

n_married_yes = confusion_matrix_marital.loc['married', 'yes']
n_not_married_yes = confusion_matrix_marital.loc['not married', 'yes']

n_married_no = confusion_matrix_marital.loc['married', 'no']
n_not_married_no = confusion_matrix_marital.loc['not married', 'no']

# calculate the overall proportion of individuals that were classified as 'yes'
p_yes = (n_married_yes + n_not_married_yes) / (n_married + n_not_married)

# calculate the proportion of individuals in each group that were classified as 'yes'
p_married_yes = n_married_yes / n_married
p_not_married_yes = n_not_married_yes / n_not_married

# calculate the Disparate Impact
disparate_impact_marital = p_married_yes / p_not_married_yes
equal_opportunity_difference_marital = abs(p_married_yes - p_not_married_yes)

# print the Disparate Impact and Equal Opportunity Difference
# print('Marital group Disparate Impact:', disparate_impact_marital)
# print('Marital group Equal Opportunity Difference:', equal_opportunity_difference_marital)

# Step 3 (3)
Select a pre-processing bias mitigation algorithm to transform the original dataset (e.g. Reweighting, Disparate Impact Remover, etc.) as a function of one of your dependent variables.

In [4]:
# calculate the base rate of the privileged and unprivileged groups
base_rate_priv = fairness_data.loc[fairness_data['age'] == 'old', 'y'].value_counts(normalize=True)['yes']
base_rate_unpriv = fairness_data.loc[fairness_data['age'] == 'young', 'y'].value_counts(normalize=True)['yes']

# calculate the ratio of the base rates
base_rate_ratio = base_rate_priv / base_rate_unpriv

# calculate the desired rate for the unprivileged group
unpriv_desired_rate = fairness_data.loc[fairness_data['age'] == 'young', 'y'].value_counts(normalize=True)['yes'] * base_rate_ratio

# adjust the y values for the unprivileged group
fairness_data.loc[fairness_data['age'] == 'young', 'y'] = fairness_data.loc[fairness_data['age'] == 'young', 'y'].apply(
    lambda x: 'yes' if x == 'yes' and pd.np.random.rand() < unpriv_desired_rate else 'no'
)

  lambda x: 'yes' if x == 'yes' and pd.np.random.rand() < unpriv_desired_rate else 'no'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(ilocs[0], value, pi)


# Step 3 (4)
Use the two fairness metrics identified in 3.2 and compute the fairness metrics on the transformed dataset.

In [5]:
# create a confusion matrix
confusion_matrix_age = pd.crosstab(fairness_data['age'], fairness_data['y'])

# calculate the number of individuals in each group
n_old = confusion_matrix_age.loc['old'].sum()
n_young = confusion_matrix_age.loc['young'].sum()

n_old_yes = confusion_matrix_age.loc['old', 'yes']
n_young_yes = confusion_matrix_age.loc['young', 'yes']

n_old_no = confusion_matrix_age.loc['old', 'no']
n_young_no = confusion_matrix_age.loc['young', 'no']

# calculate the overall proportion of individuals that were classified as 'yes'
p_yes = (n_old_yes + n_young_yes) / (n_old + n_young)

# calculate the proportion of individuals in each group that were classified as 'yes'
p_old_yes = n_old_yes / n_old
p_young_yes = n_young_yes / n_young

# calculate metrics
mitigated_disparate_impact_age = p_old_yes / p_young_yes
mitigated_equal_opportunity_difference_age = abs(p_old_yes - p_young_yes)
# print('Transformed dataset age group Disparate Impact:', mitigated_disparate_impact_age)
# print('Transformed dataset age group Equal Opportunity Difference:', mitigated_equal_opportunity_difference_age)

# create a confusion matrix
confusion_matrix_marital = pd.crosstab(fairness_data['marital'], fairness_data['y'])

# calculate the number of individuals in each group
n_married = confusion_matrix_marital.loc['married'].sum()
n_not_married = confusion_matrix_marital.loc['not married'].sum()

n_married_yes = confusion_matrix_marital.loc['married', 'yes']
n_not_married_yes = confusion_matrix_marital.loc['not married', 'yes']

n_married_no = confusion_matrix_marital.loc['married', 'no']
n_not_married_no = confusion_matrix_marital.loc['not married', 'no']

# calculate the overall proportion of individuals that were classified as 'yes'
p_yes = (n_married_yes + n_not_married_yes) / (n_married + n_not_married)

# calculate the proportion of individuals in each group that were classified as 'yes'
p_married_yes = n_married_yes / n_married
p_not_married_yes = n_not_married_yes / n_not_married

# calculate the Disparate Impact
mitigated_disparate_impact_marital = p_married_yes / p_not_married_yes
mitigated_equal_opportunity_difference_marital = abs(p_married_yes - p_not_married_yes)

# print the Disparate Impact and Equal Opportunity Difference
# print('Transformed dataset marital group Disparate Impact:', mitigated_disparate_impact_marital)
# print('Transformed dataset marital group Equal Opportunity Difference:', mitigated_equal_opportunity_difference_marital)

# Step 3 Output

In [6]:
print('Protected classes: age and marital')
print('Privileged groups: age -> old (age>45); marital -> married')
print('Unprivileged groups: age -> young (age<=45); marital -> not married')

print('Fairness metrics selected: Disparate Impact and Equal Opportunity Difference.')

print('Age group Disparate Impact:', disparate_impact_age)
print('Age group Equal Opportunity Difference:', equal_opportunity_difference_age)
print('Marital group Disparate Impact:', disparate_impact_marital)
print('Marital group Equal Opportunity Difference:', equal_opportunity_difference_marital)

print('Transformed dataset age group Disparate Impact:', mitigated_disparate_impact_age)
print('Transformed dataset age group Equal Opportunity Difference:', mitigated_equal_opportunity_difference_age)
print('Transformed dataset marital group Disparate Impact:', mitigated_disparate_impact_marital)
print('Transformed dataset marital group Equal Opportunity Difference:', mitigated_equal_opportunity_difference_marital)

table = pd.DataFrame({'Protected Class':['Age',"Marital"], 'Privileged Group':['Old','Married'], 'Unprivileged Group':['Young','Not Married'], 'Bias Mitigation Function':['Reweighting','Reweighting'], 'Disparate Impact':[disparate_impact_age,disparate_impact_marital], 
                      'Equal Opportunity Difference':[equal_opportunity_difference_age,equal_opportunity_difference_marital], 'Mitiaged Disparate Impact':[mitigated_disparate_impact_age,mitigated_disparate_impact_marital], 
                      'Mitiaged Equal Opportunity Difference':[mitigated_equal_opportunity_difference_age,mitigated_equal_opportunity_difference_marital]})
print(table)
table.to_csv('step3.csv', index=False)

Protected classes: age and marital
Privileged groups: age -> old (age>45); marital -> married
Unprivileged groups: age -> young (age<=45); marital -> not married
Fairness metrics selected: Disparate Impact and Equal Opportunity Difference.
Age group Disparate Impact: 1.227822355148859
Age group Equal Opportunity Difference: 0.024430949927147366
Marital group Disparate Impact: 0.6997368374780941
Marital group Equal Opportunity Difference: 0.04249664249149944
Transformed dataset age group Disparate Impact: 9.308606692523906
Transformed dataset age group Equal Opportunity Difference: 0.11752305519030526
Transformed dataset marital group Disparate Impact: 1.1510611627776748
Transformed dataset marital group Equal Opportunity Difference: 0.00727266618941242
  Protected Class Privileged Group Unprivileged Group  \
0             Age              Old              Young   
1         Marital          Married        Not Married   

  Bias Mitigation Function  Disparate Impact  Equal Opportunity D