In [97]:
import pandas as pd
!pip install aif360
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric
from aif360.algorithms.preprocessing import Reweighing
from tabulate import tabulate



In [98]:
file_path = '/home/Student_performance_data.csv'

df = pd.read_csv(file_path)
print(df.head())

   StudentID  Age  Gender  Ethnicity  ParentalEducation  StudyTimeWeekly  \
0       1001   17       1          0                  2        19.833723   
1       1002   18       0          0                  1        15.408756   
2       1003   15       0          2                  3         4.210570   
3       1004   17       1          0                  3        10.028829   
4       1005   17       1          0                  2         4.672495   

   Absences  Tutoring  ParentalSupport  Extracurricular  Sports  Music  \
0         7         1                2                0       0      1   
1         0         0                1                0       0      0   
2        26         0                2                0       0      0   
3        14         0                3                1       0      0   
4        17         1                3                0       0      0   

   Volunteering       GPA  GradeClass  
0             0  2.929196         2.0  
1             0  3

In [99]:
# 3.1
# Defining privileged and unprivileged groups
privileged_groups = {'Gender': [1], 'Ethnicity': [0]}
unprivileged_groups = {'Gender': [0], 'Ethnicity': [1, 2, 3]}

In [100]:
# 3.2
df['GradeClassBinary'] = df['GradeClass'].apply(lambda x: 1 if x in [0, 1] else 0)
df['GPABinary'] = df['GPA'].apply(lambda x: 1 if x in [0, 1] else 0)

def df_to_binary_label_dataset(df, label_name, protected_attribute_names):
    return BinaryLabelDataset(
        favorable_label=1,
        unfavorable_label=0,
        df=df,
        label_names=[label_name],
        protected_attribute_names=protected_attribute_names
    )

bld_gender_grade = df_to_binary_label_dataset(df, label_name='GradeClassBinary', protected_attribute_names=['Gender'])
bld_ethnicity_grade = df_to_binary_label_dataset(df, label_name='GradeClassBinary', protected_attribute_names=['Ethnicity'])
bld_gender_gpa = df_to_binary_label_dataset(df, label_name='GPABinary', protected_attribute_names=['Gender'])
bld_ethnicity_gpa = df_to_binary_label_dataset(df, label_name='GPABinary', protected_attribute_names=['Ethnicity'])

metric_gender_grade = ClassificationMetric(
    bld_gender_grade,
    bld_gender_grade,
    unprivileged_groups=[{'Gender': 0}],
    privileged_groups=[{'Gender': 1}]
)
disparate_impact_gender_grade = round(metric_gender_grade.disparate_impact(), 5)
statistical_parity_difference_gender_grade = round(metric_gender_grade.statistical_parity_difference(), 5)

metric_ethnicity_grade = ClassificationMetric(
    bld_ethnicity_grade,
    bld_ethnicity_grade,
    unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}],
    privileged_groups=[{'Ethnicity': 0}]
)
disparate_impact_ethnicity_grade = round(metric_ethnicity_grade.disparate_impact(), 5)
statistical_parity_difference_ethnicity_grade = round(metric_ethnicity_grade.statistical_parity_difference(), 5)

metric_gender_gpa = ClassificationMetric(
    bld_gender_gpa,
    bld_gender_gpa,
    unprivileged_groups=[{'Gender': 0}],
    privileged_groups=[{'Gender': 1}]
)
disparate_impact_gender_gpa = round(metric_gender_gpa.disparate_impact(), 5)
statistical_parity_difference_gender_gpa = round(metric_gender_gpa.statistical_parity_difference(), 5)

metric_ethnicity_gpa = ClassificationMetric(
    bld_ethnicity_gpa,
    bld_ethnicity_gpa,
    unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}],
    privileged_groups=[{'Ethnicity': 0}]
)
disparate_impact_ethnicity_gpa = round(metric_ethnicity_gpa.disparate_impact(), 5)
statistical_parity_difference_ethnicity_gpa = round(metric_ethnicity_gpa.statistical_parity_difference(), 5)

fairness_metrics_df = pd.DataFrame({
    'Protected Class': ['Gender', 'Gender', 'Ethnicity', 'Ethnicity', 'Gender', 'Gender', 'Ethnicity', 'Ethnicity'],
    'Outcome Variable': ['GradeClass', 'GradeClass', 'GradeClass', 'GradeClass', 'GPA', 'GPA', 'GPA', 'GPA'],
    'Fairness Metric': ['Disparate Impact', 'Statistical Parity Difference', 'Disparate Impact', 'Statistical Parity Difference', 'Disparate Impact', 'Statistical Parity Difference', 'Disparate Impact', 'Statistical Parity Difference'],
    'Value': [disparate_impact_gender_grade, statistical_parity_difference_gender_grade, disparate_impact_ethnicity_grade, statistical_parity_difference_ethnicity_grade, disparate_impact_gender_gpa, statistical_parity_difference_gender_gpa, disparate_impact_ethnicity_gpa, statistical_parity_difference_ethnicity_gpa]
})

print(tabulate(fairness_metrics_df, headers='keys', tablefmt='fancy_grid'))

╒════╤═══════════════════╤════════════════════╤═══════════════════════════════╤══════════╕
│    │ Protected Class   │ Outcome Variable   │ Fairness Metric               │    Value │
╞════╪═══════════════════╪════════════════════╪═══════════════════════════════╪══════════╡
│  0 │ Gender            │ GradeClass         │ Disparate Impact              │  1.06691 │
├────┼───────────────────┼────────────────────┼───────────────────────────────┼──────────┤
│  1 │ Gender            │ GradeClass         │ Statistical Parity Difference │  0.01018 │
├────┼───────────────────┼────────────────────┼───────────────────────────────┼──────────┤
│  2 │ Ethnicity         │ GradeClass         │ Disparate Impact              │  1.07422 │
├────┼───────────────────┼────────────────────┼───────────────────────────────┼──────────┤
│  3 │ Ethnicity         │ GradeClass         │ Statistical Parity Difference │  0.01125 │
├────┼───────────────────┼────────────────────┼───────────────────────────────┼──────────┤

In [101]:
# Step 3.3 & 3.4

# Applying reweighting to mitigate bias for Gender - GradeClass
rw_gender_grade = Reweighing(unprivileged_groups=[{'Gender': 0}], privileged_groups=[{'Gender': 1}])
rw_gender_grade.fit(bld_gender_grade)
bld_gender_grade_transf = rw_gender_grade.transform(bld_gender_grade)

# Applying reweighting to mitigate bias for Ethnicity - GradeClass
rw_ethnicity_grade = Reweighing(unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}], privileged_groups=[{'Ethnicity': 0}])
rw_ethnicity_grade.fit(bld_ethnicity_grade)
bld_ethnicity_grade_transf = rw_ethnicity_grade.transform(bld_ethnicity_grade)

# Applying reweighting to mitigate bias for Gender - GPA
rw_gender_gpa = Reweighing(unprivileged_groups=[{'Gender': 0}], privileged_groups=[{'Gender': 1}])
rw_gender_gpa.fit(bld_gender_gpa)
bld_gender_gpa_transf = rw_gender_gpa.transform(bld_gender_gpa)

# Applying reweighting to mitigate bias for Ethnicity - GPA
rw_ethnicity_gpa = Reweighing(unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}], privileged_groups=[{'Ethnicity': 0}])
rw_ethnicity_gpa.fit(bld_ethnicity_gpa)
bld_ethnicity_gpa_transf = rw_ethnicity_gpa.transform(bld_ethnicity_gpa)

# Computing fairness metrics for Gender - GradeClass after reweighting
metric_gender_grade_rw = ClassificationMetric(
    bld_gender_grade_transf,
    bld_gender_grade_transf,
    unprivileged_groups=[{'Gender': 0}],
    privileged_groups=[{'Gender': 1}]
)
disparate_impact_gender_grade_rw = metric_gender_grade_rw.disparate_impact()
statistical_parity_difference_gender_grade_rw = metric_gender_grade_rw.statistical_parity_difference()

# Computing fairness metrics for Ethnicity - GradeClass after reweighting
metric_ethnicity_grade_rw = ClassificationMetric(
    bld_ethnicity_grade_transf,
    bld_ethnicity_grade_transf,
    unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}],
    privileged_groups=[{'Ethnicity': 0}]
)
disparate_impact_ethnicity_grade_rw = metric_ethnicity_grade_rw.disparate_impact()
statistical_parity_difference_ethnicity_grade_rw = metric_ethnicity_grade_rw.statistical_parity_difference()

# Computing fairness metrics for Gender - GPA after reweighting
metric_gender_gpa_rw = ClassificationMetric(
    bld_gender_gpa_transf,
    bld_gender_gpa_transf,
    unprivileged_groups=[{'Gender': 0}],
    privileged_groups=[{'Gender': 1}]
)
disparate_impact_gender_gpa_rw = metric_gender_gpa_rw.disparate_impact()
statistical_parity_difference_gender_gpa_rw = metric_gender_gpa_rw.statistical_parity_difference()

# Computing fairness metrics for Ethnicity - GPA after reweighting
metric_ethnicity_gpa_rw = ClassificationMetric(
    bld_ethnicity_gpa_transf,
    bld_ethnicity_gpa_transf,
    unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}],
    privileged_groups=[{'Ethnicity': 0}]
)
disparate_impact_ethnicity_gpa_rw = metric_ethnicity_gpa_rw.disparate_impact()
statistical_parity_difference_ethnicity_gpa_rw = metric_ethnicity_gpa_rw.statistical_parity_difference()

fairness_metrics_df_rw = pd.DataFrame({
    'Protected Class': ['Gender', 'Gender', 'Ethnicity', 'Ethnicity', 'Gender', 'Gender', 'Ethnicity', 'Ethnicity'],
    'Outcome Variable': ['GradeClass', 'GradeClass', 'GradeClass', 'GradeClass', 'GPA', 'GPA', 'GPA', 'GPA'],
    'Fairness Metric': ['Disparate Impact', 'Statistical Parity Difference', 'Disparate Impact', 'Statistical Parity Difference', 'Disparate Impact', 'Statistical Parity Difference', 'Disparate Impact', 'Statistical Parity Difference'],
    'Value': [disparate_impact_gender_grade_rw, statistical_parity_difference_gender_grade_rw, disparate_impact_ethnicity_grade_rw, statistical_parity_difference_ethnicity_grade_rw, disparate_impact_gender_gpa_rw, statistical_parity_difference_gender_gpa_rw, disparate_impact_ethnicity_gpa_rw, statistical_parity_difference_ethnicity_gpa_rw]
})

print(tabulate(fairness_metrics_df_rw, headers='keys', tablefmt='fancy_grid'))

╒════╤═══════════════════╤════════════════════╤═══════════════════════════════╤══════════════╕
│    │ Protected Class   │ Outcome Variable   │ Fairness Metric               │        Value │
╞════╪═══════════════════╪════════════════════╪═══════════════════════════════╪══════════════╡
│  0 │ Gender            │ GradeClass         │ Disparate Impact              │  1           │
├────┼───────────────────┼────────────────────┼───────────────────────────────┼──────────────┤
│  1 │ Gender            │ GradeClass         │ Statistical Parity Difference │  2.77556e-17 │
├────┼───────────────────┼────────────────────┼───────────────────────────────┼──────────────┤
│  2 │ Ethnicity         │ GradeClass         │ Disparate Impact              │  1           │
├────┼───────────────────┼────────────────────┼───────────────────────────────┼──────────────┤
│  3 │ Ethnicity         │ GradeClass         │ Statistical Parity Difference │  2.77556e-17 │
├────┼───────────────────┼────────────────────┼───

# Step 4

In [102]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [103]:
# 4.1
# Split the data (80 - 20)
train_df, test_df = train_test_split(df, test_size=0.2, random_state=45)

X_train = train_df.drop(columns=['GradeClassBinary'])
Y_train = train_df['GradeClassBinary']

X_test = test_df.drop(columns=['GradeClassBinary'])
Y_test = test_df['GradeClassBinary']

In [104]:
#4.2
# Use random forest
classifier = RandomForestClassifier(random_state=42, n_estimators=100, max_depth=10)
classifier.fit(X_train, Y_train)

Y_predictions = classifier.predict(X_test)

accuracy =  accuracy_score(Y_test, Y_predictions)
print(accuracy)

1.0


In [105]:
conf_matrix = confusion_matrix(Y_test, Y_predictions)
print(conf_matrix)

[[403   0]
 [  0  76]]


In [106]:
# 4.3
# Use the fairness metrics

bld_gender_test = df_to_binary_label_dataset(test_df, label_name='GradeClassBinary', protected_attribute_names=['Gender'])
bld_ethnicity_test = df_to_binary_label_dataset(test_df, label_name='GradeClassBinary', protected_attribute_names=['Ethnicity'])

metric_gender_test = ClassificationMetric(
    bld_gender_test,
    bld_gender_test,
    unprivileged_groups=[{'Gender': 0}],
    privileged_groups=[{'Gender': 1}]
)
disparate_impact_gender_test = metric_gender_test.disparate_impact()
statistical_parity_difference_gender_test = metric_gender_test.statistical_parity_difference()

metric_ethnicity_test = ClassificationMetric(
    bld_ethnicity_test,
    bld_ethnicity_test,
    unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}],
    privileged_groups=[{'Ethnicity': 0}]
)
disparate_impact_ethnicity_test = metric_ethnicity_test.disparate_impact()
statistical_parity_difference_ethnicity_test = metric_ethnicity_test.statistical_parity_difference()

In [107]:
print("Gender fairness metric results")
print(disparate_impact_gender_test)
print(statistical_parity_difference_gender_test)

print("\nEthnicity fairness metric results")
print(disparate_impact_ethnicity_test)
print(statistical_parity_difference_ethnicity_test)

Gender fairness metric results
1.1972477064220184
0.028718056873703823

Ethnicity fairness metric results
1.2490039840637452
0.034947927587894057


In [108]:
# 4.4
# transformed respect to gender
gender_transf_df = bld_gender_grade_transf.convert_to_dataframe()[0]
train_gender_transf_df, test_gender_transf_df = train_test_split(gender_transf_df, test_size=0.2, random_state=45)

X_gender_transf_train = test_gender_transf_df.drop(columns=['GradeClassBinary'])
Y_gender_transf_train = test_gender_transf_df['GradeClassBinary']
X_gender_transf_test = test_gender_transf_df.drop(columns=['GradeClassBinary'])
Y_gender_transf_test = test_gender_transf_df['GradeClassBinary']

# transformed respect to ethnicity
ethnicity_transf_df = bld_ethnicity_grade_transf.convert_to_dataframe()[0]
train_ethnicity_transf_df, test_ethnicity_transf_df = train_test_split(ethnicity_transf_df, test_size=0.2, random_state=45)

X_ethnicity_transf_train = test_ethnicity_transf_df.drop(columns=['GradeClassBinary'])
Y_ethnicity_transf_train = test_ethnicity_transf_df['GradeClassBinary']
X_ethnicity_transf_test = test_ethnicity_transf_df.drop(columns=['GradeClassBinary'])
Y_ethnicity_transf_test = test_ethnicity_transf_df['GradeClassBinary']

In [109]:
# 4.5
# gender transf
transf_gender_classifier = RandomForestClassifier(random_state=42)
transf_gender_classifier.fit(X_gender_transf_train, Y_gender_transf_train)

Y_gender_pred_tranf = transf_gender_classifier.predict(X_gender_transf_test)

gender_accuracy =  accuracy_score(Y_gender_transf_test, Y_gender_pred_tranf)
print(gender_accuracy)

# ethnicity transf
transf_ethnicity_classifier = RandomForestClassifier(random_state=42)
transf_ethnicity_classifier.fit(X_ethnicity_transf_train, Y_ethnicity_transf_train)

Y_ethnicity_pred_tranf = transf_ethnicity_classifier.predict(X_ethnicity_transf_test)

ethnicity_accuracy =  accuracy_score(Y_ethnicity_transf_test, Y_ethnicity_pred_tranf)
print(ethnicity_accuracy)

1.0
1.0


In [110]:
from sklearn.metrics import confusion_matrix, classification_report
# gender transf
conf_matrix_gender = confusion_matrix(Y_gender_transf_test, Y_gender_pred_tranf)
class_report_gender = classification_report(Y_gender_transf_test, Y_gender_pred_tranf)

# ethnicity transf
conf_matrix_ethnicity = confusion_matrix(Y_ethnicity_transf_test, Y_ethnicity_pred_tranf)
class_report_ethnicity = classification_report(Y_ethnicity_transf_test, Y_ethnicity_pred_tranf)

In [111]:
# gender transf
print("gender")
print(conf_matrix_gender)
print(class_report_gender)

# ethnicity transf
print("\ngender")
print(conf_matrix_ethnicity)
print(class_report_ethnicity)

gender
[[403   0]
 [  0  76]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       403
         1.0       1.00      1.00      1.00        76

    accuracy                           1.00       479
   macro avg       1.00      1.00      1.00       479
weighted avg       1.00      1.00      1.00       479


gender
[[403   0]
 [  0  76]]
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       403
         1.0       1.00      1.00      1.00        76

    accuracy                           1.00       479
   macro avg       1.00      1.00      1.00       479
weighted avg       1.00      1.00      1.00       479



In [112]:
# 4.6
# Use the fairness metrics for the transf dataset

bld_gender_transf_test = df_to_binary_label_dataset(test_gender_transf_df, label_name='GradeClassBinary', protected_attribute_names=['Gender'])
bld_ethnicity_transf_test = df_to_binary_label_dataset(ethnicity_transf_df, label_name='GradeClassBinary', protected_attribute_names=['Ethnicity'])

metric_gender_transf_test = ClassificationMetric(
    bld_gender_transf_test,
    bld_gender_transf_test,
    unprivileged_groups=[{'Gender': 0}],
    privileged_groups=[{'Gender': 1}]
)
disparate_impact_gender_transf_test = metric_gender_transf_test.disparate_impact()
statistical_parity_difference_gender_transf_test = metric_gender_transf_test.statistical_parity_difference()

metric_ethnicity_transf_test = ClassificationMetric(
    bld_ethnicity_transf_test,
    bld_ethnicity_transf_test,
    unprivileged_groups=[{'Ethnicity': 1}, {'Ethnicity': 2}, {'Ethnicity': 3}],
    privileged_groups=[{'Ethnicity': 0}]
)
disparate_impact_ethnicity_transf_test = metric_ethnicity_transf_test.disparate_impact()
statistical_parity_difference_ethnicity_transf_test = metric_ethnicity_transf_test.statistical_parity_difference()

In [113]:
print("Gender fairness metric results using transf dataset")
print(disparate_impact_gender_transf_test)
print(statistical_parity_difference_gender_transf_test)

print("\nEthnicity fairness metric results using transf dataset")
print(disparate_impact_ethnicity_transf_test)
print(statistical_parity_difference_ethnicity_transf_test)

Gender fairness metric results using transf dataset
1.1972477064220184
0.028718056873703823

Ethnicity fairness metric results using transf dataset
1.0742247123654056
0.01125362250444839
