In [1]:
import pandas as pd
pd.set_option("display.max_columns", None)  # sets the max
import numpy as np
import scipy

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")


from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import recall_score, f1_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import StandardScaler

rs = 1  # random state seed for reproducibility

from pathlib import Path
import pickle

# Fair PCA Experiments



## Functions



### Fair PCA

In [2]:
def fair_PCA(X, protected_features, n_components):
    """Compute a PCA projection whose directions are uncorrelated with the protected features.

    Steps:
      1. Center every protected column on its own mean (``Z``).
      2. Take an orthonormal basis ``R`` of the null space of ``Z^T X``.
      3. Eigen-decompose the symmetric matrix ``R^T X^T X R`` and order the
         eigenvectors by decreasing eigenvalue.
      4. Keep the leading ``n_components`` eigenvectors ``L`` and form ``U = R L``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Data matrix. The in-file caller passes standardized data; centering of
        ``X`` is not performed here.
    protected_features : array-like of shape (n_samples,) or (n_samples, k)
        Protected feature values. Must be convertible to float.
    n_components : int
        Maximum number of components to keep. At most ``n_features - rank(Z^T X)``
        components exist; asking for more returns all of them.

    Returns
    -------
    U : ndarray of shape (n_features, m)
        Projection matrix, ``m = min(n_components, null-space dimension)``.
    components : ndarray of shape (m, n_samples)
        ``U^T X^T`` -- the projected data, one *column* per sample.
    """
    X = np.asarray(X, dtype=float)

    # Work on a float copy so the caller's array is never modified.
    Z = np.array(protected_features, dtype=float)
    if Z.ndim == 1:
        # A single protected feature given as a vector: treat it as one column.
        Z = Z.reshape(-1, 1)

    # Center each protected feature on its OWN mean. Subtracting the global
    # mean would leave every column with a non-zero offset.
    Z = Z - Z.mean(axis=0)

    # Orthonormal basis of the null space of Z^T X
    R = scipy.linalg.null_space(np.matmul(Z.T, X))

    # R^T X^T X R is symmetric, so use eigh: real eigenvalues and orthonormal
    # eigenvectors are guaranteed (eig returns complex-typed eigenvalues).
    XR = np.matmul(X, R)
    vals, L = scipy.linalg.eigh(np.matmul(XR.T, XR))

    # eigh returns ascending eigenvalues; reorder to descending.
    idx = np.argsort(vals)[::-1]
    L = L[:, idx]

    # Eigenvectors are the COLUMNS of L, so the leading components are columns.
    U = np.matmul(R, L[:, :n_components])

    # Project the data into the fair space
    return U, np.matmul(U.T, X.T)

### Fairness metrics (Equalized odds)

In [3]:
def fpr_and_tpr(cm):
    """Return ``(FPR, TPR)`` read from a binary confusion matrix.

    ``cm`` is indexed as ``cm[true_class][predicted_class]`` with class 0 as
    the negative class and class 1 as the positive class, i.e.
    ``[[TN, FP], [FN, TP]]``.
    """
    actual_negative = cm[0]
    actual_positive = cm[1]

    true_negatives, false_positives = actual_negative[0], actual_negative[1]
    false_negatives, true_positives = actual_positive[0], actual_positive[1]

    # Share of actual positives that were predicted positive.
    true_positive_rate = true_positives / (true_positives + false_negatives)
    # Share of actual negatives that were predicted positive.
    false_positive_rate = false_positives / (false_positives + true_negatives)

    return false_positive_rate, true_positive_rate

def equalized_odds(
        model,
        X,
        y,
        groups,
        group_protected,
        group_non_protected
    ):
    """Compute the false/true positive rates of ``model`` separately for two groups.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``
    X : feature matrix; rows are selected with the boolean mask ``groups == <group>``
    y : true labels, indexable with the same mask
    groups : group membership of every row
    group_protected, group_non_protected : the two group values to compare

    Returns
    -------
    pandas.DataFrame
        Columns ``"FPR"`` and ``"TPR"``, indexed by
        ``[group_protected, group_non_protected]``.

    Notes
    -----
    Both confusion matrices are built over one shared label set (the model's
    ``classes_`` when available, plus every label seen in the two groups).
    Without this, a group whose true and predicted labels contain a single
    class produces a 1x1 matrix and the rate computation fails. A rate whose
    denominator is zero for a group comes out as NaN.
    """
    group_values = [group_protected, group_non_protected]

    truths = []
    predictions = []
    for group_value in group_values:
        in_group = groups == group_value
        truths.append(y[in_group])
        predictions.append(model.predict(X[in_group]))

    # Collect every label that can appear so both matrices share shape and order.
    label_sources = [np.asarray(values) for values in truths + predictions]
    model_classes = getattr(model, "classes_", None)
    if model_classes is not None:
        label_sources.append(np.asarray(model_classes))
    labels = np.unique(np.concatenate(label_sources))

    rates = [
        fpr_and_tpr(confusion_matrix(y_true, y_pred, labels=labels))
        for y_true, y_pred in zip(truths, predictions)
    ]

    return pd.DataFrame(
        {
            "FPR": [fpr for fpr, _ in rates],
            "TPR": [tpr for _, tpr in rates],
        },
        index=group_values
    )


### Main experiment class

In [4]:
class Experiment:
    """Single train/evaluate run for one course and one grade threshold.

    Typical use: construct the object, call exactly one ``*_data_prep`` method
    to create the train/test split, then call ``train_and_test`` (for the
    baseline / no-protected splits) or ``train_and_test_pca`` (for the split
    made by ``fair_pca_data_prep``). Results are exposed as ``clf``,
    ``predictions``, ``performance_metrics`` and ``fairness_metrics``.
    """

    # --- Feature configuration WITH the protected variables (baseline) ---
    # Columns that are one-hot encoded (first level dropped).
    _BASELINE_ONE_HOT_COLS = [
        "school",
        "sex",
        "age",
        "address",
        "famsize",
        "Pstatus",
        "Mjob",
        "Fjob",
        "reason",
        "guardian"
    ]
    # "yes"/"no" columns converted to booleans.
    _BASELINE_YES_NO_COLS = [
        "schoolsup",
        "famsup",
        "paid",
        "activities",
        "nursery",
        "higher",
        "internet",
        "romantic"
    ]
    # Final model inputs, in the order they are handed to the classifier.
    _BASELINE_FEATURE_COLS = [
        'Medu', 'Fedu', 'traveltime', 'studytime', 'failures',
        'schoolsup', 'famsup', 'paid', 'activities', 'nursery', 'higher',
        'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc', 'Walc',
        'health', 'absences', 'school_MS', 'sex_M', 'age_16',
        'age_17', 'age_18', 'age_19', 'age_20', 'age_21', 'age_22', 'address_U',
        'famsize_LE3', 'Pstatus_T', 'Mjob_health', 'Mjob_other',
        'Mjob_services', 'Mjob_teacher', 'Fjob_health', 'Fjob_other',
        'Fjob_services', 'Fjob_teacher', 'reason_home', 'reason_other',
        'reason_reputation', 'guardian_mother', 'guardian_other'
    ]

    # --- Feature configuration WITHOUT the protected variables ---
    _UNPROTECTED_ONE_HOT_COLS = [
        "school",
        "sex",
        "age",
        "famsize",
        "Pstatus",
        "reason",
        "guardian"
    ]
    _UNPROTECTED_YES_NO_COLS = [
        "schoolsup",
        "famsup",
        "paid",
        "activities",
        "nursery",
        "higher",
        "romantic"
    ]
    _UNPROTECTED_FEATURE_COLS = [
        'studytime', 'failures', 'schoolsup', 'famsup', 'paid', 'activities',
        'nursery', 'higher', 'romantic', 'famrel', 'freetime',
        'goout', 'Dalc', 'Walc', 'health', 'absences', 'school_MS', 'sex_M', 'age_16',
        'age_17', 'age_18', 'age_19', 'age_20', 'age_21', 'age_22',
        'famsize_LE3', 'Pstatus_T', 'reason_home', 'reason_other',
        'reason_reputation', 'guardian_mother', 'guardian_other'
    ]

    def __init__(
            self,
            data,
            course,
            grade_threshold,
            test_ratio,
            random_state
        ):
        """Filter ``data`` to one course and derive target, groups and protected dummies.

        Parameters
        ----------
        data : pandas.DataFrame
            Must contain at least the columns ``"course"``, ``"G3"``, ``"SES"``
            and the protected variables listed below.
        course : value matched against ``data["course"]``
        grade_threshold : rows with ``G3 < grade_threshold`` get target 0, all others 1
        test_ratio : passed as ``test_size`` to ``train_test_split``
        random_state : seed used for the split and for the classifier
        """

        self.course = course
        self.random_state = random_state
        self.test_ratio = test_ratio

        # Everything below is derived from this single filtered frame, so the
        # target, the groups and the features always share the same index.
        self.data = data[data["course"] == self.course]

        self.target = self.data["G3"].apply(lambda x: 0 if x < grade_threshold else 1)

        self.groups = self.data["SES"]

        self.protected_variables = [
            "internet",
            "traveltime",
            "address",
            "Mjob",
            "Fjob",
            "Medu",
            "Fedu",
            "SES"
        ]

        # One indicator column per level (no level dropped) so that any single
        # level can be selected for suppression. "internet" and "SES" keep
        # their raw string values here.
        self.groups_and_protected = pd.get_dummies(
            self.data[self.protected_variables],
            prefix=None,
            prefix_sep="_",
            dummy_na=False,
            columns=[
                "traveltime",
                "address",
                "Mjob",
                "Fjob",
                "Medu",
                "Fedu"
            ],
            drop_first=False
        )

        self.standard_scaler = StandardScaler()

    def _encode_and_split(self, one_hot_cols, yes_no_cols, feature_cols, groups):
        """Encode the course data, keep ``feature_cols`` and create the train/test split.

        ``groups`` is split alongside the features and target and ends up in
        ``self.group_train`` / ``self.group_test``.
        """
        encoded = pd.get_dummies(
            self.data,
            prefix=None,
            prefix_sep="_",
            dummy_na=False,  # no extra column for missing values
            columns=list(one_hot_cols),
            # Dropping the first level avoids multicollinearity; the dropped
            # level is the one where all remaining indicators are 0.
            drop_first=True,
        )

        encoded = encoded.replace(
            {column: {"no": False, "yes": True} for column in yes_no_cols}
        )

        features = encoded[list(feature_cols)]

        (
            self.X_train,
            self.X_test,
            self.y_train,
            self.y_test,
            self.group_train,
            self.group_test,
        ) = train_test_split(
            features,
            self.target,
            groups,
            test_size=self.test_ratio,
            random_state=self.random_state
        )

    def baseline_data_prep(self):
        """Split using all features, including the protected ones; groups are the SES labels."""
        self._encode_and_split(
            one_hot_cols=self._BASELINE_ONE_HOT_COLS,
            yes_no_cols=self._BASELINE_YES_NO_COLS,
            feature_cols=self._BASELINE_FEATURE_COLS,
            groups=self.groups,
        )

    def no_protected_data_prep(self):
        """Split using only the non-protected features; groups are the SES labels."""
        self._encode_and_split(
            one_hot_cols=self._UNPROTECTED_ONE_HOT_COLS,
            yes_no_cols=self._UNPROTECTED_YES_NO_COLS,
            feature_cols=self._UNPROTECTED_FEATURE_COLS,
            groups=self.groups,
        )

    def fair_pca_data_prep(self):
        """Split the non-protected features; groups carry SES plus the protected dummies.

        ``self.group_train`` / ``self.group_test`` are DataFrames here, which is
        what ``train_and_test_pca`` expects.
        """
        self._encode_and_split(
            one_hot_cols=self._UNPROTECTED_ONE_HOT_COLS,
            yes_no_cols=self._UNPROTECTED_YES_NO_COLS,
            feature_cols=self._UNPROTECTED_FEATURE_COLS,
            groups=self.groups_and_protected,
        )

    def _grid_search(self, model, model_parameters, eval):
        """Wrap ``model(random_state=...)`` in a GridSearchCV scored by ``eval``."""
        return GridSearchCV(
            model(random_state=self.random_state),
            model_parameters,
            scoring=eval
        )

    def _score_predictions(self):
        """Accuracy, recall and F1 of ``self.predictions`` against ``self.y_test``."""
        return {
            "accuracy": accuracy_score(self.y_test, self.predictions),
            "recall": recall_score(self.y_test, self.predictions),
            "f1": f1_score(self.y_test, self.predictions)
        }

    def train_and_test(
            self,
            model,
            model_parameters,
            eval
        ):
        """Grid-search ``model`` on the training split and evaluate on the test split.

        Requires ``baseline_data_prep`` or ``no_protected_data_prep`` to have
        been called (``self.group_test`` must hold the SES labels).
        Sets ``clf``, ``predictions``, ``performance_metrics`` and
        ``fairness_metrics``.
        """
        self.clf = self._grid_search(model, model_parameters, eval)
        self.clf.fit(self.X_train, self.y_train)

        self.predictions = self.clf.predict(self.X_test)

        self.performance_metrics = self._score_predictions()

        self.fairness_metrics = equalized_odds(
            self.clf,
            self.X_test,
            self.y_test,
            self.group_test,
            group_protected="lower",
            group_non_protected="middle/rich"
        )

    def apply_fairpca(
            self,
            protected_features
        ):
        """Standardize the training features and fit the fair PCA projection.

        ``protected_features`` must expose ``to_numpy()`` and be row-aligned
        with ``self.X_train``. Sets ``n_principal_components``,
        ``projection_matrix`` and ``components_train`` (one column per sample).
        """
        _X_train_standardized = self.standard_scaler.fit_transform(self.X_train)

        # Ask for as many components as there are input features; fair_PCA
        # returns at most the dimension of the fair subspace.
        self.n_principal_components = _X_train_standardized.shape[1]

        self.projection_matrix, self.components_train = fair_PCA(
            _X_train_standardized,
            protected_features.to_numpy(),
            self.n_principal_components
        )

    def train_and_test_pca(
            self,
            model,
            model_parameters,
            eval,
            protected_features_to_suppress
        ):
        """Fit fair PCA on the training split, then grid-search and evaluate ``model``.

        Requires ``fair_pca_data_prep`` to have been called.

        Parameters
        ----------
        model, model_parameters, eval : as in ``train_and_test``
        protected_features_to_suppress : list of column names of
            ``self.groups_and_protected`` whose correlation with the
            components is removed.
        """
        self.clf = self._grid_search(model, model_parameters, eval)

        # fair_PCA needs a purely numeric matrix. "SES" and "internet" are the
        # only protected columns still stored as strings, so encode both (keys
        # for columns that were not selected are ignored by replace).
        _features_to_suppress = (
            self.group_train[protected_features_to_suppress]
            .replace({
                "SES": {"lower": 0, "middle/rich": 1},
                "internet": {"no": 0, "yes": 1},
            })
            .astype(float)
        )

        self.apply_fairpca(
            _features_to_suppress
        )

        # components_train has one column per sample; estimators expect rows.
        self.clf.fit(self.components_train.T, self.y_train)

        # Reuse the scaler and projection fitted on the training split only.
        _X_test_standard = self.standard_scaler.transform(self.X_test)

        self.components_test = np.matmul(_X_test_standard, self.projection_matrix)
        self.predictions = self.clf.predict(self.components_test)

        self.performance_metrics = self._score_predictions()

        self.fairness_metrics = equalized_odds(
            self.clf,
            self.components_test,
            self.y_test,
            self.group_test["SES"],
            group_protected="lower",
            group_non_protected="middle/rich"
        )


### Run all experiments function

In [5]:
def run_experiments(
        data,
        grade_thresholds,
        test_ratio,
        random_state,
        model,
        parameters,
        eval,
        proxies_to_suppress,
        courses=("math", "portuguese")
    ):
    """Run every experiment variant for each grade threshold and course.

    For each ``(grade, course)`` pair, in this order:
      1. ``"baseline"``   -- all features, protected ones included.
      2. ``"no_proxies"`` -- protected features dropped.
      3. ``"fairpca"``    -- one run per prefix of ``proxies_to_suppress``
         (first proxy only, then the first two, ...).
      4. ``"fairpca"``    -- suppressing ``"SES"`` alone.

    Parameters
    ----------
    data : pandas.DataFrame handed to ``Experiment``
    grade_thresholds : iterable of grade thresholds
    test_ratio, random_state : forwarded to ``Experiment``
    model : estimator class, instantiated as ``model(random_state=...)``
    parameters : parameter grid for the grid search
    eval : scoring name for the grid search
    proxies_to_suppress : iterable of protected dummy-column names
    courses : iterable of course names; defaults to ``("math", "portuguese")``

    Returns
    -------
    list of dict
        One record per run with keys ``name``, ``grade_threshold``, ``course``,
        ``model``, ``performance_metrics``, ``fairness_metrics``,
        ``experiment_object`` and ``suppressed_variables``.
    """

    def _new_experiment(course, grade):
        # Fresh Experiment so no state is shared between runs.
        return Experiment(
            data=data,
            course=course,
            grade_threshold=grade,
            test_ratio=test_ratio,
            random_state=random_state
        )

    def _record(name, grade, course, experiment, suppressed):
        # Uniform result record for one finished run.
        return {
            "name": name,
            "grade_threshold": grade,
            "course": course,
            "model": experiment.clf,
            "performance_metrics": experiment.performance_metrics,
            "fairness_metrics": experiment.fairness_metrics,
            "experiment_object": experiment,
            "suppressed_variables": suppressed
        }

    def _run_plain(name, prepare, grade, course):
        # Baseline-style run: prepare the split, train, evaluate.
        experiment = _new_experiment(course, grade)
        getattr(experiment, prepare)()
        experiment.train_and_test(
            model=model,
            model_parameters=parameters,
            eval=eval
        )
        return _record(name, grade, course, experiment, None)

    def _run_fair_pca(grade, course, to_suppress, reported):
        # Fair PCA run suppressing ``to_suppress``; ``reported`` is what gets stored.
        experiment = _new_experiment(course, grade)
        experiment.fair_pca_data_prep()
        experiment.train_and_test_pca(
            model=model,
            model_parameters=parameters,
            eval=eval,
            protected_features_to_suppress=to_suppress
        )
        return _record("fairpca", grade, course, experiment, reported)

    experiments = []

    for grade in grade_thresholds:
        for course in courses:

            experiments.append(_run_plain("baseline", "baseline_data_prep", grade, course))
            experiments.append(_run_plain("no_proxies", "no_protected_data_prep", grade, course))

            # Fair PCA, gradual: suppress a growing prefix of the proxy list.
            currently_suppressed = []
            for proxy in proxies_to_suppress:
                currently_suppressed.append(proxy)
                # Copies, so later appends cannot alter what was used or stored.
                experiments.append(
                    _run_fair_pca(
                        grade,
                        course,
                        list(currently_suppressed),
                        list(currently_suppressed)
                    )
                )

            # Fair PCA suppressing the SES label itself.
            experiments.append(_run_fair_pca(grade, course, ["SES"], "SES"))

    return experiments

## Data Prep

In [6]:
# Load the student records; per the preview below the file also carries the
# SES_score and SES columns.
df = pd.read_csv("data/all_students_and_SES.csv")

# Column index of the raw frame.
features = df.columns
df.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,reason,guardian,traveltime,studytime,failures,schoolsup,famsup,paid,activities,nursery,higher,internet,romantic,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3,course,G_mean,SES_score,SES
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,course,mother,2,2,0,yes,no,no,no,yes,yes,no,no,4,3,4,1,1,3,6,5,6,6,math,5.666667,-0.733077,middle/rich
1,GP,F,17,U,GT3,T,1,1,at_home,other,course,father,1,2,0,no,yes,no,no,no,yes,yes,no,5,3,3,1,1,3,4,5,5,6,math,5.333333,0.176471,lower
2,GP,F,15,U,LE3,T,1,1,at_home,other,other,mother,1,2,3,yes,no,yes,no,yes,yes,yes,no,4,3,2,2,3,3,10,7,8,10,math,8.333333,0.176471,lower
3,GP,F,15,U,GT3,T,4,2,health,services,home,mother,1,3,0,no,yes,yes,yes,yes,yes,yes,yes,3,2,2,1,1,5,2,15,14,15,math,14.666667,-1.234671,middle/rich
4,GP,F,16,U,GT3,T,3,3,other,other,home,father,1,2,0,no,yes,yes,no,yes,yes,no,no,4,3,2,1,2,5,4,6,10,10,math,8.666667,-0.323278,middle/rich


## Experiments

In [7]:
# Hyper-parameter grids for the grid search.
# rfc_parameters: presumably intended for RandomForestClassifier (not used by
# the SVC run in this notebook section).
rfc_parameters = dict(
    n_estimators=[10, 20, 50, 75, 100, 1000],
    max_depth=[5, 10, 15, 20, None],
)
# svc_parameters: grid passed to run_experiments together with SVC.
svc_parameters = dict(
    kernel=["linear", "poly", "rbf"],
    C=[0.001, 0.01, 0.1, 1, 10, 100],
    gamma=["scale"],
)

In [8]:
# Run every variant (baseline, no protected features, fair PCA) with an SVC
# tuned by grid search on F1.
# - grade_thresholds: 10 and 13, the two thresholds reported under Results.
# - test_ratio: 40% of each course's rows are held out for testing.
# - proxies_to_suppress: protected dummy columns, suppressed cumulatively in
#   the order listed (first one only, then the first two, and so on).
experiments = run_experiments(
    data=df,
    grade_thresholds=[10, 13],
    test_ratio=0.4,
    random_state=rs,
    model=SVC,
    parameters=svc_parameters,
    eval="f1",
    proxies_to_suppress=[
        "address_R",
        "Medu_1",
        "Fedu_1",
        "Mjob_at_home",
        "Fjob_other",
        "traveltime_2"
    ]
)

In [9]:
# Persist the full experiment records (fitted models included) for later analysis.
# NOTE: only unpickle this file from a trusted source -- pickle.load can
# execute arbitrary code.
file_path = Path("./results/experiments.pickle")
# Create the output directory if needed; on a fresh checkout "./results" may
# not exist and opening the file would raise FileNotFoundError.
file_path.parent.mkdir(parents=True, exist_ok=True)
with file_path.open("wb") as file:
    pickle.dump(experiments, file, protocol=pickle.HIGHEST_PROTOCOL)

## Results

In [16]:
# Flatten every experiment record into one row of the results table.
experiment_data = []
for experiment in experiments:
    performance = experiment["performance_metrics"]
    fpr_by_group = experiment["fairness_metrics"]["FPR"]
    tpr_by_group = experiment["fairness_metrics"]["TPR"]

    # Absolute gap between the two SES groups (0 means equal rates).
    FPR_delta = np.abs(fpr_by_group["lower"] - fpr_by_group["middle/rich"])
    TPR_delta = np.abs(tpr_by_group["lower"] - tpr_by_group["middle/rich"])

    row = {
        "Type": experiment["name"],
        "Grade threshold": experiment["grade_threshold"],
        "Course": experiment["course"],
        "Accuracy": performance["accuracy"],
        "Recall": performance["recall"],
        "F1": performance["f1"],
        "FPR lower": fpr_by_group["lower"],
        "FPR middle/rich": fpr_by_group["middle/rich"],
        "FPR delta": FPR_delta,
        "TPR lower": tpr_by_group["lower"],
        "TPR middle/rich": tpr_by_group["middle/rich"],
        "TPR delta": TPR_delta,
        "PCA protected variables": experiment["suppressed_variables"],
    }
    experiment_data.append(row)

df_results = pd.DataFrame(experiment_data)

### Math

#### Grade threshold 10 (Passing)

In [17]:
# Math course, grade threshold 10.
df_results.loc[df_results["Grade threshold"].eq(10) & df_results["Course"].eq("math")]

Unnamed: 0,Type,Grade threshold,Course,Accuracy,Recall,F1,FPR lower,FPR middle/rich,FPR delta,TPR lower,TPR middle/rich,TPR delta,PCA protected variables
0,baseline,10,math,0.721519,0.917431,0.819672,0.6875,0.727273,0.039773,0.896552,0.925,0.028448,
1,no_proxies,10,math,0.721519,0.889908,0.815126,0.625,0.666667,0.041667,0.827586,0.9125,0.084914,
2,fairpca,10,math,0.721519,0.972477,0.828125,0.8125,0.848485,0.035985,0.896552,1.0,0.103448,[address_R]
3,fairpca,10,math,0.689873,0.825688,0.786026,0.625,0.606061,0.018939,0.724138,0.8625,0.138362,"[address_R, Medu_1]"
4,fairpca,10,math,0.689873,1.0,0.816479,1.0,1.0,0.0,1.0,1.0,0.0,"[address_R, Medu_1, Fedu_1]"
5,fairpca,10,math,0.689873,1.0,0.816479,1.0,1.0,0.0,1.0,1.0,0.0,"[address_R, Medu_1, Fedu_1, Mjob_at_home]"
6,fairpca,10,math,0.670886,0.834862,0.777778,0.625,0.727273,0.102273,0.758621,0.8625,0.103879,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
7,fairpca,10,math,0.689873,1.0,0.816479,1.0,1.0,0.0,1.0,1.0,0.0,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
8,fairpca,10,math,0.689873,0.834862,0.787879,0.6875,0.606061,0.081439,0.724138,0.875,0.150862,SES


#### Grade threshold 13 (Eligible for higher education)

In [18]:
# Math course, grade threshold 13.
df_results.loc[df_results["Grade threshold"].eq(13) & df_results["Course"].eq("math")]

Unnamed: 0,Type,Grade threshold,Course,Accuracy,Recall,F1,FPR lower,FPR middle/rich,FPR delta,TPR lower,TPR middle/rich,TPR delta,PCA protected variables
18,baseline,13,math,0.651899,0.490196,0.47619,0.243243,0.285714,0.042471,0.375,0.511628,0.136628,
19,no_proxies,13,math,0.64557,0.45098,0.45098,0.243243,0.271429,0.028185,0.75,0.395349,0.354651,
20,fairpca,13,math,0.626582,0.27451,0.321839,0.297297,0.157143,0.140154,0.5,0.232558,0.267442,[address_R]
21,fairpca,13,math,0.607595,0.235294,0.27907,0.27027,0.185714,0.084556,0.5,0.186047,0.313953,"[address_R, Medu_1]"
22,fairpca,13,math,0.632911,0.235294,0.292683,0.243243,0.142857,0.100386,0.375,0.209302,0.165698,"[address_R, Medu_1, Fedu_1]"
23,fairpca,13,math,0.626582,0.254902,0.305882,0.27027,0.157143,0.113127,0.5,0.209302,0.290698,"[address_R, Medu_1, Fedu_1, Mjob_at_home]"
24,fairpca,13,math,0.582278,0.176471,0.214286,0.324324,0.171429,0.152896,0.5,0.116279,0.383721,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
25,fairpca,13,math,0.664557,0.333333,0.390805,0.297297,0.114286,0.183012,0.5,0.302326,0.197674,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
26,fairpca,13,math,0.601266,0.235294,0.275862,0.243243,0.214286,0.028958,0.5,0.186047,0.313953,SES


### Portuguese

#### Grade threshold 10 (Passing)

In [19]:
# Portuguese course, grade threshold 10.
df_results.loc[df_results["Grade threshold"].eq(10) & df_results["Course"].eq("portuguese")]

Unnamed: 0,Type,Grade threshold,Course,Accuracy,Recall,F1,FPR lower,FPR middle/rich,FPR delta,TPR lower,TPR middle/rich,TPR delta,PCA protected variables
9,baseline,10,portuguese,0.838462,0.977169,0.910638,0.933333,0.818182,0.115152,0.941176,1.0,0.058824,
10,no_proxies,10,portuguese,0.842308,0.968037,0.911828,0.833333,0.818182,0.015152,0.929412,0.992537,0.063126,
11,fairpca,10,portuguese,0.853846,0.972603,0.918103,0.833333,0.636364,0.19697,0.929412,1.0,0.070588,[address_R]
12,fairpca,10,portuguese,0.846154,0.981735,0.914894,0.9,0.818182,0.081818,0.952941,1.0,0.047059,"[address_R, Medu_1]"
13,fairpca,10,portuguese,0.85,0.986301,0.917197,0.9,0.818182,0.081818,0.964706,1.0,0.035294,"[address_R, Medu_1, Fedu_1]"
14,fairpca,10,portuguese,0.85,0.986301,0.917197,0.9,0.818182,0.081818,0.964706,1.0,0.035294,"[address_R, Medu_1, Fedu_1, Mjob_at_home]"
15,fairpca,10,portuguese,0.830769,0.96347,0.905579,0.933333,0.727273,0.206061,0.941176,0.977612,0.036435,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
16,fairpca,10,portuguese,0.834615,0.968037,0.907923,0.933333,0.727273,0.206061,0.941176,0.985075,0.043898,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
17,fairpca,10,portuguese,0.85,0.981735,0.916844,0.866667,0.818182,0.048485,0.952941,1.0,0.047059,SES


#### Grade threshold 13 (Eligible for higher education)

In [20]:
# Portuguese course, grade threshold 13.
df_results.loc[df_results["Grade threshold"].eq(13) & df_results["Course"].eq("portuguese")]

Unnamed: 0,Type,Grade threshold,Course,Accuracy,Recall,F1,FPR lower,FPR middle/rich,FPR delta,TPR lower,TPR middle/rich,TPR delta,PCA protected variables
27,baseline,13,portuguese,0.676923,0.608333,0.634783,0.131579,0.421875,0.290296,0.435897,0.691358,0.255461,
28,no_proxies,13,portuguese,0.688462,0.583333,0.633484,0.105263,0.359375,0.254112,0.589744,0.580247,0.009497,
29,fairpca,13,portuguese,0.673077,0.583333,0.622222,0.131579,0.390625,0.259046,0.564103,0.592593,0.02849,[address_R]
30,fairpca,13,portuguese,0.665385,0.583333,0.61674,0.131579,0.421875,0.290296,0.615385,0.567901,0.047483,"[address_R, Medu_1]"
31,fairpca,13,portuguese,0.669231,0.525,0.59434,0.171053,0.25,0.078947,0.538462,0.518519,0.019943,"[address_R, Medu_1, Fedu_1]"
32,fairpca,13,portuguese,0.646154,0.5,0.566038,0.184211,0.28125,0.097039,0.487179,0.506173,0.018993,"[address_R, Medu_1, Fedu_1, Mjob_at_home]"
33,fairpca,13,portuguese,0.642308,0.516667,0.571429,0.236842,0.265625,0.028783,0.487179,0.530864,0.043685,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
34,fairpca,13,portuguese,0.642308,0.508333,0.567442,0.210526,0.28125,0.070724,0.512821,0.506173,0.006648,"[address_R, Medu_1, Fedu_1, Mjob_at_home, Fjob..."
35,fairpca,13,portuguese,0.673077,0.583333,0.622222,0.131579,0.390625,0.259046,0.564103,0.592593,0.02849,SES
