## Research: Debiasing Algorithms for Student Progress Monitoring
### RQ 1: Examining Bias as an Attribute of the Data Source

In [None]:
# Mount Google Drive at /content/drive so the CSV under MyDrive can be read by path (Colab-only import).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the student-info extract from the mounted Drive folder into a DataFrame.
data_csv_path = '/content/drive/MyDrive/Colab Notebooks/Project files/studentinfo_120.csv'
projectdata_df = pd.read_csv(data_csv_path)

In [None]:
# Preview the first two rows to check which columns were loaded.
projectdata_df.head(2)

Unnamed: 0,code_module,code_presentation,id_student,gender,region,highest_education,imd_band,age_band,num_of_prev_attempts,studied_credits,...,score_tma1,score_tma2,score_tma3,score_tma4,late_submission,sum_interaction_30,sum_interaction_60,sum_interaction_90,sum_interaction_120,final_result
0,AAA,2013J,11391,M,East Anglian Region,HE Qualification,90-100%,55<=,0,240,...,78.0,85.0,0.0,80.0,on-time,419.0,110.0,58.0,41.0,Pass
1,AAA,2013J,28400,F,Scotland,HE Qualification,20-30%,35-55,0,60,...,70.0,68.0,0.0,70.0,late,618.0,51.0,148.0,87.0,Pass


### Data Pre-processing

In [None]:
# Remove the module / presentation / student identifiers, plus late_submission
# (not an identifier, but it is not used by any of the models below).
excluded_columns = ['code_module', 'code_presentation', 'id_student', 'late_submission']
df = projectdata_df.drop(columns=excluded_columns)

In [None]:
# Encode imd_band as an ordinal 1-10 code. Values that are missing or not in
# the mapping become NaN here and are imputed below.
imd_mapping = {'0-10%': 1, '10-20%': 2, '20-30%': 3, '30-40%': 4, '40-50%': 5,
               '50-60%': 6, '60-70%': 7, '70-80%': 8, '80-90%': 9, '90-100%': 10}

# Compare bands without the trailing '%' so a label such as '10-20' is still
# recognised instead of silently falling through to the mode imputation.
# NOTE(review): the public OULAD studentInfo table is reported to spell one band
# without '%'; confirm whether this CSV contains such values.
imd_codes = {band.rstrip('%'): code for band, code in imd_mapping.items()}
df['imd_band'] = df['imd_band'].str.rstrip('%').map(imd_codes)

# Impute missing imd_band with the most frequent code. The result is assigned
# back instead of calling fillna(inplace=True) on the column selection: that is
# chained assignment, which warns on recent pandas and leaves df unchanged
# when Copy-on-Write is enabled.
df['imd_band'] = df['imd_band'].fillna(df['imd_band'].mode()[0])

# Binarise age_band: '0-35' -> 0, both older bands -> 1 (any other value -> NaN).
age_mapping = {'0-35': 0, '35-55': 1, '55<=': 1}
df['age_band'] = df['age_band'].map(age_mapping)

In [None]:
# Replace the gender and disability labels with integer codes.
# LabelEncoder numbers the distinct values of a column in sorted order, and it
# is re-fitted for each column, so afterwards `le` holds the disability classes.

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
for binary_col in ('gender', 'disability'):
  df[binary_col] = le.fit_transform(df[binary_col])

In [None]:
# One-hot encode region and highest_education in a single pass; the generated
# columns are prefixed 'Reg_' and 'Edu_' respectively and appended after the
# remaining columns, region dummies first.
df = pd.get_dummies(df, columns=['region', 'highest_education'], prefix=['Reg', 'Edu'])

In [None]:
# Collapse the outcome labels into two classes ('Distinction' joins 'Pass';
# 'Fail' and 'Withdrawn' become 'At Risk'), then encode Pass=0 / At Risk=1.
outcome_groups = {'Distinction': 'Pass', 'Fail': 'At Risk', 'Withdrawn': 'At Risk'}
mapping = {'Pass': 0, 'At Risk': 1}
df['final_result'] = df['final_result'].replace(outcome_groups).map(mapping)

In [None]:
# Put the target column last: keep every other column in its current order
# and append 'final_result' at the end.
cols = [name for name in df.columns if name != 'final_result'] + ['final_result']

# Re-select the columns in the new order
df = df[cols]

In [None]:
# Build one frame per data source. Each keeps the three sensitive attributes
# and ends with the final_result target.
sensitive_cols = ['gender', 'age_band', 'disability']

# Institutional data: demographics, registration info, region and education dummies.
inst_cols = [
    'gender', 'imd_band', 'age_band', 'num_of_prev_attempts', 'studied_credits', 'disability',
    'Reg_East Anglian Region', 'Reg_East Midlands Region', 'Reg_Ireland', 'Reg_London Region',
    'Reg_North Region', 'Reg_North Western Region', 'Reg_Scotland', 'Reg_South East Region',
    'Reg_South Region', 'Reg_South West Region', 'Reg_Wales', 'Reg_West Midlands Region',
    'Reg_Yorkshire Region', 'Edu_A Level or Equivalent', 'Edu_HE Qualification', 'Edu_Lower Than A Level',
    'Edu_No Formal quals', 'Edu_Post Graduate Qualification',
]
# VLE data: the sum_interaction_* columns.
vle_cols = ['sum_interaction_30', 'sum_interaction_60', 'sum_interaction_90', 'sum_interaction_120']
# Performance data: the score_tma* columns.
perf_cols = ['score_tma1', 'score_tma2', 'score_tma3', 'score_tma4']

df_inst = df[inst_cols + ['final_result']]
df_vle = df[sensitive_cols + vle_cols + ['final_result']]
df_perf = df[sensitive_cols + perf_cols + ['final_result']]

## Model Building

### 1. Institutional Dataset

In [None]:
# Target and features for the institutional models. X keeps gender, age_band
# and disability as model inputs; only final_result is removed.
y = df_inst['final_result']
X = df_inst.loc[:, df_inst.columns != 'final_result']

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3)

# Fit the logistic-regression model; fit() returns the estimator itself.
lr = LogisticRegression(max_iter=3000, solver='lbfgs').fit(X_train, y_train)

# Test-set predictions and accuracy
y_pred_lr = lr.predict(X_test)
LR_score = accuracy_score(y_test, y_pred_lr)

In [None]:
pip install holisticai

In [None]:
def compute_fairness(sensitive_feature):
  """Print three fairness metrics for the logistic-regression model.

  Reads the notebook globals X_test, y_test, lr and y_pred_lr, so it must be
  called while they still refer to the institutional split.

  Args:
    sensitive_feature: column of X_test to split on. Rows equal to 1 form
      group_a and rows equal to 0 form group_b.
  """
  from holisticai.bias.metrics import abroca, average_odds_diff, equal_opportunity_diff

  # Boolean masks over the test rows, one per group
  attribute = X_test[sensitive_feature]
  group_a = attribute.eq(1).to_numpy()
  group_b = attribute.eq(0).to_numpy()

  # ABROCA is computed from the predicted probability of class 1
  positive_scores = lr.predict_proba(X_test)[:, 1]
  print(f'ABROCA value: {abroca(group_a, group_b, positive_scores, y_test)}')

  # The two odds-based metrics use the hard predictions
  aod_value = average_odds_diff(group_a, group_b, y_pred_lr, y_test)
  print(f'Average Odds Difference value: {aod_value}')

  eod_value = equal_opportunity_diff(group_a, group_b, y_pred_lr, y_test)
  print(f'Equal Opportunity Difference value: {eod_value}')

In [None]:
# Report accuracy, then the fairness metrics for each sensitive attribute
print('Logistic Regression accuracy:', LR_score)
for heading, feature in (('Gender', 'gender'), ('Disability', 'disability'), ('Age', 'age_band')):
  print(f'\n{heading}')
  compute_fairness(feature)

Logistic Regression accuracy: 0.60288402536306

Gender
ABROCA value: -0.02195433167305083
Average Odds Difference value: 0.0708504264051775
Equal Opportunity Difference value: 0.04791956215900073

Disability
ABROCA value: 0.0253639830609369
Average Odds Difference value: 0.2736658850659256
Equal Opportunity Difference value: 0.255111805072466

Age
ABROCA value: 0.011370795417411261
Average Odds Difference value: -0.23188721736400636
Equal Opportunity Difference value: -0.2209298180593945


In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Seeded gradient-boosting model trained on the same split as the logistic regression.
gb = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Test-set predictions and accuracy
y_pred_gb = gb.predict(X_test)
GB_score = accuracy_score(y_test, y_pred_gb)

In [None]:
def compute_fairness1(sensitive_feature):
  """Print three fairness metrics for the gradient-boosting model.

  Reads the notebook globals X_test, y_test, gb and y_pred_gb.

  Args:
    sensitive_feature: column of X_test to split on. Rows equal to 1 form
      group_a and rows equal to 0 form group_b.
  """
  from holisticai.bias.metrics import abroca, average_odds_diff, equal_opportunity_diff

  # Boolean masks over the test rows, one per group
  attribute = X_test[sensitive_feature]
  group_a = attribute.eq(1).to_numpy()
  group_b = attribute.eq(0).to_numpy()

  # ABROCA is computed from the predicted probability of class 1
  positive_scores = gb.predict_proba(X_test)[:, 1]
  print(f'ABROCA value: {abroca(group_a, group_b, positive_scores, y_test)}')

  # The two odds-based metrics use the hard predictions
  aod_value = average_odds_diff(group_a, group_b, y_pred_gb, y_test)
  print(f'Average Odds Difference value: {aod_value}')

  eod_value = equal_opportunity_diff(group_a, group_b, y_pred_gb, y_test)
  print(f'Equal Opportunity Difference value: {eod_value}')

In [None]:
# Report accuracy, then the fairness metrics for each sensitive attribute
print('Gradient Boosting accuracy:', GB_score)
for heading, feature in (('Gender', 'gender'), ('Disability', 'disability'), ('Age', 'age_band')):
  print(f'\n{heading}')
  compute_fairness1(feature)

Gradient Boosting accuracy: 0.6074861934956024

Gender
ABROCA value: -0.02235633800363923
Average Odds Difference value: 0.04620970591992407
Equal Opportunity Difference value: 0.02691059935627349

Disability
ABROCA value: 0.01969909497009259
Average Odds Difference value: 0.21242863070569135
Equal Opportunity Difference value: 0.20894379824356213

Age
ABROCA value: 0.010824943129795317
Average Odds Difference value: -0.22363370179670397
Equal Opportunity Difference value: -0.2062961087362234


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seeded 100-tree random forest trained on the same split as the other models.
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)

# Test-set predictions and accuracy
y_pred_rf = rf.predict(X_test)
RF_score = accuracy_score(y_test, y_pred_rf)

In [None]:
def compute_fairness2(sensitive_feature):
  """Print three fairness metrics for the random-forest model.

  Reads the notebook globals X_test, y_test, rf and y_pred_rf.

  Args:
    sensitive_feature: column of X_test to split on. Rows equal to 1 form
      group_a and rows equal to 0 form group_b.
  """
  from holisticai.bias.metrics import abroca, average_odds_diff, equal_opportunity_diff

  # Boolean masks over the test rows, one per group
  attribute = X_test[sensitive_feature]
  group_a = attribute.eq(1).to_numpy()
  group_b = attribute.eq(0).to_numpy()

  # ABROCA is computed from the predicted probability of class 1
  positive_scores = rf.predict_proba(X_test)[:, 1]
  print(f'ABROCA value: {abroca(group_a, group_b, positive_scores, y_test)}')

  # The two odds-based metrics use the hard predictions
  aod_value = average_odds_diff(group_a, group_b, y_pred_rf, y_test)
  print(f'Average Odds Difference value: {aod_value}')

  eod_value = equal_opportunity_diff(group_a, group_b, y_pred_rf, y_test)
  print(f'Equal Opportunity Difference value: {eod_value}')

In [None]:
# Report accuracy, then the fairness metrics for each sensitive attribute
print('Random Forest accuracy:', RF_score)
for heading, feature in (('Gender', 'gender'), ('Disability', 'disability'), ('Age', 'age_band')):
  print(f'\n{heading}')
  compute_fairness2(feature)

Random Forest accuracy: 0.5617713233790141

Gender
ABROCA value: 0.005823502414503912
Average Odds Difference value: 0.04984604578348406
Equal Opportunity Difference value: 0.05002285994997557

Disability
ABROCA value: 0.022353864463591777
Average Odds Difference value: 0.126965699016538
Equal Opportunity Difference value: 0.14801375501611536

Age
ABROCA value: 0.009146827110555256
Average Odds Difference value: -0.11483072366529423
Equal Opportunity Difference value: -0.10445218365505127


### 2. VLE Dataset

In [None]:
def compute_fairness_vp(data, sensitive_feature, y_pred, y_test, y_score=None):
  """Print ABROCA, Average Odds Difference and Equal Opportunity Difference.

  Args:
    data: DataFrame holding the sensitive attributes, row-aligned with
      y_pred and y_test.
    sensitive_feature: column of `data` to split on. Rows equal to 1 form
      group_a and rows equal to 0 form group_b.
    y_pred: hard class predictions for the same rows.
    y_test: true labels for the same rows.
    y_score: optional positive-class scores (e.g. predict_proba(X)[:, 1])
      used for ABROCA. When omitted, y_pred is used, which is the original
      behaviour.

  ABROCA compares per-group ROC curves, so it is meant to be fed continuous
  scores. Pass y_score to get that; with hard 0/1 predictions each ROC curve
  has only a single operating point.
  """
  from holisticai.bias.metrics import abroca, average_odds_diff, equal_opportunity_diff

  # Group data by sensitive feature
  group_a = (data[sensitive_feature] == 1).values
  group_b = (data[sensitive_feature] == 0).values

  # Calculate ABROCA from scores when supplied, else fall back to y_pred
  abroca_input = y_pred if y_score is None else y_score
  abroca_value = abroca(group_a, group_b, abroca_input, y_test)
  print(f'{sensitive_feature} ABROCA value: {abroca_value}')

  # Calculate Average Odds Difference
  aod_value = average_odds_diff(group_a, group_b, y_pred, y_test)
  print(f'{sensitive_feature} Average Odds Difference value: {aod_value}')

  # Calculate Equal Opportunity Difference
  eod_value = equal_opportunity_diff(group_a, group_b, y_pred, y_test)
  print(f'{sensitive_feature} Equal Opportunity Difference value: {eod_value}')

In [None]:
# Sensitive attributes are kept out of the model inputs and used only to form
# the groups for the fairness metrics.
sensitive_features = df_vle[['gender', 'age_band', 'disability']]

# Split your data into training and test sets, keeping track of the indices
from sklearn.model_selection import train_test_split

X = df_vle.drop(columns=['final_result', 'gender', 'age_band', 'disability'])
y = df_vle['final_result']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Index labels of the rows that landed in each split
train_indices = X_train.index
test_indices = X_test.index

# Look the sensitive attributes up by label with .loc. These are index labels,
# not positions: .iloc would only give the same rows while the frame keeps its
# default 0..n-1 index.
train_sensitive = sensitive_features.loc[train_indices]
test_sensitive = sensitive_features.loc[test_indices]

# Train your model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr = LogisticRegression(solver='lbfgs', max_iter=3000)
lr.fit(X_train, y_train)

# Make predictions
y_pred_lr = lr.predict(X_test)

# Print out accuracy
print(f'Logistic Regression accuracy: {accuracy_score(y_test, y_pred_lr)}')

# Compute fairness for each sensitive feature
# NOTE(review): only hard 0/1 predictions are passed, so the ABROCA printed
# here is computed from labels rather than predicted probabilities.
for feature in ['gender', 'age_band', 'disability']:
  print()
  compute_fairness_vp(test_sensitive, feature, y_pred_lr, y_test)

Logistic Regression accuracy: 0.7337901411331561

gender ABROCA value: 0.05124741778362052
gender Average Odds Difference value: -0.10231143901905518
gender Equal Opportunity Difference value: -0.05106402123543441

age_band ABROCA value: 0.04193191085675363
age_band Average Odds Difference value: -0.07203486247599955
age_band Equal Opportunity Difference value: -0.03010295161924592

disability ABROCA value: -0.02701565706071085
disability Average Odds Difference value: 0.038449817983463336
disability Equal Opportunity Difference value: 0.011434160922752512


In [None]:
# Gradient Boosting model on the VLE features. Seeded so the reported metrics
# are reproducible, matching the seeded institutional model.
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)

# Make predictions
y_pred_gb = gb.predict(X_test)

# Print out accuracy
print('Gradient Boosting accuracy:', accuracy_score(y_test, y_pred_gb))

# Compute fairness for each sensitive feature
for feature in ['gender', 'age_band', 'disability']:
  print()
  compute_fairness_vp(test_sensitive, feature, y_pred_gb, y_test)

Gradient Boosting accuracy: 0.8104929433421968

gender ABROCA value: -0.004763111866082581
gender Average Odds Difference value: -0.03467911751405996
gender Equal Opportunity Difference value: -0.039442229380142635

age_band ABROCA value: 0.0011040989981137006
age_band Average Odds Difference value: -0.012767611197743743
age_band Equal Opportunity Difference value: -0.011663512199630133

disability ABROCA value: -0.006026458710474025
disability Average Odds Difference value: 0.022605840160036396
disability Equal Opportunity Difference value: 0.01657938144956239


In [None]:
# Random Forest model on the VLE features. Seeded so the reported metrics are
# reproducible, matching the seeded institutional model.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Make predictions
y_pred_rf = rf.predict(X_test)

# Print out accuracy
print('Random Forest accuracy:', accuracy_score(y_test, y_pred_rf))

# Compute fairness for each sensitive feature
for feature in ['gender', 'age_band', 'disability']:
  print()
  compute_fairness_vp(test_sensitive, feature, y_pred_rf, y_test)

Random Forest accuracy: 0.7944364900797709

gender ABROCA value: -0.0026064807259251754
gender Average Odds Difference value: -0.029254270563898954
gender Equal Opportunity Difference value: -0.03186075128982402

age_band ABROCA value: 0.012401022368289327
age_band Average Odds Difference value: -0.026837636441644677
age_band Equal Opportunity Difference value: -0.01443661407335528

disability ABROCA value: -0.004695709411089766
disability Average Odds Difference value: 0.021913781703906524
disability Equal Opportunity Difference value: 0.01721807229281669


### 3. Performance Dataset

In [None]:
# Sensitive attributes are kept out of the model inputs and used only to form
# the groups for the fairness metrics.
sensitive_features = df_perf[['gender', 'age_band', 'disability']]

# Split your data into training and test sets, keeping track of the indices
from sklearn.model_selection import train_test_split

X = df_perf.drop(columns=['final_result', 'gender', 'age_band', 'disability'])
y = df_perf['final_result']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Index labels of the rows that landed in each split
train_indices = X_train.index
test_indices = X_test.index

# Look the sensitive attributes up by label with .loc. These are index labels,
# not positions: .iloc would only give the same rows while the frame keeps its
# default 0..n-1 index.
train_sensitive = sensitive_features.loc[train_indices]
test_sensitive = sensitive_features.loc[test_indices]

# Train your model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr = LogisticRegression(solver='lbfgs', max_iter=3000)
lr.fit(X_train, y_train)

# Make predictions
y_pred_lr = lr.predict(X_test)

# Print out accuracy
print('Logistic Regression accuracy:', accuracy_score(y_test, y_pred_lr))

# Compute fairness for each sensitive feature
# NOTE(review): only hard 0/1 predictions are passed, so the ABROCA printed
# here is computed from labels rather than predicted probabilities.
for feature in ['gender', 'age_band', 'disability']:
  print()
  compute_fairness_vp(test_sensitive, feature, y_pred_lr, y_test)

Logistic Regression accuracy: 0.7877889138883207

gender ABROCA value: 0.031081198538596033
gender Average Odds Difference value: -0.08404089328530676
gender Equal Opportunity Difference value: -0.052959694746710584

age_band ABROCA value: -0.008198378895587388
age_band Average Odds Difference value: 0.021293802230721998
age_band Equal Opportunity Difference value: 0.013095423335134582

disability ABROCA value: -0.0005234015011577053
disability Average Odds Difference value: 0.045608443832462736
disability Equal Opportunity Difference value: 0.04508504233130506


In [None]:
# Gradient Boosting model on the assessment-score features. Seeded so the
# reported metrics are reproducible, matching the seeded institutional model.
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)

# Make predictions
y_pred_gb = gb.predict(X_test)

# Print out accuracy
print('Gradient Boosting accuracy:', accuracy_score(y_test, y_pred_gb))

# Compute fairness for each sensitive feature
for feature in ['gender', 'age_band', 'disability']:
  print()
  compute_fairness_vp(test_sensitive, feature, y_pred_gb, y_test)

Gradient Boosting accuracy: 0.8176518715483739

gender ABROCA value: 0.0003718679450395168
gender Average Odds Difference value: -0.012270897491524851
gender Equal Opportunity Difference value: -0.011899029546485362

age_band ABROCA value: 0.010074434667711407
age_band Average Odds Difference value: 0.012411021306019218
age_band Equal Opportunity Difference value: 0.02248545597373075

disability ABROCA value: 0.020265188890952635
disability Average Odds Difference value: 0.03200797333500276
disability Equal Opportunity Difference value: 0.05227316222595535


In [None]:
# Random Forest model on the assessment-score features. Seeded so the reported
# metrics are reproducible, matching the seeded institutional model.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Make predictions
y_pred_rf = rf.predict(X_test)

# Print out accuracy
print('Random Forest accuracy:', accuracy_score(y_test, y_pred_rf))

# Compute fairness for each sensitive feature
for feature in ['gender', 'age_band', 'disability']:
  print()
  compute_fairness_vp(test_sensitive, feature, y_pred_rf, y_test)

Random Forest accuracy: 0.7994477398240949

gender ABROCA value: -0.010577482478086653
gender Average Odds Difference value: 0.006255675180169201
gender Equal Opportunity Difference value: -0.004321807297917535

age_band ABROCA value: 0.0054053893401826425
age_band Average Odds Difference value: 0.009887881790267075
age_band Equal Opportunity Difference value: 0.01529327113044976

disability ABROCA value: 0.011773404101606988
disability Average Odds Difference value: 0.039157064413440856
disability Equal Opportunity Difference value: 0.05093046851504768
