# Equalised Odds post-processing method

In [1]:
import sys
import json
import os
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from confection import Config
from pathlib import Path
from torch.utils.data import DataLoader
from itertools import chain

# update the path so we can directly import code from the DVlog
sys.path.append(os.path.dirname(os.path.abspath(os.path.join(os.getcwd(), os.pardir, "DVlog"))))
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir, "DVlog")))

from DVlog.evaluate import evaluate_model
from DVlog.models.model import UnimodalDVlogModel
from DVlog.utils.dataloaders import MultimodalEmbeddingsDataset
from DVlog.utils.metrics import calculate_performance_measures, calculate_gender_performance_measures
from DVlog.utils.util import ConfigDict, validate_config, process_config, set_seed

In [2]:
# Function to apply EqOddsPostprocessing
def apply_eqodds(y_train_true, y_train_pred, y_test_pred, protected_attr_train, protected_attr_test, seed):
    # Create BinaryLabelDataset for training data
    dataset_train_true = BinaryLabelDataset(favorable_label=1, unfavorable_label=0, df=pd.DataFrame({
        'label': y_train_true,
        'protected': protected_attr_train
    }), label_names=['label'], protected_attribute_names=['protected'])

    dataset_train_pred = BinaryLabelDataset(favorable_label=1, unfavorable_label=0, df=pd.DataFrame({
        'label': y_train_pred,
        'protected': protected_attr_train
    }), label_names=['label'], protected_attribute_names=['protected'])

    # Create BinaryLabelDataset for test data
    dataset_test_pred = BinaryLabelDataset(favorable_label=1, unfavorable_label=0, df=pd.DataFrame({
        'label': y_test_pred,
        'protected': protected_attr_test
    }), label_names=['label'], protected_attribute_names=['protected'])

    # Apply EqOddsPostprocessing
    eq_odds = EqOddsPostprocessing(unprivileged_groups=[{'protected': 0}],
                                   privileged_groups=[{'protected': 1}], seed=seed)

    eq_odds = eq_odds.fit(dataset_train_true, dataset_train_pred)
    dataset_transf_test_pred = eq_odds.predict(dataset_test_pred)

    # Extract the adjusted predictions
    adjusted_pred = dataset_transf_test_pred.labels.ravel()
    return adjusted_pred

# Evaluate unimodal sentence detection models
Load in both the normal and keyword filtered model and evaluate them using the test set in order to retrieve the prediction information

In [4]:
models_path = Path(r"../DVlog/trained_models")
model_config = Path(r"../DVlog/model_configs/unimodal/unimodal_mpnet_sent_keyw.cfg")
annotations_file = Path(r"../DVlog/dataset/dvlog_labels_v2.csv")

random_seeds = [0, 1, 42, 1123, 3107]

In [9]:
# load in the config file for the model
config = Config().from_disk(model_config)
config_dict = process_config(config)

# overwrite the annotations_file + data_dir
config_dict.annotations_file = annotations_file
config_dict.data_dir = Path("../DVlog/dataset/sent-embeddings-dataset")
config_dict.encoder1_data_dir = Path("../DVlog/dataset/sent-embeddings-dataset")

# setup the model paths
model_dir_path = Path(os.path.join(models_path, config_dict.model_name))


# setup the dataset + loader
train_data = MultimodalEmbeddingsDataset("train", config_dict, to_tensor=True, with_protected=True)
train_dataloader = DataLoader(train_data, batch_size=config_dict.batch_size, shuffle=True)

test_data = MultimodalEmbeddingsDataset("test", config_dict, to_tensor=True, with_protected=True)
test_dataloader = DataLoader(test_data, batch_size=config_dict.batch_size, shuffle=True)

# 
for seed in random_seeds:
    # set the exact model_path
    saved_model_path = Path(os.path.join(model_dir_path, f"model_{config_dict.model_name}_seed{seed}.pth"))

    # et the seed
    set_seed(seed)

    # setup the model
    saved_model = UnimodalDVlogModel((config_dict.sequence_length, config_dict.encoder1_dim),
                                      d_model=config_dict.dim_model, n_heads=config_dict.uni_n_heads, use_std=config_dict.detectlayer_use_std)

    # load in the parameters and set the model to evaluation mode
    saved_model.load_state_dict(torch.load(saved_model_path))
    saved_model.eval()

    # run the model on the training set
    train_pred, train_y, train_protected, _ = evaluate_model(saved_model, train_dataloader, config_dict,
                                                                 unpriv_feature="m", verbose=False, get_raw_preds=True, seed=seed)
    test_pred, test_y, test_protected, _ = evaluate_model(saved_model, train_dataloader, config_dict,
                                                              unpriv_feature="m", verbose=False, get_raw_preds=True, seed=seed)

    # reshape all predictions, ground truths, and protected
    train_pred = np.argmax(train_pred, axis=1)
    train_y = np.argmax(train_y, axis=1)
    train_protected_float = np.where(train_protected == "m", 0, 1)

    test_pred = np.argmax(test_pred, axis=1)
    test_y = np.argmax(test_y, axis=1)
    test_protected_float = np.where(test_protected == "m", 0, 1)

    # run the post-processing code
    new_preds = apply_eqodds(train_y, train_pred, test_pred, train_protected_float, test_protected_float, seed)

    break
    # evaluate the models
    _, w_precision, w_recall, w_fscore, _ = calculate_performance_measures(test_y, new_preds)
    gender_metrics = calculate_gender_performance_measures(norm_truth, norm_pred, norm_protected)


Model: unimodal_mpnet_sent_keyw with seed: 0
----------
Model: unimodal_mpnet_sent_keyw with seed: 0
----------
[[2.5600099e-03 9.9743998e-01]
 [6.8000652e-04 9.9932003e-01]
 [9.0582151e-04 9.9909425e-01]
 ...
 [6.6439662e-04 9.9933559e-01]
 [9.9943191e-01 5.6811841e-04]
 [2.0667081e-04 9.9979335e-01]] [[0. 1.]
 [0. 1.]
 [0. 1.]
 ...
 [0. 1.]
 [1. 0.]
 [0. 1.]] ['f' 'f' 'f' 'f' 'f' 'f' 'm' 'm' 'f' 'f' 'f' 'f' 'f' 'm' 'm' 'f' 'f' 'f'
 'm' 'm' 'm' 'f' 'f' 'm' 'm' 'f' 'f' 'f' 'm' 'f' 'f' 'f' 'm' 'm' 'f' 'f'
 'f' 'm' 'f' 'f' 'f' 'm' 'm' 'f' 'm' 'f' 'm' 'f' 'f' 'm' 'f' 'f' 'm' 'f'
 'f' 'm' 'm' 'm' 'f' 'f' 'm' 'f' 'm' 'f' 'f' 'f' 'f' 'f' 'f' 'm' 'm' 'f'
 'f' 'f' 'm' 'f' 'f' 'f' 'm' 'f' 'm' 'f' 'm' 'm' 'f' 'm' 'f' 'm' 'f' 'f'
 'f' 'm' 'f' 'f' 'f' 'f' 'f' 'm' 'f' 'm' 'f' 'f' 'f' 'f' 'm' 'f' 'm' 'f'
 'f' 'f' 'm' 'f' 'm' 'f' 'm' 'f' 'f' 'f' 'm' 'f' 'f' 'f' 'f' 'f' 'm' 'm'
 'm' 'f' 'm' 'f' 'f' 'm' 'f' 'm' 'm' 'm' 'm' 'f' 'f' 'f' 'f' 'f' 'f' 'f'
 'f' 'f' 'f' 'f' 'm' 'm' 'f' 'f' 'f' 'f' 'f' 'f' 'f'