In [None]:
!pip install datasets
!pip install evaluate
!pip install transformers[torch]
!pip install accelerate -U

In [None]:
from datasets import load_dataset, load_metric
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification
from transformers import get_scheduler, AdamW
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler
from tqdm.auto import tqdm
import numpy as np
import evaluate
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import os
from datasets import concatenate_datasets, Dataset
from huggingface_hub import HfApi
from huggingface_hub import Repository
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix, ConfusionMatrixDisplay
from torch.nn import MSELoss
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.utils import resample
import itertools

# Preparing the f dataset

In [None]:
# Load the "LabHC/bias_in_bios" dataset and display the returned object.
bias_in_bios_dataset = load_dataset("LabHC/bias_in_bios")
bias_in_bios_dataset

In [None]:
# Heatmap of how many training biographies fall into each (profession, gender) cell.
df_pandas = pd.DataFrame(bias_in_bios_dataset['train'])
profession_names = ['Accountant', 'Architect', 'Attorney', 'Chiropractor', 'Comedian', 'Composer',
                    'Dentist', 'Dietitian', 'DJ', 'Film Maker', 'Interior Designer', 'Journalist',
                    'Model', 'Nurse', 'Painter', 'Paralegal', 'Pastor', 'Personal Trainer',
                    'Photographer', 'Physician', 'Poet', 'Professor', 'Psychologist', 'Rapper',
                    'Software Engineer', 'Surgeon', 'Teacher', 'Yoga Teacher']
# Tick labels read "<profession id> : <name>", e.g. '13 : Nurse'.
labels = [f'{idx} : {name}' for idx, name in enumerate(profession_names)]
gen = ['Male', 'Female']
data = pd.crosstab(df_pandas['profession'], df_pandas['gender'])

fig, ax = plt.subplots(figsize=(5, 8))
sns.heatmap(data, annot=True, fmt='.0f', cmap="YlGnBu", yticklabels=labels, xticklabels=gen,
            annot_kws={"size": 10}, ax=ax)
ax.set_xlabel('Gender')
ax.set_ylabel('Profession')
ax.set_title('Value Counts Grouped by gender and proffession')

plt.show()

In [None]:
# Keep only professions 13 and 19 (labelled Nurse and Physician below) and redraw the heatmap.
is_nurse_or_physician = df_pandas['profession'].isin([13, 19])
df_pandas = df_pandas.loc[is_nurse_or_physician]
labels = ['0 : Nurse', '1 : Physician']
gen = ['Male', 'Female']
data = pd.crosstab(df_pandas['profession'], df_pandas['gender'])

fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(data, annot=True, fmt='.0f', cmap="YlGnBu", yticklabels=labels, xticklabels=gen,
            annot_kws={"size": 10}, ax=ax)
ax.set_xlabel('Gender')
ax.set_ylabel('Profession')
ax.set_title('Value Counts Grouped by gender and proffession')

plt.show()

In [None]:
# Nurse = 0   Physician = 1
def filter_and_change_values(example):
    """Relabel one example's 'profession' in place and return the example.

    Profession 13 becomes 0, profession 19 becomes 1, and every other
    profession becomes None so that it can be filtered out afterwards.
    """
    binary_label = {13: 0, 19: 1}
    example['profession'] = binary_label.get(example['profession'])
    return example

# Relabel every split, then drop the examples whose profession was set to None.
for split in ['train', 'dev', 'test']:
    relabelled_split = bias_in_bios_dataset[split].map(filter_and_change_values)
    bias_in_bios_dataset[split] = relabelled_split.filter(
        lambda example: example['profession'] is not None
    )


In [None]:
# Display the dataset after relabelling and filtering.
bias_in_bios_dataset

In [None]:
# Balance the two professions in the training split: label 0 is treated as the minority
# class and is oversampled with replacement until it matches the number of label-1 rows.
data = pd.DataFrame(bias_in_bios_dataset['train'])

majority_class = data[data['profession'] == 1]
minority_class = data[data['profession'] == 0]

# Oversample minority class
minority_class_oversampled = resample(minority_class, replace=True, n_samples=len(majority_class), random_state=42)

# ignore_index=True replaces the duplicated row labels that oversampling leaves behind.
balanced_data = pd.concat([majority_class, minority_class_oversampled], ignore_index=True)

# preserve_index=False stops the pandas index from being stored as an extra
# `__index_level_0__` column, so the train split keeps the same columns as dev and test.
balanced_dataset = Dataset.from_pandas(balanced_data, split='train', preserve_index=False)

labels_pandas = pd.DataFrame(balanced_data['profession'])
class_counts = labels_pandas.value_counts().sort_values(ascending=False)
print(class_counts)

bias_in_bios_dataset['train'] = balanced_dataset

In [None]:
# Heatmap of the (profession, gender) counts after oversampling.
labels = ['0 : Nurse', '1 : Physician']
gen = ['0 : Male', '1 : Female']
data = pd.crosstab(balanced_data['profession'], balanced_data['gender'])

fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(data, annot=True, fmt='.0f', cmap="YlGnBu", yticklabels=labels, xticklabels=gen,
            annot_kws={"size": 10}, ax=ax)
ax.set_xlabel('Gender')
ax.set_ylabel('Profession')
ax.set_title('Value Counts Grouped by gender and proffession')

plt.show()

In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer)


def tokenize_function(examples):
  """Tokenize the 'hard_text' field of a batch of examples, with truncation enabled."""
  return tokenizer(examples['hard_text'], truncation=True)


# Tokenize every split, drop the raw text, expose the target as 'labels'
# and return torch tensors.
tokenized_datasets = (
    bias_in_bios_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .rename_column('profession', 'labels')
    .with_format('torch')
)

In [None]:
# Only the training loader shuffles; the dev and test loaders keep dataset order.
train_dataloader = DataLoader(tokenized_datasets['train'], shuffle=True, batch_size=20, collate_fn=data_collator)
eval_dataloader = DataLoader(tokenized_datasets['dev'], batch_size=20, collate_fn=data_collator)
test_dataloader = DataLoader(tokenized_datasets['test'], batch_size=20, collate_fn=data_collator)

# Fine-tuning the f model

In [None]:
# Use the GPU when one is available and load BERT with a 2-label classification head.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoint = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
model.to(device)

print(device)

In [None]:
# AdamW with the "linear" learning-rate schedule and no warm-up, sized for one epoch.
num_epochs = 1
num_training_steps = num_epochs * len(train_dataloader)

optimizer = AdamW(model.parameters(), lr=5e-5)
lr_scheduler = get_scheduler(
    "linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

In [None]:
# Release cached GPU memory and run the garbage collector before training.
torch.cuda.empty_cache()
gc.collect()
# NOTE(review): CUDA has presumably already been initialised by `model.to(device)` in an
# earlier cell, so this allocator setting may not take effect for the current process —
# confirm, and consider setting it before the first CUDA call.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [None]:
# Fine-tune f: one optimizer step and one scheduler step per batch.
model.train()
progress_bar = tqdm(range(num_training_steps))

for _epoch in range(num_epochs):
  for batch in train_dataloader:
    input_ids, attention_mask, token_type_ids, labels = (
        batch[field].to(device)
        for field in ('input_ids', 'attention_mask', 'token_type_ids', 'labels')
    )

    # Passing labels makes the model return its loss alongside the logits.
    loss = model(
        input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids
    ).loss
    loss.backward()

    optimizer.step()
    lr_scheduler.step()
    optimizer.zero_grad()
    progress_bar.update(1)

In [None]:
hub_repo_name = "MoGP/f_x"
# Read the Hugging Face token from the environment instead of committing it to the notebook.
# The token that used to be hardcoded here has been exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]

# Save the model and tokenizer to the Hub
model.push_to_hub(hub_repo_name, use_auth_token=access_token)
tokenizer.push_to_hub(hub_repo_name, use_auth_token=access_token)

# Evaluation of f

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reload the fine-tuned f model from the Hugging Face Hub.
checkpoint = "MoGP/f_x"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
model.to(device)

print(device)

In [None]:
# Evaluate f on the test split: collect predictions, gold labels and the gender
# attribute per batch, and score them with the GLUE MRPC metric.
y_true = []
y_pred = []
gender = []
# `datasets.load_metric` is deprecated; the `evaluate` package loads the same metric.
metric = evaluate.load("glue", "mrpc")
model.eval()
for batch in test_dataloader:
  labels = batch['labels'].to(device)
  input_ids = batch['input_ids'].to(device)
  attention_mask = batch['attention_mask'].to(device)
  token_type_ids = batch['token_type_ids'].to(device)
  sex = batch['gender'].to(device)
  with torch.no_grad():
    outputs = model(input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids)
  logits = outputs.logits
  predictions = torch.argmax(logits, dim=-1)
  metric.add_batch(predictions=predictions, references=labels)
  # Keep per-batch numpy copies; a later cell concatenates them.
  y_pred.append(predictions.cpu().numpy())
  y_true.append(labels.cpu().numpy())
  gender.append(sex.cpu().numpy())
metric.compute()

In [None]:
# Join the per-batch arrays collected during evaluation into one flat array each.
y_pred, y_true, gender = (np.concatenate(parts) for parts in (y_pred, y_true, gender))

conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
# Plot the confusion matrix of f, save the figure as conf.png and show it.
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=[0,1]) # 0 = nurse, 1 = physician
disp.plot()
plt.savefig("conf.png")
plt.show()

In [None]:
# Fairness metrics, each computed as privileged class (gender == 0) minus
# discriminated class (gender == 1).
is_male = gender == 0
is_female = gender == 1
is_positive = y_true == 1
is_negative = y_true == 0

SP = np.mean(y_pred[is_male]) - np.mean(y_pred[is_female])
EO = np.mean(y_pred[is_positive & is_male]) - np.mean(y_pred[is_positive & is_female])
# TNR_male - TNR_female = (1 - FPR_male) - (1 - FPR_female) = FPR_female - FPR_male
TNRD = np.mean(y_pred[is_negative & is_female]) - np.mean(y_pred[is_negative & is_male])

print("Statistical Parity: ",SP)
print("True Positive Rate Difference (Equal Opportunity): ",EO)
print("True Negative Rate Difference: ",TNRD)

In [None]:
y_true_flat = y_true
y_pred_flat = y_pred
gender_flat = gender

unique_genders = set(gender_flat)

profession = ['0 : Nurse', '1 : Physician']
sns.set(font_scale=1.5)

# Plot confusion matrix for each gender.
# The loop variable is deliberately NOT called `gender`: that name holds the per-example
# gender array, and overwriting it would break any cell that is re-run afterwards.
for gender_value in sorted(unique_genders):
    in_group = gender_flat == gender_value

    # labels=[0, 1] keeps the matrix 2x2 even if a group contains only one class,
    # so it always matches the two tick labels.
    cm = confusion_matrix(y_true_flat[in_group], y_pred_flat[in_group], labels=[0, 1])

    plt.figure(figsize=(6, 4))
    sns.heatmap(cm/np.sum(cm), annot=True, yticklabels=profession, xticklabels=profession, fmt='.2%', cmap='Blues', cbar=False)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    gender_name = 'Male' if gender_value == 0 else 'Female'
    plt.title(f'Confusion Matrix for Gender: {gender_name}')
    plt.show()

# Creating the g dataset

In [None]:
g_labels = []

def calculate_new_labels(predictions, labels, sensitive_attribute):
    """Turn f's predictions into targets for g, one value per example.

    Returns an array shaped like `predictions` holding:
      *  1 : false negative with sensitive_attribute == 1 (women)
      * -1 : false positive with sensitive_attribute == 0 (men)
      * -2 : every other wrong prediction (false negative for men,
             false positive for women)
      *  0 : everything else
    """
    preds = np.asarray(predictions)
    gold = np.asarray(labels)
    attr = np.asarray(sensitive_attribute)

    false_negative = (gold == 1) & (preds == 0)
    false_positive = (gold == 0) & (preds == 1)
    is_female = attr == 1
    is_male = attr == 0

    new_labels = np.zeros_like(predictions)
    # The three masks are mutually exclusive, so assignment order does not matter.
    new_labels[false_negative & is_female] = 1
    new_labels[false_positive & is_male] = -1
    new_labels[(false_positive & is_female) | (false_negative & is_male)] = -2
    return new_labels

# Label every training example in dataset order. `train_dataloader` shuffles, which would
# misalign these labels with the 'hard_text' column they are paired with afterwards, so an
# unshuffled loader over the same split is used here instead.
ordered_train_dataloader = DataLoader(
    tokenized_datasets['train'], shuffle=False, batch_size=20, collate_fn=data_collator
)

# Make sure dropout is disabled while predicting.
model.eval()
for batch in ordered_train_dataloader:
  labels = batch['labels'].to(device)
  input_ids = batch['input_ids'].to(device)
  attention_mask = batch['attention_mask'].to(device)
  token_type_ids = batch['token_type_ids'].to(device)
  sensitive_attribute = batch['gender'].to(device)

  with torch.no_grad():
    outputs = model(input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids)
  logits = outputs.logits
  predictions = torch.argmax(logits, dim=-1)

  predictions_np = predictions.cpu().numpy()
  labels_np = labels.cpu().numpy()
  sensitive_attribute_np = sensitive_attribute.cpu().numpy()

  new_labels = calculate_new_labels(predictions_np, labels_np, sensitive_attribute_np)
  g_labels.append(new_labels)

In [None]:
# Join the per-batch label arrays into one flat array and print it.
G_labels = np.concatenate(g_labels).ravel()
print(G_labels)

In [None]:
# Print how often each g label occurs, most frequent first.
labels_pandas = pd.DataFrame(G_labels)
class_counts = labels_pandas.value_counts().sort_values(ascending=False)
print(class_counts)

In [None]:
# Pair each training biography with its g label.
texts = bias_in_bios_dataset['train']['hard_text']
data_dict = dict(texts=texts, labels=G_labels)

datasets_g = Dataset.from_dict(data_dict)

In [None]:
# Drop the examples labelled -2 and write the remainder to datasetwt.csv.
datasets_g = datasets_g.filter(lambda example: example['labels'] != -2)
dataset_g = pd.DataFrame(datasets_g)
# index=False leaves the DataFrame index out of the file.
dataset_g.to_csv("datasetwt.csv", index=False)

In [None]:
hub_repo_name = "MoGP/g_dataset_text_new"
# Read the Hugging Face token from the environment instead of committing it to the notebook.
# The token that used to be hardcoded here has been exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]
csv_file_path = "datasetwt.csv"

api = HfApi()

# Upload the CSV written by the previous cell to the dataset repository.
commit_message = "Add dataset file"
api.upload_file(
    path_or_fileobj=csv_file_path,
    path_in_repo="datasetwt.csv",
    repo_id=hub_repo_name,
    token=access_token,
    commit_message=commit_message,
    repo_type="dataset"
)

# Training the g function

In [None]:
# Load the g training data from the Hub repository "MoGP/g_dataset_text_new" and display it.
datasets_g = load_dataset("MoGP/g_dataset_text_new")
datasets_g

In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer)


def tokenize_function(examples):
  """Tokenize the 'texts' field of a batch of examples, with truncation enabled."""
  return tokenizer(examples['texts'], truncation=True)


# Tokenize, drop the raw text and return torch tensors.
tokenized_datasets_g = (
    datasets_g
    .map(tokenize_function, batched=True)
    .remove_columns(['texts'])
    .with_format('torch')
)

In [None]:
# Undersample label 0: keep a random 3% of those rows together with every non-zero row.
data = tokenized_datasets_g['train'].to_pandas()
is_zero = data['labels'] == 0
zeros_data = data[is_zero]
non_zeros_data = data[~is_zero]

zeros_sample = zeros_data.sample(frac=0.03, random_state=42)
new_data = pd.concat([zeros_sample, non_zeros_data])

labels_pandas = pd.DataFrame(new_data['labels'])
class_counts = labels_pandas.value_counts().sort_values(ascending=False)
print(class_counts)

# Replace the training split with the reduced data and display it.
new_dataset = Dataset.from_pandas(new_data)
tokenized_datasets_g['train'] = new_dataset
new_dataset

In [None]:
# Compute class weights
# Convert to a numpy array first so the steps below behave the same whether the
# column comes back as a list or as a tensor.
labels = np.asarray(tokenized_datasets_g['train']['labels'])
# Label -1 is stored as class index 2 (np.bincount needs non-negative integers).
labels = np.where(labels == -1, 2, labels)

class_counts = np.bincount(labels, minlength=3)
# Inverse-frequency weights; a class with no examples gets weight 0 instead of a
# division by zero.
class_weights = np.zeros(len(class_counts), dtype=float)
np.divide(1.0, class_counts, out=class_weights, where=class_counts > 0)
weights = class_weights[labels]

# Create sampler
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

In [None]:
# Training loader for g; the order of examples is drawn by the weighted sampler.
train_dataloader_g = DataLoader(
    tokenized_datasets_g['train'], batch_size=20, sampler=sampler, collate_fn=data_collator
)

In [None]:
# Use the GPU when one is available and load BERT with a 3-label classification head for g.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoint = "bert-base-uncased"
model_g = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)
model_g.to(device)

print(device)

In [None]:
# AdamW with the "linear" learning-rate schedule and no warm-up, sized for three epochs.
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader_g)

optimizer_g = AdamW(model_g.parameters(), lr=5e-5)
lr_scheduler = get_scheduler(
    "linear", optimizer=optimizer_g, num_warmup_steps=0, num_training_steps=num_training_steps
)

In [None]:
# Fine-tune the 3-class g model: one optimizer step and one scheduler step per batch.
model_g.train()
progress_bar = tqdm(range(num_training_steps))

for _epoch in range(num_epochs):
  for batch in train_dataloader_g:
    labels = batch['labels']
    labels[labels == -1] = 2  # label -1 is stored as class index 2
    labels = labels.to(device)
    input_ids, attention_mask, token_type_ids = (
        batch[field].to(device) for field in ('input_ids', 'attention_mask', 'token_type_ids')
    )

    loss = model_g(
        input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids
    ).loss
    loss.backward()

    optimizer_g.step()
    lr_scheduler.step()
    optimizer_g.zero_grad()
    progress_bar.update(1)

In [None]:
hub_repo_name = "MoGP/g_x_new"
# Read the Hugging Face token from the environment instead of committing it to the notebook.
# The token that used to be hardcoded here has been exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]

# Save the model and tokenizer to the Hub
model_g.push_to_hub(hub_repo_name, use_auth_token=access_token)
tokenizer.push_to_hub(hub_repo_name, use_auth_token=access_token)

# Evaluation of g

In [None]:
# Load the g test data from the Hub repository "MoGP/g_test_set_new" and display it.
datasets_g = load_dataset("MoGP/g_test_set_new")
datasets_g

In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer)


def tokenize_function(examples):
  """Tokenize the 'texts' field of a batch of examples, with truncation enabled."""
  return tokenizer(examples['texts'], truncation=True)


# Tokenize the test data, drop the raw text and return torch tensors.
tokenized_datasets_g = (
    datasets_g
    .map(tokenize_function, batched=True)
    .remove_columns(['texts'])
    .with_format('torch')
)

In [None]:
# Test loader for g; the test data is read from the 'train' split of the loaded dataset.
test_dataloader_g = DataLoader(
    tokenized_datasets_g['train'], batch_size=20, collate_fn=data_collator
)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reload the fine-tuned 3-class g model from the Hugging Face Hub.
checkpoint = "MoGP/g_x_new"
model_g = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=3)
model_g.to(device)

print(device)

In [None]:
# Evaluate the 3-class g model with accuracy and weighted F1.
y_true = []
y_pred = []
# `datasets.load_metric` is deprecated; the `evaluate` package loads the same metrics.
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

model_g.eval()
for batch in test_dataloader_g:
  labels = batch['labels']
  labels[labels == -1] = 2  # label -1 is stored as class index 2
  labels = labels.to(device)
  input_ids = batch['input_ids'].to(device)
  attention_mask = batch['attention_mask'].to(device)
  token_type_ids = batch['token_type_ids'].to(device)
  with torch.no_grad():
    outputs = model_g(input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids)
  logits = outputs.logits
  predictions = torch.argmax(logits, dim=-1)
  accuracy_metric.add_batch(predictions=predictions, references=labels)
  f1_metric.add_batch(predictions=predictions, references=labels)
  # Keep per-batch numpy copies; a later cell concatenates them.
  y_pred.append(predictions.cpu().numpy())
  y_true.append(labels.cpu().numpy())
acc = accuracy_metric.compute()
f1 = f1_metric.compute(average="weighted")
print(acc)
print(f1)

In [None]:
# Join the per-batch arrays, then print the overall and the per-class confusion matrices.
y_pred, y_true = (np.concatenate(parts) for parts in (y_pred, y_true))

conf_matrix = confusion_matrix(y_true, y_pred)
multilabel_conf_matrix = multilabel_confusion_matrix(y_true, y_pred, labels=[2, 0, 1])

print("Confusion Matrix:")
print(conf_matrix)
print("Confusion Matrix for each class:")
print(multilabel_conf_matrix)

In [None]:
# Heatmap of the confusion matrix as a share of all examples, saved as conf.png.
sns.set(font_scale=1.5)
class_ticks = [0, 1, -1]
normalized_matrix = conf_matrix / np.sum(conf_matrix)
sns.heatmap(normalized_matrix, fmt='.2%', yticklabels=class_ticks, xticklabels=class_ticks,
            annot=True, cmap='Purples', cbar=False)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig("conf.png")
plt.show()

# Training the g function - Regression

In [None]:
# Load the g training data from the Hub repository "MoGP/g_dataset_text_new" and display it.
datasets_g = load_dataset("MoGP/g_dataset_text_new")
datasets_g

In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer)


def tokenize_function(examples):
  """Tokenize the 'texts' field of a batch of examples, with truncation enabled."""
  return tokenizer(examples['texts'], truncation=True)


# Tokenize, drop the raw text and return torch tensors.
tokenized_datasets_g = (
    datasets_g
    .map(tokenize_function, batched=True)
    .remove_columns(['texts'])
    .with_format('torch')
)

In [None]:
# Undersample label 0: keep a random 3% of those rows together with every non-zero row.
data = tokenized_datasets_g['train'].to_pandas()
is_zero = data['labels'] == 0
zeros_data = data[is_zero]
non_zeros_data = data[~is_zero]

zeros_sample = zeros_data.sample(frac=0.03, random_state=42)
new_data = pd.concat([zeros_sample, non_zeros_data])

labels_pandas = pd.DataFrame(new_data['labels'])
class_counts = labels_pandas.value_counts().sort_values(ascending=False)
print(class_counts)

# Replace the training split with the reduced data and display it.
new_dataset = Dataset.from_pandas(new_data)
tokenized_datasets_g['train'] = new_dataset
new_dataset

In [None]:
# Bin the target values
labels = np.array(tokenized_datasets_g['train']['labels'])
num_bins = 10
bins = np.linspace(np.min(labels), np.max(labels), num_bins)
binned_labels = np.digitize(labels, bins) - 1  # Bin indices start at 0

# Compute class weights
class_counts = np.bincount(binned_labels)
# Inverse-frequency weight per bin. Bins that contain no example keep weight 0 rather
# than triggering a division by zero; they are never indexed by `binned_labels` anyway.
class_weights = np.zeros(len(class_counts), dtype=float)
np.divide(1.0, class_counts, out=class_weights, where=class_counts > 0)
weights = class_weights[binned_labels]

# Create sampler
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

In [None]:
# Training loader for the regression g; the order of examples is drawn by the weighted sampler.
train_dataloader_g = DataLoader(
    tokenized_datasets_g['train'], batch_size=20, sampler=sampler, collate_fn=data_collator
)

In [None]:
# Use the GPU when one is available and load BERT with a single-output regression head.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoint = "bert-base-uncased"
model_g_reg = AutoModelForSequenceClassification.from_pretrained(
    checkpoint, problem_type="regression", num_labels=1
)
model_g_reg.to(device)

print(device)

In [None]:
# AdamW with the "linear" learning-rate schedule and no warm-up, sized for three epochs.
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader_g)

optimizer_g = AdamW(model_g_reg.parameters(), lr=5e-5)
lr_scheduler = get_scheduler(
    "linear", optimizer=optimizer_g, num_warmup_steps=0, num_training_steps=num_training_steps
)

In [None]:
progress_bar = tqdm(range(num_training_steps))
# Loss function for regression
loss_fn = MSELoss()

model_g_reg.train()
for epoch in range(num_epochs):
  for batch in train_dataloader_g:
    labels = batch['labels'].to(device)
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    token_type_ids = batch['token_type_ids'].to(device)

    # The loss is computed explicitly below, so labels are not passed to the model;
    # this avoids a second, unused loss computation on integer labels.
    outputs = model_g_reg(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
    # squeeze(-1) removes only the output dimension. A bare squeeze() would also drop the
    # batch dimension when a batch holds a single example, leaving a scalar prediction
    # against a length-1 target.
    loss = loss_fn(outputs.logits.squeeze(-1), labels.float())

    loss.backward()

    optimizer_g.step()
    lr_scheduler.step()
    optimizer_g.zero_grad()
    progress_bar.update(1)

In [None]:
hub_repo_name = "MoGP/g_x_reg_new"
# Read the Hugging Face token from the environment instead of committing it to the notebook.
# The token that used to be hardcoded here has been exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]

# Save the model and tokenizer to the Hub
model_g_reg.push_to_hub(hub_repo_name, use_auth_token=access_token)
tokenizer.push_to_hub(hub_repo_name, use_auth_token=access_token)

# Evaluation of g - Regression

In [None]:
# Load the g test data from the Hub repository "MoGP/g_test_set_new" and display it.
datasets_g = load_dataset("MoGP/g_test_set_new")
datasets_g

In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer)


def tokenize_function(examples):
  """Tokenize the 'texts' field of a batch of examples, with truncation enabled."""
  return tokenizer(examples['texts'], truncation=True)


# Tokenize the test data, drop the raw text and return torch tensors.
tokenized_datasets_g = (
    datasets_g
    .map(tokenize_function, batched=True)
    .remove_columns(['texts'])
    .with_format('torch')
)

In [None]:
# Test loader for the regression g; the test data is read from the 'train' split of the loaded dataset.
test_dataloader_g = DataLoader(
    tokenized_datasets_g['train'], batch_size=20, collate_fn=data_collator
)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reload the fine-tuned regression g model from the Hugging Face Hub.
checkpoint = "MoGP/g_x_reg_new"
model_g_reg = AutoModelForSequenceClassification.from_pretrained(
    checkpoint, problem_type="regression", num_labels=1
)
model_g_reg.to(device)

print(device)

In [None]:
# Collect the regression model's predictions and the gold labels batch by batch.
y_true = []
y_pred = []

model_g_reg.eval()
for batch in test_dataloader_g:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    labels = batch['labels'].to(device)

    with torch.no_grad():
        outputs = model_g_reg(input_ids, attention_mask=attention_mask)
    logits = outputs.logits

    # Since it's a regression task, logits should already be the predicted continuous values.
    # squeeze(-1) removes only the output dimension: a bare squeeze() turns a batch of one
    # example into a 0-d array, which np.concatenate cannot join afterwards.
    predictions = logits.squeeze(-1).cpu().numpy()
    lab = labels.cpu().numpy()

    y_pred.append(predictions)
    y_true.append(lab)

In [None]:
# Join the per-batch arrays, then report the overall regression errors.
y_pred, y_true = (np.concatenate(parts) for parts in (y_pred, y_true))

mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")

In [None]:
# Report the regression errors separately for each true label.
for label in [-1, 0, 1]:
    has_label = y_true == label
    mse = mean_squared_error(y_true[has_label], y_pred[has_label])
    mae = mean_absolute_error(y_true[has_label], y_pred[has_label])

    print(f"Mean Squared Error for label {label}: {mse}")
    print(f"Mean Absolute Error for label {label}: {mae}")
    print("----------------------------------------------------")

In [None]:
# Put the gold labels and the predictions side by side in one DataFrame.
res = dict(y_true=y_true, y_pred=y_pred)
df = pd.DataFrame(res)

In [None]:
# One scatter plot per true label: the predictions against a dashed line at the label value.
labels = [-1, 0, 1]

for label in labels:
    subset = df[df['y_true'] == label]
    predictions = subset['y_pred']

    fig, ax = plt.subplots(figsize=(4, 4))
    ax.scatter(subset.index, predictions, color='blue', label='Predictions', s=10)
    ax.axhline(y=label, color='red', linestyle='--', linewidth=2, label=f'Actual label: {label}')

    ax.set_xlabel('Data point index')
    ax.set_ylabel('Prediction')
    ax.set_title(f'Scatter plot for label {label}')
    ax.legend()

    plt.tight_layout()
    plt.show()


# Using g to create f' function

## Creating new dataset for f' with results of g and f

In [None]:
# Load the "LabHC/bias_in_bios" dataset again and display the returned object.
bias_in_bios_dataset = load_dataset("LabHC/bias_in_bios")
bias_in_bios_dataset

In [None]:
# Nurse = 0   Physician = 1
def filter_and_change_values(example):
    """Relabel one example's 'profession' in place and return the example.

    Profession 13 becomes 0, profession 19 becomes 1, and every other
    profession becomes None so that it can be filtered out afterwards.
    """
    binary_label = {13: 0, 19: 1}
    example['profession'] = binary_label.get(example['profession'])
    return example

# Relabel every split, then drop the examples whose profession was set to None.
for split in ['train', 'dev', 'test']:
    relabelled_split = bias_in_bios_dataset[split].map(filter_and_change_values)
    bias_in_bios_dataset[split] = relabelled_split.filter(
        lambda example: example['profession'] is not None
    )


In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer)


def tokenize_function(examples):
  """Tokenize the 'hard_text' field of a batch of examples, with truncation enabled."""
  return tokenizer(examples['hard_text'], truncation=True)


# Tokenize every split, drop the raw text, expose the target as 'labels'
# and return torch tensors.
tokenized_datasets = (
    bias_in_bios_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .rename_column('profession', 'labels')
    .with_format('torch')
)

In [None]:
# Unshuffled loader over the dev split, so predictions come out in dataset order.
dev_dataloader = DataLoader(
   tokenized_datasets['dev'], batch_size=20, collate_fn=data_collator
)

In [None]:
# Load the fine-tuned f (2 labels) and g (3 labels) models from the Hub onto the device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoint_f = "MoGP/f_x"
checkpoint_g = "MoGP/g_x_new"

model_f = AutoModelForSequenceClassification.from_pretrained(checkpoint_f, num_labels=2)
model_g = AutoModelForSequenceClassification.from_pretrained(checkpoint_g, num_labels=3)

model_f.to(device)
model_g.to(device)

In [None]:
# Run f and g over the dev split and keep, per example, f's probability for class 1
# and g's predicted label.
f_labels = []
g_labels = []

model_f.eval()
model_g.eval()

for batch in dev_dataloader:
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    token_type_ids = batch['token_type_ids'].to(device)

    with torch.no_grad():
        outputs_f = model_f(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        outputs_g = model_g(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)

    f_logits = outputs_f.logits
    f_predictions = torch.nn.functional.softmax(f_logits, dim=-1)[:, 1].cpu().numpy()

    g_logits = outputs_g.logits
    g_predictions = torch.argmax(g_logits, dim=-1).cpu().numpy()
    # g was trained with label -1 stored as class index 2. Map it back so the saved
    # g_labels use the {-1, 0, 1} encoding that the f' training data and loss work with.
    g_predictions[g_predictions == 2] = -1

    f_labels.extend(f_predictions)
    g_labels.extend(g_predictions)

In [None]:
# Print how often each g prediction occurs on the dev split, most frequent first.
labels_pandas = pd.DataFrame(g_labels)
class_counts = labels_pandas.value_counts().sort_values(ascending=False)
print(class_counts)

In [None]:
# Assemble the dev examples together with the outputs of f and g into one dataset.
f_labels = np.array(f_labels)
g_labels = np.array(g_labels)

dev_split = bias_in_bios_dataset['dev']
texts = dev_split['hard_text']
labels = dev_split['profession']
genders = dev_split['gender']

data_dict = dict(
    hard_text=texts,
    gender=genders,
    labels=labels,
    f_labels=f_labels,
    g_labels=g_labels,
)

# Rows whose g label is -2 are dropped.
datasets_f_prime = Dataset.from_dict(data_dict).filter(lambda row: row['g_labels'] != -2)
datasets_f_prime

In [None]:
# Write the f' dev dataset to fpdataset.csv without the DataFrame index.
dataset_g = pd.DataFrame(datasets_f_prime)
dataset_g.to_csv("fpdataset.csv", index=False)

In [None]:
hub_repo_name = "MoGP/f_prime_dataset_dev_new"
# Read the Hugging Face token from the environment instead of committing it to the notebook.
# The token that used to be hardcoded here has been exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]
csv_file_path = "fpdataset.csv"

api = HfApi()

# Upload the CSV written by the previous cell to the dataset repository.
commit_message = "Add dataset file"
api.upload_file(
    path_or_fileobj=csv_file_path,
    path_in_repo="fpdataset.csv",
    repo_id=hub_repo_name,
    token=access_token,
    commit_message=commit_message,
    repo_type="dataset"
)

## Preparing the training set for f'




In [None]:
# Load the f' training data and the f' dev data from their Hub repositories.
f_prime_dataset_train = load_dataset("MoGP/f_prime_dataset_new")
f_prime_dataset_dev = load_dataset("MoGP/f_prime_dataset_dev_new")

In [None]:
# Rebalance the f' training data by g label: oversample the -1 rows up to the number of
# +1 rows and keep only a random 11.3% of the 0 rows.
data = f_prime_dataset_train['train'].to_pandas()

g_label = data['g_labels']
majority_class = data[g_label == 1]
minority_class = data[g_label == -1]
zero_class = data[g_label == 0]

zeros_sample = zero_class.sample(frac=0.113, random_state=42)
minority_class_oversampled = resample(
    minority_class, replace=True, n_samples=len(majority_class), random_state=42
)

balanced_data = pd.concat([majority_class, minority_class_oversampled, zeros_sample])
balanced_dataset = Dataset.from_pandas(balanced_data, split='train')

In [None]:
# Print how often each g label occurs in the rebalanced f' training data, most frequent first.
labels_pandas = pd.DataFrame(balanced_dataset['g_labels'])
class_counts = labels_pandas.value_counts().sort_values(ascending=False)
print(class_counts)

In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
data_collator = DataCollatorWithPadding(tokenizer)


def tokenize_function(examples):
  """Tokenize the 'hard_text' field of a batch of examples, with truncation enabled."""
  return tokenizer(examples['hard_text'], truncation=True)


# Tokenize the rebalanced f' training data, drop the raw text and return torch tensors.
tokenized_datasets = (
    balanced_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .with_format('torch')
)

# Same treatment for the f' dev data.
tokenized_datasets_dev = (
    f_prime_dataset_dev
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .with_format('torch')
)

In [None]:
# The training loader shuffles; the dev loader reads the 'train' split of the dev dataset in order.
train_dataloader = DataLoader(tokenized_datasets, shuffle=True, batch_size=20, collate_fn=data_collator)
dev_dataloader = DataLoader(tokenized_datasets_dev['train'], batch_size=20, collate_fn=data_collator)

## Tuning the hyperparameters
Grid search over alpha, the learning rate, and the number of epochs.


In [None]:
def custom_loss(g_output, f_output, fprime_output, alpha=1.0):
    """Loss used to train f'.

    Computes ``-sum(g_output * fprime_output) + alpha * MSE(f_output, fprime_output)``.

    Args:
        g_output: tensor of g labels, one per example.
        f_output: tensor of f's outputs, one per example.
        fprime_output: tensor of f' outputs, one per example.
        alpha: weight of the MSE term between f and f'.

    Returns:
        A scalar tensor with the dtype of ``fprime_output``.
    """
    # g labels may be integers and f outputs may be float64; bring both to the dtype of
    # f' so the product and the MSE term (and their gradients) use a single dtype.
    g_output = g_output.to(fprime_output.dtype)
    f_output = f_output.to(fprime_output.dtype)

    mse_loss = nn.MSELoss()
    regularization_term = mse_loss(f_output, fprime_output)
    left = torch.sum(g_output * fprime_output * -1)
    combined_loss = left + alpha * regularization_term
    return combined_loss

In [None]:
def train_fprime(dev_dataloader_fprime, num_epochs=20, alpha=1.0, learning_rate=5e-6):
    """Fine-tune a fresh copy of the "MoGP/f_x" checkpoint with `custom_loss`.

    Args:
        dev_dataloader_fprime: loader whose batches contain 'labels', 'g_labels',
            'f_labels', 'input_ids', 'attention_mask' and 'token_type_ids'.
        num_epochs: number of passes over the loader.
        alpha: weight forwarded to `custom_loss`.
        learning_rate: AdamW learning rate.

    Returns:
        The trained model.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_fprime = AutoModelForSequenceClassification.from_pretrained("MoGP/f_x", num_labels=2)
    model_fprime.to(device)

    total_steps = num_epochs * len(dev_dataloader_fprime)
    optimizer_fprime = AdamW(model_fprime.parameters(), lr=learning_rate)
    lr_scheduler_fprime = get_scheduler(
        "linear",
        optimizer=optimizer_fprime,
        num_warmup_steps=0,
        num_training_steps=total_steps,
    )

    batch_fields = ('labels', 'g_labels', 'f_labels', 'input_ids', 'attention_mask', 'token_type_ids')

    progress_bar = tqdm(range(total_steps))
    model_fprime.train()
    for _epoch in range(num_epochs):
        for batch in dev_dataloader_fprime:
            on_device = {field: batch[field].to(device) for field in batch_fields}

            model_out = model_fprime(
                on_device['input_ids'],
                labels=on_device['labels'],
                attention_mask=on_device['attention_mask'],
                token_type_ids=on_device['token_type_ids'],
            )
            # Probability that f' assigns to class 1.
            fprime_output = torch.nn.functional.softmax(model_out.logits, dim=-1)[:, 1]

            loss = custom_loss(on_device['g_labels'], on_device['f_labels'], fprime_output, alpha)
            loss.backward()

            optimizer_fprime.step()
            lr_scheduler_fprime.step()
            optimizer_fprime.zero_grad()
            progress_bar.update(1)

    return model_fprime

In [None]:
def evaluate_model(model, dataloader, device):
    """Evaluate a classifier on `dataloader` and report accuracy, F1 and two gender gaps.

    Args:
        model: sequence-classification model called with input_ids, attention_mask,
            token_type_ids and labels.
        dataloader: yields batches containing 'labels', 'input_ids', 'attention_mask',
            'token_type_ids' and 'gender'.
        device: torch device the batch tensors are moved to.

    Returns:
        (accuracy, f1, EO, TNRD). EO is the mean prediction for gender 0 minus that for
        gender 1 among examples whose true label is 1. TNRD is the mean prediction for
        gender 1 minus that for gender 0 among examples whose true label is 0.
        A group without examples yields nan.
    """
    y_true = []
    y_pred = []
    gender = []
    # `datasets.load_metric` is deprecated; the `evaluate` package loads the same metric.
    metric = evaluate.load("glue", "mrpc")
    model.eval()

    for batch in dataloader:
        labels = batch['labels'].to(device)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        token_type_ids = batch['token_type_ids'].to(device)
        sex = batch['gender'].to(device)
        with torch.no_grad():
            outputs = model(input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids)
        predictions = torch.argmax(outputs.logits, dim=-1)
        metric.add_batch(predictions=predictions, references=labels)
        y_pred.append(predictions.cpu().numpy())
        y_true.append(labels.cpu().numpy())
        gender.append(sex.cpu().numpy())

    # Named `scores` rather than `eval` to avoid shadowing the builtin.
    scores = metric.compute()
    y_pred = np.concatenate(y_pred)
    y_true = np.concatenate(y_true)
    gender = np.concatenate(gender)
    EO = np.mean(y_pred[(y_true==1) & (gender==0)]) - np.mean(y_pred[(y_true==1) & (gender==1)])
    TNRD = np.mean(y_pred[(y_true==0) & (gender==1)]) - np.mean(y_pred[(y_true==0) & (gender==0)])

    return scores['accuracy'], scores['f1'], EO, TNRD

In [None]:
def grid_search(train_dataloader, val_dataloader, param_grid, num_trials=1):
    """Train and evaluate f' for every hyper-parameter combination.

    Args:
        train_dataloader: batches handed to `train_fprime`.
        val_dataloader: batches handed to `evaluate_model`.
        param_grid: iterable of `(alpha, learning_rate, epochs)` tuples.
        num_trials: number of train/evaluate repetitions averaged per combination.

    Returns:
        `(best_params, results)` where `results` holds one
        `(params, avg_accuracy, avg_f1, avg_equal_opportunity, avg_TNRD)` tuple
        per combination and `best_params` is the last combination whose average
        F1 was at least the current best while its equal-opportunity gap was at
        most as large in magnitude (None if no combination qualified).
    """
    best_f1 = -1
    best_equal_opportunity = 1
    best_params = None
    results = []

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    for params in param_grid:
        alpha, lr, epochs = params
        accuracies = []
        f1_scores = []
        equal_opportunity_scores = []
        true_negative_rate_differences = []

        for _ in range(num_trials):
            trained = train_fprime(train_dataloader, num_epochs=epochs, alpha=alpha, learning_rate=lr)
            # Some train_fprime definitions in this notebook return
            # (model, left_losses, right_losses); accept either form.
            model_fprime = trained[0] if isinstance(trained, tuple) else trained
            accuracy, f1, EO, TNRD = evaluate_model(model_fprime, val_dataloader, device)
            accuracies.append(accuracy)
            f1_scores.append(f1)
            equal_opportunity_scores.append(EO)
            true_negative_rate_differences.append(TNRD)

        avg_acc = np.mean(accuracies)
        avg_f1 = np.mean(f1_scores)
        avg_equal_opportunity = np.mean(equal_opportunity_scores)
        avg_TNRD = np.mean(true_negative_rate_differences)
        results.append((params, avg_acc, avg_f1, avg_equal_opportunity, avg_TNRD))

        print(f"Params: Alpha={params[0]}, Learning Rate={params[1]}, Epochs={params[2]} -> Accuracy:{avg_acc}, F1 Score: {avg_f1}, True Positive Difference Rate: {avg_equal_opportunity}, True Negative Difference Rate: {avg_TNRD}")

        # The gap is a signed difference between the two genders: compare magnitudes,
        # otherwise a strongly negative gap (bias in the other direction) would win.
        if (avg_f1 >= best_f1) and (abs(avg_equal_opportunity) <= abs(best_equal_opportunity)):
            best_f1 = avg_f1
            best_equal_opportunity = avg_equal_opportunity
            best_params = params

    return best_params, results

In [None]:
# Hyper-parameter values explored for f': loss weight alpha, AdamW learning rate, epoch count.
alphas = [0.01, 0.2, 1, 3, 5, 7, 10.0, 15.0, 17.0, 20.0, 50.0, 100.0]
learning_rates = [5e-7, 1e-6, 5e-6, 1e-5]
num_epochs_list = [1]

# Cartesian product: 12 * 4 * 1 = 48 (alpha, learning_rate, epochs) tuples.
param_grid = list(itertools.product(alphas, learning_rates, num_epochs_list))

# One full training + evaluation run per tuple (num_trials=1).
# NOTE(review): train_dataloader / dev_dataloader must already exist in the kernel when this
# cell runs; within this part of the notebook they are only built in a later cell — confirm run order.
best_params, all_results = grid_search(train_dataloader, dev_dataloader, param_grid, num_trials=1)

In [None]:
# Echo every grid-search result, one line per configuration.
for params, acc, f1, equal_opportunity, TNRD in all_results:
    print(f"Params: Alpha={params[0]}, Learning Rate={params[1]}, Epochs={params[2]} -> Accuracy:{acc}, F1 Score: {f1}, True Positive Difference Rate: {equal_opportunity}, True Negative Difference Rate: {TNRD}")

# Column-wise view of the same results; each entry is (params, acc, f1, EO, TNRD).
alphas = [entry[0][0] for entry in all_results]
lrs = [entry[0][1] for entry in all_results]
epochss = [entry[0][2] for entry in all_results]
accs = [entry[1] for entry in all_results]
f1s = [entry[2] for entry in all_results]
equal_opportunities = [entry[3] for entry in all_results]
TNRDs = [entry[4] for entry in all_results]

res_dict = {
    'Alpha': alphas,
    'Learning Rate': lrs,
    'Epochs': epochss,
    'Accuracy': accs,
    'F1 Score': f1s,
    'TPDR': equal_opportunities,
    'TNDR': TNRDs,
}

# Persist the table (row index = position in the grid) next to the notebook.
index = np.arange(len(accs))
dataset_g = pd.DataFrame(res_dict, index=index)
dataset_g.to_csv("dataset.csv")

## Use the results from f and g classification to create f'

In [None]:
# Collect unreachable Python objects first so the tensors they still hold are freed,
# then return the now-unused cached CUDA blocks to the driver.
gc.collect()
torch.cuda.empty_cache()
# NOTE(review): PyTorch reads this setting when its CUDA caching allocator is initialised;
# if CUDA was already used earlier in this kernel it presumably only applies after a restart — confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

Approaches:


*   First approach:

    argmin ∑ -g(x)f'(x)
    with a very small learning-rate

*   Second approach:

    argmin ∑ (-g(x)f'(x) + α|f(x)-f'(x)|)

*   Third approach:

    argmin ∑ (-g(x) log f'(x) + β(1-g(x)^2)|f(x)-f'(x)|)


In [None]:
def custom_loss(g_output, f_output, fprime_output, alpha=10.0): #alpha=1.0
    """Second-approach objective: follow g while staying close to f.

    Args:
        g_output: per-sample g values for the batch.
        f_output: per-sample f outputs for the batch.
        fprime_output: per-sample f' outputs for the batch.
        alpha: weight of the closeness penalty.

    Returns:
        `(combined, left, regularization)` where `left` is the batch sum of
        `-g * f'`, `regularization` is the mean squared error between f and f',
        and `combined = left + alpha * regularization`.
    """
    closeness = torch.nn.functional.mse_loss(f_output, fprime_output)
    agreement = (g_output * fprime_output * -1).sum()
    return agreement + alpha * closeness, agreement, closeness

def train_fprime(train_dataloader_fprime, num_epochs=2, alpha=10.0, learning_rate=5e-6, verbose=False): #alpha=1.0
    """Fine-tune f' from the "MoGP/f_x" checkpoint using `custom_loss`.

    Args:
        train_dataloader_fprime: iterable (with `len`) of batches holding the keys
            `g_labels`, `f_labels`, `input_ids`, `attention_mask` and `token_type_ids`.
        num_epochs: number of passes over the dataloader.
        alpha: weight forwarded to `custom_loss`.
        learning_rate: initial AdamW learning rate (used to be hard-coded to 5e-6);
            a "linear" schedule without warm-up is applied over all training steps.
        verbose: when True, print g(x), f(x) and f'(x) for every batch (debug aid;
            off by default because it floods the notebook output).

    Returns:
        `(model_fprime, left_losses, right_losses)`: the trained model and the
        per-step values of the second and third element returned by `custom_loss`.
    """
    # Initialize f' model
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    checkpoint = "MoGP/f_x"
    model_fprime = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
    model_fprime.to(device)

    optimizer_fprime = AdamW(model_fprime.parameters(), lr=learning_rate)
    num_training_steps = num_epochs * len(train_dataloader_fprime)
    lr_scheduler_fprime = get_scheduler(
        "linear",
        optimizer=optimizer_fprime,
        num_warmup_steps=0,
        num_training_steps=num_training_steps
    )

    progress_bar = tqdm(range(num_training_steps))
    left_losses = []
    right_losses = []
    model_fprime.train()
    for epoch in range(num_epochs):
        for batch in train_dataloader_fprime:
            g_output = batch['g_labels'].to(device) #y_g_labels
            f_output = batch['f_labels'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)

            # Only the logits are needed: no `labels` are passed, so the model does not
            # compute its own classification loss (it was never used).
            outputs_fprime = model_fprime(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
            # Softmax over the two logits, keep the second column (class 1)
            fprime_output = torch.nn.functional.softmax(outputs_fprime.logits, dim=-1)[:, 1]

            # Calculate custom loss
            loss, left, right = custom_loss(g_output, f_output, fprime_output, alpha)

            loss.backward()

            if verbose:
                print(f"g(x):{g_output}")
                print(f"f(x):{f_output}")
                print(f"f'(x):{fprime_output}")
                print("______________________________________________")

            optimizer_fprime.step()
            lr_scheduler_fprime.step()
            optimizer_fprime.zero_grad()
            progress_bar.update(1)
            left_losses.append(left.item())
            right_losses.append(right.item())

    return model_fprime, left_losses, right_losses

# Train f' for two epochs with alpha=10 and keep the per-step loss components for plotting.
model_fprime, left_losses, right_losses = train_fprime(train_dataloader, num_epochs=2, alpha=10.0)  #alpha=1.0


In [None]:
# Plot the two recorded loss components side by side, one point per training step.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 5))

ax_left.plot(left_losses, label='Left Loss')
ax_left.set_xlabel('Training Step')
ax_left.set_ylabel('Loss')
ax_left.set_title("Left Loss (Argmin -g(x)f'(x))")
ax_left.legend()

ax_right.plot(right_losses, label='Right Loss')
ax_right.set_xlabel('Training Step')
ax_right.set_ylabel('Loss')
ax_right.set_title("Regularization Term (|f(x)-f'(x)|)")
ax_right.legend()

fig.tight_layout()
plt.show()

In [None]:
hub_repo_name = "MoGP/f_prime_second_nikolas"
# Credentials must not live in the notebook: read the token from the environment.
# The token that used to be hard-coded on this line is exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]

# Save the model and tokenizer to the Hub
model_fprime.push_to_hub(hub_repo_name, use_auth_token=access_token)
tokenizer.push_to_hub(hub_repo_name, use_auth_token=access_token)

## Evaluation of f'

In [None]:
# Rebind to a freshly loaded copy of the dataset; the following cells filter it again.
bias_in_bios_dataset = load_dataset("LabHC/bias_in_bios")
bias_in_bios_dataset

In [None]:
# Nurse = 0   Physician = 1
def filter_and_change_values(example):
    """Recode `profession` in place: 13 -> 0, 19 -> 1, anything else -> None.

    Returns the same `example` mapping that was passed in.
    """
    code = example['profession']
    example['profession'] = 0 if code == 13 else (1 if code == 19 else None)
    return example

# Recode every split, then drop the rows whose profession became None
# (everything that is neither code 13 nor code 19).
for split in ['train', 'dev', 'test']:
    bias_in_bios_dataset[split] = bias_in_bios_dataset[split].map(filter_and_change_values).filter(lambda x: x['profession'] is not None)


In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(examples):
  """Tokenize the `hard_text` field of a batch with truncation enabled."""
  return tokenizer(examples['hard_text'], truncation=True)

# tokenize -> drop the raw text -> expose the profession as `labels` -> return torch tensors
tokenized_datasets = (
    bias_in_bios_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .rename_column('profession', 'labels')
    .with_format('torch')
)
data_collator = DataCollatorWithPadding(tokenizer)

In [None]:
# Test batches of 20 in dataset order (no shuffling), padded by data_collator.
test_dataloader = DataLoader(tokenized_datasets['test'], collate_fn=data_collator, batch_size=20)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reload the f' model that was pushed to the Hub and place it on the selected device.
checkpoint = "MoGP/f_prime_second_nikolas"
model_fprime = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2).to(device)

print(device)

In [None]:
# Run f' over the test set, collecting predictions, true labels and gender per batch.
y_true = []
y_pred = []
gender = []
# `datasets.load_metric` is deprecated; `evaluate` (imported at the top) provides the same metric.
metric = evaluate.load("glue", "mrpc")
model_fprime.eval()

for batch in test_dataloader:
  labels = batch['labels'].to(device)
  input_ids = batch['input_ids'].to(device)
  attention_mask = batch['attention_mask'].to(device)
  token_type_ids = batch['token_type_ids'].to(device)
  sex = batch['gender'].to(device)
  with torch.no_grad():
    outputs = model_fprime(input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids)
  logits = outputs.logits
  predictions = torch.argmax(logits, dim=-1)
  metric.add_batch(predictions=predictions, references=labels)
  pred = predictions.cpu().numpy()
  lab = labels.cpu().numpy()
  gen = sex.cpu().numpy()
  y_pred.append(pred)
  y_true.append(lab)
  gender.append(gen)
# Accuracy and F1 over the whole test set (displayed as the cell output)
metric.compute()

In [None]:
# Flatten the per-batch arrays into one array each, then tabulate true vs. predicted labels.
y_pred, y_true, gender = (np.concatenate(chunks) for chunks in (y_pred, y_true, gender))

conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
# Draw the confusion matrix (label 0 = nurse, 1 = physician), save it to disk and show it.
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=[0,1]) #nurse,physician
disp.plot()
plt.savefig("conf.png")  # written to the working directory
plt.show()

In [None]:
# Fairness metrics : female=1 , male=0
# y_pred is 0/1, so its mean over a group is that group's rate of predicting class 1.
# SP: positive-prediction rate, male minus female.
SP = np.mean(y_pred[gender==0]) - np.mean(y_pred[gender==1])
# EO: true-positive rate, male minus female.
EO = np.mean(y_pred[(y_true==1) & (gender==0)]) - np.mean(y_pred[(y_true==1) & (gender==1)])
TNRD = np.mean(y_pred[(y_true==0) & (gender==1)]) - np.mean(y_pred[(y_true==0) & (gender==0)]) # FPR_female - FPR_male = (1-FPR_male) - (1-FPR_female) = TNR_male - TNR_female
print("Statistical Parity: ",SP)
print("True Positive Rate Difference (Equal Opportunity): ",EO)
print("True Negative Rate Difference: ",TNRD)

In [None]:
y_true_flat = y_true
y_pred_flat = y_pred
gender_flat = gender

# Sorted distinct gender codes (0 = male, otherwise female in the titles below)
unique_genders = np.unique(gender_flat)

# Same font scale for every figure
sns.set(font_scale=1.5)

# Plot confusion matrix for each gender.
# The loop variable is deliberately NOT called `gender`: that name holds the per-sample
# array and overwriting it would break any re-run of this cell or the fairness cell.
for gender_value in unique_genders:
    in_group = gender_flat == gender_value
    y_true_gender = y_true_flat[in_group]
    y_pred_gender = y_pred_flat[in_group]

    # labels=[0, 1] keeps the matrix 2x2 even if a group lacks one of the classes
    cm = confusion_matrix(y_true_gender, y_pred_gender, labels=[0, 1])

    plt.figure(figsize=(6, 4))
    # cells are shown as a share of all samples of this gender
    sns.heatmap(cm/np.sum(cm), annot=True, fmt='.2%', cmap='Blues', cbar=False)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    gen = 'Male' if gender_value == 0 else 'Female'
    plt.title(f'Confusion Matrix for Gender: {gen}')
    plt.show()

# Use the results from f and g Regression to create f'

## Creating new dataset for f' with results of g and f

In [None]:
# Rebind to a freshly loaded copy of the dataset; the following cells filter it again.
bias_in_bios_dataset = load_dataset("LabHC/bias_in_bios")
bias_in_bios_dataset

In [None]:
# Nurse = 0   Physician = 1
def filter_and_change_values(example):
    """Recode `profession` in place: 13 -> 0, 19 -> 1, anything else -> None.

    Returns the same `example` mapping that was passed in.
    """
    code = example['profession']
    example['profession'] = 0 if code == 13 else (1 if code == 19 else None)
    return example

# Recode every split, then drop the rows whose profession became None
# (everything that is neither code 13 nor code 19).
for split in ['train', 'dev', 'test']:
    bias_in_bios_dataset[split] = bias_in_bios_dataset[split].map(filter_and_change_values).filter(lambda x: x['profession'] is not None)


In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(examples):
  """Tokenize the `hard_text` field of a batch with truncation enabled."""
  return tokenizer(examples['hard_text'], truncation=True)

# tokenize -> drop the raw text -> expose the profession as `labels` -> return torch tensors
tokenized_datasets = (
    bias_in_bios_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .rename_column('profession', 'labels')
    .with_format('torch')
)
data_collator = DataCollatorWithPadding(tokenizer)

In [None]:
# Dev batches of 20 in dataset order (no shuffling), padded by data_collator.
dev_dataloader = DataLoader(tokenized_datasets['dev'], collate_fn=data_collator, batch_size=20)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# f: loaded with two output logits
checkpoint_f = "MoGP/f_x"
model_f = AutoModelForSequenceClassification.from_pretrained(checkpoint_f, num_labels=2).to(device)

# g: loaded as a single-output regression head
checkpoint_g = "MoGP/g_x_reg_new_4e"
model_g = AutoModelForSequenceClassification.from_pretrained(
    checkpoint_g,
    problem_type="regression",
    num_labels=1,
)
model_g.to(device)

In [None]:
def calculate_yg_labels(predictions, labels, sensitive_attribute):
    """Encode each prediction error of f as a target value for g.

    Per element (same shape and dtype as `predictions`):
        1   false negative (label 1, predicted 0) with sensitive attribute 1 (women)
        -1  false positive (label 0, predicted 1) with sensitive attribute 0 (men)
        -2  the remaining errors (false positive for women, false negative for men)
        0   everything else
    """
    preds = np.asarray(predictions)
    truth = np.asarray(labels)
    group = np.asarray(sensitive_attribute)

    false_negative = (truth == 1) & (preds == 0)
    false_positive = (truth == 0) & (preds == 1)
    women = group == 1
    men = group == 0

    new_labels = np.zeros_like(predictions)
    new_labels[false_negative & women] = 1
    new_labels[false_positive & men] = -1
    # The rest of the wrong predictions - open question from the author: should this be added?
    new_labels[(false_positive & women) | (false_negative & men)] = -2
    return new_labels

# Per-sample outputs collected over the dev set:
#   f_labels -- f's softmax probability of class 1
#   g_labels -- g's regression output
#   y_g      -- error code derived from f's hard prediction (see calculate_yg_labels)
f_labels = []
g_labels = []
y_g = []

model_f.eval()
model_g.eval()

for batch in dev_dataloader:
    labels = batch['labels'].to(device)
    input_ids = batch['input_ids'].to(device)
    attention_mask = batch['attention_mask'].to(device)
    token_type_ids = batch['token_type_ids'].to(device)
    sensitive_attribute = batch['gender'].to(device)


    with torch.no_grad():
        outputs_f = model_f(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        outputs_g = model_g(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)

    f_logits = outputs_f.logits
    # soft output of f (probability of class 1) is stored; the hard arg-max feeds the error codes
    f_predictions = torch.nn.functional.softmax(f_logits, dim=-1)[:, 1].cpu().numpy()
    f_predictions_for_g = torch.argmax(f_logits, dim=-1)

    predictions_np = f_predictions_for_g.cpu().numpy()
    labels_np = labels.cpu().numpy()
    sensitive_attribute_np = sensitive_attribute.cpu().numpy()

    g_logits = outputs_g.logits
    # Drop only the trailing logit dimension: a bare squeeze() would also remove the batch
    # dimension of a final batch of size 1, giving a 0-d array that extend() cannot iterate.
    g_predictions = g_logits.squeeze(-1).cpu().numpy()

    y_g_labels = calculate_yg_labels(predictions_np, labels_np, sensitive_attribute_np)

    f_labels.extend(f_predictions)
    g_labels.extend(g_predictions)
    y_g.extend(y_g_labels)

In [None]:
f_labels = np.array(f_labels)
g_labels = np.array(g_labels)
# Text, label and gender come straight from the filtered dev split. They line up with the
# model outputs row by row only because the outputs were collected by iterating that split
# in order — assumes dev_dataloader was built without shuffling; verify if that changes.
texts = bias_in_bios_dataset['dev']['hard_text']
labels = bias_in_bios_dataset['dev']['profession']
genders = bias_in_bios_dataset['dev']['gender']

# One column per field of the f' dataset
data_dict = {
    'hard_text': texts,
    'gender': genders,
    'labels': labels,
    'f_labels': f_labels,
    'g_labels': g_labels,
    'y_g': y_g
}

datasets_f_prime = Dataset.from_dict(data_dict)
datasets_f_prime

In [None]:
# Convert the f' dataset to a DataFrame and save it as CSV without the index column.
# NOTE: this rebinds `dataset_g`, the name an earlier cell uses for the grid-search results table.
dataset_g = pd.DataFrame(datasets_f_prime)
dataset_g.to_csv("fpdataset.csv", index=False)

In [None]:
hub_repo_name = "MoGP/f_prime_dataset_reg_dev"
# Credentials must not live in the notebook: read the token from the environment.
# The token that used to be hard-coded on this line is exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]
csv_file_path = "fpdataset.csv"

api = HfApi()

# Upload the CSV as a single commit to the dataset repository
commit_message = "Add dataset file"
api.upload_file(
    path_or_fileobj=csv_file_path,
    path_in_repo="fpdataset.csv",
    repo_id=hub_repo_name,
    token=access_token,
    commit_message=commit_message,
    repo_type="dataset"
)

## Training set for f'

In [None]:
# Load the f' datasets from the Hub.
# NOTE(review): "MoGP/f_prime_dataset_reg_dev" is the repo the upload cell above writes
# fpdataset.csv to; the train repo is presumably produced the same way from the train split — confirm.
f_prime_dataset_train = load_dataset("MoGP/f_prime_dataset_reg")
f_prime_dataset_dev = load_dataset("MoGP/f_prime_dataset_reg_dev")

In [None]:
# Rebalance the f' training data by its y_g code.
data = f_prime_dataset_train['train'].to_pandas()

# Rows grouped by y_g value (variable names are the author's; class sizes are not visible here)
majority_class = data[data['y_g'] == 1]
minority_class = data[data['y_g'] == -1]
zero_class = data[data['y_g'] == 0]
# NOTE: rows with any other y_g value are not selected and therefore absent from the balanced set.

# remove some zeros randomly
zeros_sample = zero_class.sample(frac=0.019, random_state=42)  # keeps 1.9% of the y_g == 0 rows, seeded

# Oversample minority class
# (drawn with replacement until it has as many rows as the y_g == 1 group)
minority_class_oversampled = resample(minority_class, replace=True, n_samples=len(majority_class), random_state=42)

balanced_data = pd.concat([majority_class, minority_class_oversampled, zeros_sample])
# NOTE(review): balanced_data keeps its original, partly duplicated index; from_pandas may
# store it as an extra `__index_level_0__` column — confirm whether that is wanted.
balanced_dataset = Dataset.from_pandas(balanced_data, split='train')

In [None]:
# Sanity check: number of rows per y_g value in the balanced training set, largest first.
labels_pandas = pd.DataFrame(balanced_dataset['y_g'])
class_counts = labels_pandas.value_counts().sort_values(ascending=False)
print(class_counts)

In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(examples):
  """Tokenize the `hard_text` field of a batch with truncation enabled."""
  return tokenizer(examples['hard_text'], truncation=True)

# Balanced training set: tokenize -> drop the raw text -> return torch tensors
tokenized_datasets = (
    balanced_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .with_format('torch')
)

# Dev set: same pipeline
tokenized_datasets_dev = (
    f_prime_dataset_dev
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .with_format('torch')
)

data_collator = DataCollatorWithPadding(tokenizer)

In [None]:
# Shuffled batches of 20 from the balanced training set.
train_dataloader = DataLoader(tokenized_datasets, batch_size=20, shuffle=True, collate_fn=data_collator)
# Dev batches in dataset order; the dev data sits under the 'train' key of the loaded DatasetDict.
dev_dataloader = DataLoader(tokenized_datasets_dev['train'], collate_fn=data_collator, batch_size=20)

## Use the results from f and g on the evaluation set to create f'

In [None]:
# Collect unreachable Python objects first so the tensors they still hold are freed,
# then return the now-unused cached CUDA blocks to the driver.
gc.collect()
torch.cuda.empty_cache()
# NOTE(review): PyTorch reads this setting when its CUDA caching allocator is initialised;
# if CUDA was already used earlier in this kernel it presumably only applies after a restart — confirm.
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [None]:
def custom_loss(y_g_output, f_output, fprime_output, alpha=0.0, beta=10.0): #alpha=1.0
    """Third-approach objective: log-weighted pull along g, anchored to f where g is small.

    Args:
        y_g_output: per-sample g values for the batch.
        f_output: per-sample f outputs for the batch.
        fprime_output: per-sample f' outputs for the batch (log is taken of these).
        alpha: weight of the extra, unweighted regularization term.
        beta: weight of the (1 - g^2)-scaled regularization inside `left`
            (previously a hard-coded 10.0).

    Returns:
        `(combined, left, regularization)` where `regularization` is the mean
        squared error between f and f', `left` is the batch sum of
        `-g * log(f' + 1e-8) + beta * (1 - g^2) * regularization`, and
        `combined = left + alpha * regularization`.
    """
    mse_loss = nn.MSELoss()
    regularization_term = mse_loss(f_output, fprime_output)
    epsilon = 1e-8  # keeps log() finite when f'(x) is exactly 0
    log_fprime_output = torch.log(fprime_output + epsilon)
    # NOTE(review): regularization_term is the batch-mean scalar, so the (1 - g^2) weights
    # scale one shared value rather than each sample's own |f(x) - f'(x)| as in the
    # written objective — confirm this is intended.
    left = torch.sum((y_g_output * log_fprime_output * -1) + (beta * (1 - y_g_output**2) * regularization_term))
    combined_loss = left + alpha * regularization_term
    return combined_loss, left, regularization_term

def train_fprime(train_dataloader_fprime, num_epochs=2, alpha=0.0, learning_rate=5e-6, verbose=False): #alpha=1.0
    """Fine-tune f' from the "MoGP/f_x" checkpoint using `custom_loss`.

    Args:
        train_dataloader_fprime: iterable (with `len`) of batches holding the keys
            `g_labels`, `f_labels`, `input_ids`, `attention_mask` and `token_type_ids`.
        num_epochs: number of passes over the dataloader.
        alpha: weight forwarded to `custom_loss`.
        learning_rate: initial AdamW learning rate (used to be hard-coded to 5e-6);
            a "linear" schedule without warm-up is applied over all training steps.
        verbose: when True, print g(x), f(x) and f'(x) for every batch (debug aid;
            off by default because it floods the notebook output).

    Returns:
        `(model_fprime, left_losses, right_losses)`: the trained model and the
        per-step values of the second and third element returned by `custom_loss`.
    """
    # Initialize f' model
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    checkpoint = "MoGP/f_x"
    model_fprime = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2)
    model_fprime.to(device)

    optimizer_fprime = AdamW(model_fprime.parameters(), lr=learning_rate)
    num_training_steps = num_epochs * len(train_dataloader_fprime)
    lr_scheduler_fprime = get_scheduler(
        "linear",
        optimizer=optimizer_fprime,
        num_warmup_steps=0,
        num_training_steps=num_training_steps
    )

    progress_bar = tqdm(range(num_training_steps))
    left_losses = []
    right_losses = []
    model_fprime.train()
    for epoch in range(num_epochs):
        for batch in train_dataloader_fprime:
            g_output = batch['g_labels'].to(device)
            f_output = batch['f_labels'].to(device)
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            token_type_ids = batch['token_type_ids'].to(device)

            # Only the logits are needed: no `labels` are passed, so the model does not
            # compute its own classification loss (it was never used).
            outputs_fprime = model_fprime(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
            # Softmax over the two logits, keep the second column (class 1)
            fprime_output = torch.nn.functional.softmax(outputs_fprime.logits, dim=-1)[:, 1]

            # Calculate custom loss
            loss, left, right = custom_loss(g_output, f_output, fprime_output, alpha)

            loss.backward()

            if verbose:
                print(f"g(x):{g_output}")
                print(f"f(x):{f_output}")
                print(f"f'(x):{fprime_output}")
                print("______________________________________________")

            optimizer_fprime.step()
            lr_scheduler_fprime.step()
            optimizer_fprime.zero_grad()
            progress_bar.update(1)
            left_losses.append(left.item())
            right_losses.append(right.item())

    return model_fprime, left_losses, right_losses

# Train f' for two epochs with alpha=0 and keep the per-step loss components for plotting.
model_fprime, left_losses, right_losses = train_fprime(train_dataloader, num_epochs=2, alpha=0.0)  #alpha=1.0


In [None]:
# Plot the two recorded loss components side by side, one point per training step.
plt.figure(figsize=(10, 5))

plt.subplot(1, 2, 1)
plt.plot(left_losses, label='Left Loss')
plt.xlabel('Training Step')
plt.ylabel('Loss')
# Title matches the third-approach objective used in this section
# (the previous text was left over from the second approach).
plt.title("Left Loss (-g(x)log f'(x) + β(1-g(x)^2)|f(x)-f'(x)|)")
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(right_losses, label='Right Loss')
plt.xlabel('Training Step')
plt.ylabel('Loss')
plt.title("Regularization Term (|f(x)-f'(x)|) over Training Steps")
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
hub_repo_name = "MoGP/f_prime_reg_4"
# Credentials must not live in the notebook: read the token from the environment.
# The token that used to be hard-coded on this line is exposed and should be revoked.
access_token = os.environ["HF_TOKEN"]

# Save the model and tokenizer to the Hub
model_fprime.push_to_hub(hub_repo_name, use_auth_token=access_token)
tokenizer.push_to_hub(hub_repo_name, use_auth_token=access_token)

## Evaluation of f'

In [None]:
# Rebind to a freshly loaded copy of the dataset; the following cells filter it again.
bias_in_bios_dataset = load_dataset("LabHC/bias_in_bios")
bias_in_bios_dataset

In [None]:
# Nurse = 0   Physician = 1
def filter_and_change_values(example):
    """Recode `profession` in place: 13 -> 0, 19 -> 1, anything else -> None.

    Returns the same `example` mapping that was passed in.
    """
    code = example['profession']
    example['profession'] = 0 if code == 13 else (1 if code == 19 else None)
    return example

# Recode every split, then drop the rows whose profession became None
# (everything that is neither code 13 nor code 19).
for split in ['train', 'dev', 'test']:
    bias_in_bios_dataset[split] = bias_in_bios_dataset[split].map(filter_and_change_values).filter(lambda x: x['profession'] is not None)


In [None]:
checkpoint = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

def tokenize_function(examples):
  """Tokenize the `hard_text` field of a batch with truncation enabled."""
  return tokenizer(examples['hard_text'], truncation=True)

# tokenize -> drop the raw text -> expose the profession as `labels` -> return torch tensors
tokenized_datasets = (
    bias_in_bios_dataset
    .map(tokenize_function, batched=True)
    .remove_columns(['hard_text'])
    .rename_column('profession', 'labels')
    .with_format('torch')
)
data_collator = DataCollatorWithPadding(tokenizer)

In [None]:
# Test batches of 20 in dataset order (no shuffling), padded by data_collator.
test_dataloader = DataLoader(tokenized_datasets['test'], collate_fn=data_collator, batch_size=20)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reload the f' model that was pushed to the Hub and place it on the selected device.
checkpoint = "MoGP/f_prime_reg_4"
model_fprime = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=2).to(device)

print(device)

In [None]:
# Run f' over the test set, collecting predictions, true labels and gender per batch.
y_true = []
y_pred = []
gender = []
# `datasets.load_metric` is deprecated; `evaluate` (imported at the top) provides the same metric.
metric = evaluate.load("glue", "mrpc")
model_fprime.eval()

for batch in test_dataloader:
  labels = batch['labels'].to(device)
  input_ids = batch['input_ids'].to(device)
  attention_mask = batch['attention_mask'].to(device)
  token_type_ids = batch['token_type_ids'].to(device)
  sex = batch['gender'].to(device)
  with torch.no_grad():
    outputs = model_fprime(input_ids, labels=labels, attention_mask=attention_mask, token_type_ids=token_type_ids)
  logits = outputs.logits
  predictions = torch.argmax(logits, dim=-1)
  metric.add_batch(predictions=predictions, references=labels)
  pred = predictions.cpu().numpy()
  lab = labels.cpu().numpy()
  gen = sex.cpu().numpy()
  y_pred.append(pred)
  y_true.append(lab)
  gender.append(gen)
# Accuracy and F1 over the whole test set (displayed as the cell output)
metric.compute()

In [None]:
# Flatten the per-batch arrays into one array each, then tabulate true vs. predicted labels.
y_pred, y_true, gender = (np.concatenate(chunks) for chunks in (y_pred, y_true, gender))

conf_matrix = confusion_matrix(y_true, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
# Draw the confusion matrix (label 0 = nurse, 1 = physician), save it to disk and show it.
disp = ConfusionMatrixDisplay(confusion_matrix=conf_matrix, display_labels=[0,1]) #nurse,physician
disp.plot()
plt.savefig("conf.png")  # written to the working directory
plt.show()

In [None]:
# Fairness metrics : female=1 , male=0
# y_pred is 0/1, so its mean over a group is that group's rate of predicting class 1.
# SP: positive-prediction rate, male minus female.
SP = np.mean(y_pred[gender==0]) - np.mean(y_pred[gender==1])
# EO: true-positive rate, male minus female.
EO = np.mean(y_pred[(y_true==1) & (gender==0)]) - np.mean(y_pred[(y_true==1) & (gender==1)])
TNRD = np.mean(y_pred[(y_true==0) & (gender==1)]) - np.mean(y_pred[(y_true==0) & (gender==0)]) # FPR_female - FPR_male = (1-FPR_male) - (1-FPR_female) = TNR_male - TNR_female
print("Statistical Parity: ",SP)
print("True Positive Rate Difference (Equal Opportunity): ",EO)
print("True Negative Rate Difference: ",TNRD)

In [None]:
y_true_flat = y_true
y_pred_flat = y_pred
gender_flat = gender

# Sorted distinct gender codes (0 = male, otherwise female in the titles below)
unique_genders = np.unique(gender_flat)

# Same font scale for every figure
sns.set(font_scale=1.5)

# Plot confusion matrix for each gender.
# The loop variable is deliberately NOT called `gender`: that name holds the per-sample
# array and overwriting it would break any re-run of this cell or the fairness cell.
for gender_value in unique_genders:
    in_group = gender_flat == gender_value
    y_true_gender = y_true_flat[in_group]
    y_pred_gender = y_pred_flat[in_group]

    # labels=[0, 1] keeps the matrix 2x2 even if a group lacks one of the classes
    cm = confusion_matrix(y_true_gender, y_pred_gender, labels=[0, 1])

    plt.figure(figsize=(6, 4))
    # cells are shown as a share of all samples of this gender
    sns.heatmap(cm/np.sum(cm), annot=True, fmt='.2%', cmap='Blues', cbar=False)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    gen = 'Male' if gender_value == 0 else 'Female'
    plt.title(f'Confusion Matrix for Gender: {gen}')
    plt.show()