# Necessities

In [1]:
# ------------------------------------------------------------------
# Imports
# ------------------------------------------------------------------
# Basic data processing libraries
import pandas as pd
import numpy as np
import os
import torch

# Graph data processing libraries
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx

# Libraries for (G)NNs
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import torch.nn as nn
from sklearn.metrics import roc_auc_score, f1_score

# ------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------
def show_df_info(df):
    """Print a quick overview of a DataFrame.

    Emits, in order: the ``df.info()`` report (followed by the ``None`` that
    ``info()`` returns), whether any row is duplicated, the number of unique
    values per column, and the first rows.

    Args:
        df: The pandas DataFrame to summarise.
    """
    # info() writes its report itself and returns None, which is printed too.
    print(df.info())

    sections = (
        ('####### Repeat ####### \n', lambda: df.duplicated().any()),
        ('####### Count ####### \n', lambda: df.nunique()),
        ('####### Example ####### \n', lambda: df.head()),
    )
    for banner, compute in sections:
        print(banner, compute())

def label_statics(label_df, label_list):
    """Print absolute and relative value counts for the given label columns.

    Args:
        label_df: DataFrame holding the label columns.
        label_list: Iterable of column names to report on.
    """
    total_rows = label_df.shape[0]
    passes = (
        ("####### nCount #######", lambda counts: counts),
        ("####### nPercent #######", lambda counts: counts / total_rows),
    )
    for banner, transform in passes:
        print(banner)
        for column in label_list:
            print(transform(label_df[column].value_counts()))

# ------------------------------------------------------------------
# Data stuff
# ------------------------------------------------------------------
base_path = os.getcwd()
input_ali_data_path = base_path

# Load the data files
user_labels_path = os.path.join(input_ali_data_path, "region_job.csv")
user_edges_path = os.path.join(input_ali_data_path, "region_job_relationship.csv")

# Create dataframes to store the information from the .csv files
user_labels = pd.read_csv(user_labels_path)
user_edges = pd.read_csv(user_edges_path)

# Keep only the edges whose two endpoints both appear in the label table
user_edges = user_edges[user_edges['uid1'].isin(user_labels['user_id']) & user_edges['uid2'].isin(user_labels['user_id'])]

# Node features: every column except the target and the first column
# (presumably the user id -- verify against the CSV header).
user_labels_train = user_labels.drop(columns=['I_am_working_in_field'])
node_features = user_labels_train.iloc[:, 1:]
node_features = torch.tensor(node_features.values, dtype=torch.float)

# Map user ids to their row position in user_labels; built once and reused
# for both edge endpoints.
user_id_to_index = dict(zip(user_labels['user_id'], range(len(user_labels))))

# Work on an explicit copy so the column assignments below never write into a
# slice of user_edges (avoids pandas' SettingWithCopyWarning).
edges = user_edges[['uid1', 'uid2']].copy()
edges['uid1'] = edges['uid1'].map(user_id_to_index)
edges['uid2'] = edges['uid2'].map(user_id_to_index)

# Convert edges dataframe to a (2, num_edges) tensor
edges_tensor = torch.tensor(edges.values, dtype=torch.long).t().contiguous()

# Create edge_index tensor
edge_index = edges_tensor

# Binarise the target: -1 becomes class 0, every value in 0..4 becomes class 1.
# This overwrites the column in user_labels, which later cells read.
user_labels['I_am_working_in_field'] = user_labels['I_am_working_in_field'].map({-1: 0, 0: 1, 1: 1, 2: 1, 3: 1, 4: 1})

# Create torch-geometric data
data = Data(x=node_features, edge_index=edge_index)

num_nodes = node_features.size(0)
num_classes = 2
num_node_features = data.num_node_features

# Create masks for training, validation and testing
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask = torch.zeros(num_nodes, dtype=torch.bool)

# 60-20-20 train / validation / test split, taken in row order (no shuffling).
# num_train and num_val are the end offsets of the train and validation ranges.
num_train = int(num_nodes * 0.6)
num_val = int(num_nodes * 0.8)
train_mask[:num_train] = True
val_mask[num_train:num_val] = True
test_mask[num_val:] = True

data.train_mask = train_mask
data.test_mask = test_mask
data.val_mask = val_mask

# Labels from the data (in this case: Job Classification)
data.y = torch.tensor(user_labels['I_am_working_in_field'].values, dtype=torch.long)


# ------------------------------------------------------------------
# Set Device
# ------------------------------------------------------------------

def set_device():
    """Return the first CUDA device when one is available, otherwise the CPU."""
    if torch.cuda.is_available():
        return torch.device('cuda:0')
    return torch.device('cpu')

# ------------------------------------------------------------------
# Loss
# ------------------------------------------------------------------

def fairness_aware_loss(output, data, sensitive_attr, alpha=0, beta=0, gamma=0, delta=0):
    """Negative log-likelihood loss blended with differentiable fairness penalties.

    The result is
    ``(1 - (alpha + beta + gamma + delta)) * nll + alpha * sp + beta * te + gamma * eod + delta * oaed``.

    Args:
        output: Log-probabilities for the training nodes, one row per node and
            one column per class (column 1 is the positive class).
        data: Object exposing ``y`` and ``train_mask``; the targets are
            ``data.y[data.train_mask]``.
        sensitive_attr: Sensitive attribute of the training nodes, compared
            against 0 and 1 to form the two groups.
        alpha: Weight of the statistical-parity term.
        beta: Weight of the treatment-equality term.
        gamma: Weight of the equal-opportunity term.
        delta: Weight of the overall-accuracy-equality term.

    Returns:
        Scalar tensor with the combined loss.
    """
    # Read the mask from `data`, not from a notebook-level global, so the
    # function works with whatever data object the caller passes in.
    labels = data.y[data.train_mask]
    standard_loss = F.nll_loss(output, labels)

    # `output` holds log-probabilities, so exp() recovers class probabilities.
    pos_prob = torch.exp(output[:, 1])
    neg_prob = torch.exp(output[:, 0])

    group1 = sensitive_attr == 1
    group0 = sensitive_attr == 0
    is_pos = labels == 1
    is_neg = labels == 0

    # Statistical parity: gap between the groups' mean positive probability.
    sp_reg = torch.abs(pos_prob[group1].mean() - pos_prob[group0].mean())

    # Treatment equality: group gaps of the masked negative-class probability
    # on true negatives and the masked positive-class probability on true
    # positives. Means are taken over all training nodes.
    fp_diff = (neg_prob * is_neg * group1).float().mean() - \
              (neg_prob * is_neg * group0).float().mean()
    fn_diff = (pos_prob * is_pos * group1).float().mean() - \
              (pos_prob * is_pos * group0).float().mean()
    treatment_reg = torch.abs(fp_diff) + torch.abs(fn_diff)

    # Equal opportunity: same quantity as |fn_diff| above.
    eod_reg = torch.abs((pos_prob * is_pos * group1).float().mean() -
                        (pos_prob * is_pos * group0).float().mean())

    # Overall accuracy equality term: group gap of the masked positive
    # probability, averaged over all training nodes.
    oaed_reg = torch.abs((pos_prob * group1).float().mean() -
                         (pos_prob * group0).float().mean())

    penalty = alpha + beta + gamma + delta

    # Build the sum explicitly. Writing the terms on continuation lines that
    # start with `+` outside parentheses makes each one a separate no-op
    # statement, which drops every regulariser.
    combined_loss = (1 - penalty) * standard_loss
    weighted_terms = (
        (alpha, sp_reg),
        (beta, treatment_reg),
        (gamma, eod_reg),
        (delta, oaed_reg),
    )
    for weight, term in weighted_terms:
        # Skip disabled terms so a NaN from an empty group cannot leak in
        # through a zero weight (0 * nan is nan).
        if weight != 0:
            combined_loss = combined_loss + weight * term

    return combined_loss

# ------------------------------------------------------------------
# Fairness Metrics
# ------------------------------------------------------------------

def calculate_fairness(label, predictions, sens_attr='Gender', balanced=False):
    """Compute group-fairness metrics for binary predictions.

    Labels and the sensitive attribute are read from the notebook-level
    ``user_labels`` frame, or from ``filtered_user_labels`` when ``balanced``
    is anything other than ``False``.

    Args:
        label: Name of the column holding the true binary labels.
        predictions: Tensor of binary model predictions, one per row of the
            frame that is read.
        sens_attr: Name of the column holding the binary sensitive attribute.
        balanced: Selects which frame the columns are read from.

    Returns:
        A dict with, for each of statistical parity, equal opportunity,
        overall accuracy equality and treatment equality: the absolute
        difference between the groups, the value for S=0 and the value for S=1.
    """
    source_df = user_labels if balanced is False else filtered_user_labels
    device = set_device()

    labels = torch.tensor(source_df[label].values, dtype=torch.long).to(device).float()
    sensitive_attribute = torch.tensor(source_df[sens_attr].values, dtype=torch.long).to(device).float()
    predictions = predictions.float()

    in_group_1 = sensitive_attribute == 1
    in_group_0 = sensitive_attribute == 0
    is_positive = labels == 1

    # Statistical parity: mean prediction per group.
    sp_g1 = predictions[in_group_1].mean()
    sp_g0 = predictions[in_group_0].mean()
    spd = abs(sp_g1 - sp_g0)

    # Equal opportunity: mean prediction among true positives per group.
    eod_g1 = predictions[is_positive & in_group_1].mean()
    eod_g0 = predictions[is_positive & in_group_0].mean()
    eod = abs(eod_g1 - eod_g0)

    # Overall accuracy equality: accuracy per group.
    oaed_g1 = (predictions[in_group_1] == labels[in_group_1]).float().mean()
    oaed_g0 = (predictions[in_group_0] == labels[in_group_0]).float().mean()
    oaed = abs(oaed_g1 - oaed_g0)

    def _fn_fp_ratio(in_group):
        """False-negative / false-positive ratio; inf when there are no FPs."""
        false_neg = ((predictions == 0) & is_positive & in_group).sum()
        false_pos = ((predictions == 1) & (labels == 0) & in_group).sum()
        return false_neg / false_pos if false_pos != 0 else float('inf')

    # Treatment equality: FN/FP ratio per group.
    ted_g1 = _fn_fp_ratio(in_group_1)
    ted_g0 = _fn_fp_ratio(in_group_0)
    ted = abs(ted_g1 - ted_g0)

    return {
        'Statistical Parity Difference': spd,
        'Statistical Parity Group with S=0': sp_g0,
        'Statistical Parity Group S=1': sp_g1,
        'Equal Opportunity Difference': eod,
        'Equal Opportunity Group with S=0': eod_g0,
        'Equal Opportunity Group S=1': eod_g1,
        'Overall Accuracy Equality Difference': oaed,
        'Overall Accuracy Group with S=0': oaed_g0,
        'Overall Accuracy Group S=1': oaed_g1,
        'Treatment Equality Difference': ted,
        'Treatment Equality Group with S=0': ted_g0,
        'Treatment Equality Group S=1': ted_g1,
    }

# ------------------------------------------------------------------
# Model Training
# ------------------------------------------------------------------

# Train the model
def training(model, data, optimizer, epochs=1000, fairness=False, alpha=0, beta=0, gamma=0, delta=0, log_every=10):
    """Train ``model`` full-batch on ``data`` and periodically print metrics.

    Args:
        model: Module called as ``model(data.x, data.edge_index)`` and
            returning per-node log-probabilities.
        data: Graph data object exposing ``x``, ``edge_index``, ``y`` and
            ``train_mask``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of optimisation steps.
        fairness: When true, optimise ``fairness_aware_loss`` instead of the
            plain negative log-likelihood. The last feature column of
            ``data.x`` is passed as the sensitive attribute.
        alpha, beta, gamma, delta: Weights forwarded to ``fairness_aware_loss``.
        log_every: Evaluate and print every this many epochs (default 10, the
            previously hard-coded interval). A falsy value disables logging.
    """
    device = set_device()
    model.to(device)
    data.to(device)

    # The plain criterion is stateless, so build it once instead of per epoch.
    criterion = torch.nn.NLLLoss()

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)

        if fairness:
            loss = fairness_aware_loss(out[data.train_mask], data, data.x[data.train_mask, -1],
                                       alpha=alpha, beta=beta, gamma=gamma, delta=delta)
        else:
            loss = criterion(out[data.train_mask], data.y[data.train_mask])

        loss.backward()
        optimizer.step()

        # Evaluation is only needed on epochs that are actually reported;
        # running it after every step wasted a full evaluation pass each time.
        if log_every and epoch % log_every == 0:
            metrics = test(model, data)
            print(f'Epoch {epoch} | Loss: {loss.item()} | \n AUC_ROC: {metrics["AUC_ROC"]} | F1 Score: {metrics["F1_Score"]} | SPD: {metrics["parity"]} | EOD: {metrics["equality"]}')

    # Leave the model in eval mode, as the per-epoch evaluation used to do.
    model.eval()

# ------------------------------------------------------------------
# Model Testing
# ------------------------------------------------------------------

# Test the model
def test(model, data, balanced=False):
    # model.to('cpu')
    # data.to('cpu')
    model.to(set_device())
    data.to(set_device())
    
    model.eval()
    with torch.inference_mode():
      out = model(data.x, data.edge_index)

    _, pred = model(data.x, data.edge_index).max(dim=1)
    correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    accuracy = correct / int(data.test_mask.sum())
    # print(f'Accuracy: {accuracy}')

    # Convert model outputs to binary predictions
    predictions = out.argmax(dim=1)

    fairness_metrics = calculate_fairness(label='GoodCustomer', predictions=predictions, sens_attr='Gender', balanced=balanced)
    fairness_metrics['Accuracy'] = accuracy

    return fairness_metrics

# ------------------------------------------------------------------
# Print Metrics
# ------------------------------------------------------------------

# def print_metrics(metrics):
#     for key, value in metrics.items():
#         print(f"\n{key} : {value:.5f}")

def print_metrics(metrics):
    """Print ``key : value`` lines with five decimals, in groups of three.

    Every fourth, seventh, tenth, ... entry is preceded by two blank lines.

    Args:
        metrics: Mapping from metric name to a numeric value.
    """
    for position, (name, number) in enumerate(metrics.items()):
        # A new group starts at positions 3, 6, 9, ...
        separator = "\n\n" if position and position % 3 == 0 else ""
        print(f"{separator}{name} : {number:.5f}")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def test(model, data, val=True, balanced=False):
    model.to(set_device())
    data.to(set_device())
    
    if val==True:
      mask = data.val_mask
    else:
      mask = data.test_mask

    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        predictions = out.argmax(dim=1)

    # Compute accuracy
    correct = int(predictions[mask].eq(data.y[mask]).sum().item())
    accuracy = correct / int(mask.sum())
    
    # Extract the predictions and the true labels
    y_true = data.y[mask].cpu().numpy()
    y_pred = predictions[mask].cpu().numpy()
    
    # Compute F1 score
    f1 = f1_score(y_true, y_pred, average='binary')

    # Compute AUC-ROC score
    y_probs = out[mask][:, 1].cpu().numpy() 
    auc_roc = roc_auc_score(y_true, y_probs)
    
    fairness_metrics = fair_metric('I_am_working_in_field', predictions, 'region')
    fairness_metrics['Accuracy'] = accuracy
    fairness_metrics['F1_Score'] = f1
    fairness_metrics['AUC_ROC'] = auc_roc

    return fairness_metrics

def fair_metric(labels, pred, sens):
	"""Statistical-parity and equal-opportunity gaps between two groups.

	Labels and the sensitive attribute are read from the notebook-level
	``user_labels`` frame.

	Args:
		labels: Name of the column in ``user_labels`` holding the true labels.
		pred: Tensor of binary predictions, one entry per row of ``user_labels``.
		sens: Name of the column in ``user_labels`` holding the sensitive
			attribute (groups are the rows equal to 0 and equal to 1).

	Returns:
		``{"parity": ..., "equality": ...}`` as Python floats. ``parity`` is the
		absolute gap in positive-prediction rate between the groups and
		``equality`` the same gap restricted to rows whose label is 1.
	"""
	pred = torch.as_tensor(pred)
	# Build the masks as tensors on the predictions' device so all indexing
	# and reductions stay inside torch.
	label_values = torch.as_tensor(user_labels[labels].values, device=pred.device)
	sens_values = torch.as_tensor(user_labels[sens].values, device=pred.device)

	idx_s0 = sens_values == 0
	idx_s1 = sens_values == 1

	idx_s0_y1 = idx_s0 & (label_values == 1)
	idx_s1_y1 = idx_s1 & (label_values == 1)

	def positive_rate(mask):
		# Tensor reductions replace Python's built-in sum(), which walked the
		# tensor one element at a time on every evaluation.
		return pred[mask].sum() / mask.sum()

	parity = abs(positive_rate(idx_s0) - positive_rate(idx_s1))
	equality = abs(positive_rate(idx_s0_y1) - positive_rate(idx_s1_y1))

	return {"parity": parity.item(), "equality": equality.item()}

In [3]:
# Sensitive attribute ('region') of every node, as a long tensor placed on the
# device chosen by set_device(); the fairness-aware layers read it per node index.
sens_attribute_tensor = torch.tensor(
    user_labels['region'].values,
    dtype=torch.long,
).to(set_device())

# GCN Model

In [4]:
class GCN(nn.Module):
	"""``GCN_Body`` feature extractor followed by a linear classifier.

	Args:
		nfeat: Number of input features per node.
		nhid: Width of the hidden representation produced by the body.
		nclass: Number of output classes.
		dropout: Forwarded to ``GCN_Body``.
	"""

	def __init__(self, nfeat, nhid=128, nclass=2, dropout=0):
		super().__init__()
		self.body = GCN_Body(nfeat, nhid, dropout)
		self.fc = nn.Linear(nhid, nclass)

		# Visit this module and every submodule, re-initialising the Linear ones.
		for module in self.modules():
			self.weights_init(module)

	def weights_init(self, m):
		"""Xavier-initialise ``m`` and zero its bias when it is an ``nn.Linear``."""
		if not isinstance(m, nn.Linear):
			return
		torch.nn.init.xavier_uniform_(m.weight.data)
		if m.bias is not None:
			m.bias.data.fill_(0.0)

	def forward(self, x, edge_index):
		"""Return per-node log-probabilities over the classes."""
		hidden = self.body(x, edge_index)
		logits = self.fc(hidden)
		return F.log_softmax(logits, dim=1)

In [5]:
class GCN_Body(nn.Module):
	"""Single ``GCNConv`` layer mapping ``nfeat`` inputs to ``nhid`` outputs.

	``dropout`` is accepted for signature compatibility but is not used.
	"""

	def __init__(self, nfeat, nhid, dropout):
		super().__init__()
		self.gc1 = GCNConv(nfeat, nhid)

	def forward(self, x, edge_index):
		"""Apply the graph convolution and return its raw output."""
		return self.gc1(x, edge_index)

In [6]:
# Baseline GCN (128 hidden units, 2 classes) and its Adam optimizer.
gcn_model = GCN(
    data.num_node_features,
    nhid=128,
    nclass=2,
)
optimizer_gcn_model = torch.optim.Adam(
    gcn_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

In [7]:
# Train the baseline GCN for 500 epochs with the plain NLL objective.
training(model=gcn_model, data=data, optimizer=optimizer_gcn_model, epochs=500, fairness=False)

Epoch 0 | Loss: 1.9603071212768555 | 
 AUC_ROC: 0.691802188770655 | F1 Score: 0.25793139025019346 | SPD: 0.0012488961219787598 | EOD: 0.0006324052810668945
Epoch 10 | Loss: 0.591144859790802 | 
 AUC_ROC: 0.5118493381780431 | F1 Score: 0.16104575163398693 | SPD: 0.012405738234519958 | EOD: 0.015292197465896606
Epoch 20 | Loss: 0.5250619053840637 | 
 AUC_ROC: 0.40460513452720825 | F1 Score: 0.006407322654462243 | SPD: 0.004926184192299843 | EOD: 0.0008556335233151913
Epoch 30 | Loss: 0.5436067581176758 | 
 AUC_ROC: 0.4064464270265594 | F1 Score: 0.005535055350553505 | SPD: 0.004169499967247248 | EOD: 0.0014996497193351388
Epoch 40 | Loss: 0.5076047778129578 | 
 AUC_ROC: 0.43962291288173716 | F1 Score: 0.01069995541685243 | SPD: 0.003594045527279377 | EOD: 0.004201175644993782
Epoch 50 | Loss: 0.48936641216278076 | 
 AUC_ROC: 0.4875456354355913 | F1 Score: 0.043356081894821354 | SPD: 0.0033786147832870483 | EOD: 0.01945755071938038
Epoch 60 | Loss: 0.4889358580112457 | 
 AUC_ROC: 0.508590

In [8]:
print("Here are the values for the GCN model")

# test() is called with its defaults; with the definition from cell 2
# (val=True) these metrics are computed on the validation split.
metrics_gcn_model = test(gcn_model, data)

print_metrics(metrics_gcn_model)

Here are the values for the GCN model
parity : 0.00103
equality : 0.00603
Accuracy : 0.85021


F1_Score : 0.05491
AUC_ROC : 0.62982


# FAME

In [9]:
class FairnessAwareMessagePassingLayer(MessagePassing):
    """Mean-aggregating message-passing layer with a learnable fairness scaling.

    Each message is the linearly projected neighbour feature, scaled by a
    degree normalisation and by ``1 + bias_correction * (s[row] - s[col])``,
    where ``s`` is the notebook-level ``sens_attribute_tensor``.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='mean')
        self.lin = nn.Linear(in_channels, out_channels)
        # Plain attribute (not a registered buffer): it keeps whatever device
        # sens_attribute_tensor was created on.
        self.sensitive_attr = sens_attribute_tensor
        self.bias_correction = nn.Parameter(torch.rand(1))

    def forward(self, x, edge_index):
        """Add self-loops, project the features and propagate messages."""
        num_nodes = x.size(0)
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)

        projected = self.lin(x)

        return self.propagate(edge_index, size=(num_nodes, num_nodes), x=projected)

    # The parameter names below are kept as-is: PyG selects what to pass to
    # message() from its signature.
    def message(self, x_j, edge_index, size):
        """Build the per-edge message from the neighbour features ``x_j``."""
        row, col = edge_index

        # Symmetric normalisation computed from the degree of the `row` endpoints.
        inv_sqrt_deg = degree(row, size[0], dtype=x_j.dtype).pow(-0.5)
        edge_norm = inv_sqrt_deg[row] * inv_sqrt_deg[col]

        # Difference of the sensitive attribute between the edge's endpoints.
        group_difference = self.sensitive_attr[row] - self.sensitive_attr[col]
        fairness_adjustment = 1 + self.bias_correction * group_difference.view(-1, 1)

        return fairness_adjustment * edge_norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        """Return the aggregated messages unchanged."""
        return aggr_out

In [10]:
class FairMP_GCN(torch.nn.Module):
    """Stack of ``FairnessAwareMessagePassingLayer`` blocks with a linear head.

    Args:
        data: Object exposing ``num_node_features`` (input width of the first layer).
        layers: Total number of message-passing layers; ``layers - 1`` extra
            hidden-to-hidden layers are added after the first one.
        hidden: Width of every hidden representation.
        dropout: Dropout probability applied after each extra layer.
    """

    def __init__(self, data, layers=1, hidden=128, dropout=0):
        super().__init__()
        self.conv1 = FairnessAwareMessagePassingLayer(data.num_node_features, hidden)

        extra_layers = [
            FairnessAwareMessagePassingLayer(hidden, hidden)
            for _ in range(layers - 1)
        ]
        self.convs = torch.nn.ModuleList(extra_layers)

        self.fc = nn.Linear(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        """Return per-node log-probabilities over the two classes."""
        hidden_state = F.relu(self.conv1(x, edge_index))

        for layer in self.convs:
            hidden_state = F.relu(layer(hidden_state, edge_index))
            hidden_state = F.dropout(hidden_state, p=self.dropout, training=self.training)

        logits = self.fc(hidden_state)
        return F.log_softmax(logits, dim=1)

In [11]:
# FAME model (single fairness-aware layer, 128 hidden units) and its Adam optimizer.
Fair_gcn_model = FairMP_GCN(
    data,
    hidden=128,
)
optimizer_Fair_gcn_model = torch.optim.Adam(
    Fair_gcn_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

In [12]:
# Train the FAME model for 500 epochs with the plain NLL objective.
training(model=Fair_gcn_model, data=data, optimizer=optimizer_Fair_gcn_model, epochs=500, fairness=False)

Epoch 0 | Loss: 0.6845083236694336 | 
 AUC_ROC: 0.5216423133489057 | F1 Score: 0.02887241513850956 | SPD: 0.004457160830497742 | EOD: 0.0038131820037961006
Epoch 10 | Loss: 0.6579487323760986 | 
 AUC_ROC: 0.5001376416645038 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 20 | Loss: 0.636222779750824 | 
 AUC_ROC: 0.4991384419067393 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 30 | Loss: 0.6187978982925415 | 
 AUC_ROC: 0.49890003460506965 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 40 | Loss: 0.6048751473426819 | 
 AUC_ROC: 0.4990359460160913 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 50 | Loss: 0.593803346157074 | 
 AUC_ROC: 0.49941305476252273 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 60 | Loss: 0.5850485563278198 | 
 AUC_ROC: 0.5000618565619863 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 70 | Loss: 0.5781196355819702 | 
 AUC_ROC: 0.500913314300545 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 80 | Loss: 0.5726077556610107 | 
 AUC_ROC: 0.501968379617614 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.

In [13]:
# The banner previously said "GCN model", a copy-paste from the baseline cell;
# this cell reports the fairness-aware (FAME) model.
print("Here are the values for the Fair GCN (FAME) model")

# test() is called with its defaults; with the definition from cell 2
# (val=True) these metrics are computed on the validation split.
metrics_Fair_gcn_model = test(Fair_gcn_model, data)

print_metrics(metrics_Fair_gcn_model)

Here are the values for the GCN model
parity : 0.00007
equality : 0.00072
Accuracy : 0.85220


F1_Score : 0.00694
AUC_ROC : 0.61655


# A-FAME

In [14]:
import torch
from torch_geometric.nn import MessagePassing
from torch.nn import Linear, Parameter
from torch_geometric.utils import add_self_loops, softmax
import torch.nn.functional as F

class Attention_FairMessagePassing(MessagePassing):
    """Sum-aggregating attention layer with a learnable fairness offset.

    The attention score of an edge is a linear function of the concatenated
    projected endpoint features, shifted by
    ``bias_correction * (s[row] - s[col])`` where ``s`` is the notebook-level
    ``sens_attribute_tensor``, and then soft-maxed.

    Args:
        in_channels: Size of each input node feature vector.
        out_channels: Size of each output node feature vector.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='add')
        self.lin = Linear(in_channels, out_channels)
        self.att = Linear(2 * out_channels, 1)

        # Plain attribute (not a registered buffer): it keeps whatever device
        # sens_attribute_tensor was created on.
        self.sensitive_attr = sens_attribute_tensor
        self.bias_correction = Parameter(torch.rand(1))

    def forward(self, x, edge_index):
        """Add self-loops, project the features and propagate messages."""
        num_nodes = x.size(0)
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)

        projected = self.lin(x)

        return self.propagate(edge_index, size=(num_nodes, num_nodes), x=projected)

    # The parameter names below are kept as-is: PyG selects what to pass to
    # message() from its signature.
    def message(self, edge_index, x_i, x_j, size_i):
        """Weight the neighbour features ``x_j`` by fairness-adjusted attention."""
        # Raw attention score from both endpoints' projected features.
        scores = self.att(torch.cat([x_i, x_j], dim=-1))

        row, col = edge_index
        group_difference = self.sensitive_attr[row] - self.sensitive_attr[col]
        scores = scores + self.bias_correction * group_difference.view(-1, 1)

        # NOTE(review): scores are normalised over edges sharing the same
        # edge_index[0]; confirm this is the intended grouping for the
        # aggregation direction in use.
        attention = softmax(scores, row, num_nodes=size_i)

        return attention * x_j

    def update(self, aggr_out):
        """Return the aggregated messages unchanged."""
        return aggr_out
    
# GCN class that takes in the data as an input for dimensions of the convolutions
class Fair_Attention_MP_GCN(torch.nn.Module):
    """Stack of ``Attention_FairMessagePassing`` blocks with a linear head.

    Args:
        data: Object exposing ``num_node_features`` (input width of the first layer).
        layers: Total number of message-passing layers; ``layers - 1`` extra
            hidden-to-hidden layers are added after the first one.
        hidden: Width of every hidden representation.
        dropout: Stored on the module; the forward pass does not apply it.
    """

    def __init__(self, data, layers=1, hidden=128, dropout=0):
        super().__init__()
        self.conv1 = Attention_FairMessagePassing(data.num_node_features, hidden)

        extra_layers = [
            Attention_FairMessagePassing(hidden, hidden)
            for _ in range(layers - 1)
        ]
        self.convs = torch.nn.ModuleList(extra_layers)

        self.fc = Linear(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        """Return per-node log-probabilities over the two classes."""
        hidden_state = F.relu(self.conv1(x, edge_index))

        for layer in self.convs:
            hidden_state = F.relu(layer(hidden_state, edge_index))

        logits = self.fc(hidden_state)
        return F.log_softmax(logits, dim=1)

In [15]:
# A-FAME model (single attention-based fairness-aware layer, 128 hidden units)
# and its Adam optimizer.
Fair_gat_model = Fair_Attention_MP_GCN(
    data,
    hidden=128,
)
optimizer_Fair_gat_model = torch.optim.Adam(
    Fair_gat_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

# Train for 500 epochs with the plain NLL objective.
training(model=Fair_gat_model, data=data, optimizer=optimizer_Fair_gat_model, epochs=500, fairness=False)

Epoch 0 | Loss: 0.7189450263977051 | 
 AUC_ROC: 0.6755909248204861 | F1 Score: 0.3103574702108158 | SPD: 0.021946489810943604 | EOD: 0.010221362113952637
Epoch 10 | Loss: 0.5846805572509766 | 
 AUC_ROC: 0.4397590838307812 | F1 Score: 0.0 | SPD: 0.0009568306850269437 | EOD: 0.0003022517776116729
Epoch 20 | Loss: 0.5340754985809326 | 
 AUC_ROC: 0.44997839345964186 | F1 Score: 0.0 | SPD: 0.00047969710431061685 | EOD: 0.0
Epoch 30 | Loss: 0.5040113925933838 | 
 AUC_ROC: 0.4994765550653171 | F1 Score: 0.0 | SPD: 0.0003553515998646617 | EOD: 0.0
Epoch 40 | Loss: 0.4814510941505432 | 
 AUC_ROC: 0.5564986806817198 | F1 Score: 0.0 | SPD: 0.0001243454753421247 | EOD: 0.0
Epoch 50 | Loss: 0.46669647097587585 | 
 AUC_ROC: 0.6001628601090059 | F1 Score: 0.0 | SPD: 9.45246429182589e-05 | EOD: 0.0
Epoch 60 | Loss: 0.45777827501296997 | 
 AUC_ROC: 0.6269331689592526 | F1 Score: 0.0 | SPD: 1.5672980225645006e-05 | EOD: 0.00015112588880583644
Epoch 70 | Loss: 0.45212167501449585 | 
 AUC_ROC: 0.642547841

In [16]:
print("Here are the values for the FairGAT model")

# test() is called with its defaults; with the definition from cell 2
# (val=True) these metrics are computed on the validation split.
metrics_Fair_gat_model = test(Fair_gat_model, data)

print_metrics(metrics_Fair_gat_model)

Here are the values for the FairGAT model
parity : 0.00007
equality : 0.00045
Accuracy : 0.85257


F1_Score : 0.00100
AUC_ROC : 0.75148


# GAT Model

In [17]:
class GAT(nn.Module):
	"""``GAT_Body`` feature extractor followed by a linear classifier.

	Args:
		nfeat: Number of input features per node.
		nhid: Width of the hidden representation produced by the body.
		nclass: Number of output classes.
		dropout: Forwarded to ``GAT_Body``.
	"""

	def __init__(self, nfeat, nhid=128, nclass=2, dropout=0):
		super().__init__()
		self.body = GAT_Body(nfeat, nhid, dropout)
		self.fc = nn.Linear(nhid, nclass)

		# Visit this module and every submodule, re-initialising the Linear ones.
		for module in self.modules():
			self.weights_init(module)

	def weights_init(self, m):
		"""Xavier-initialise ``m`` and zero its bias when it is an ``nn.Linear``."""
		if not isinstance(m, nn.Linear):
			return
		torch.nn.init.xavier_uniform_(m.weight.data)
		if m.bias is not None:
			m.bias.data.fill_(0.0)

	def forward(self, x, edge_index):
		"""Return per-node log-probabilities over the classes."""
		hidden = self.body(x, edge_index)
		logits = self.fc(hidden)
		return F.log_softmax(logits, dim=1)

In [18]:
from torch_geometric.nn import GATConv

class GAT_Body(nn.Module):
	"""Single ``GATConv`` layer mapping ``nfeat`` inputs to ``nhid`` outputs.

	``dropout`` is accepted for signature compatibility but is not used.
	"""

	def __init__(self, nfeat, nhid, dropout):
		super().__init__()
		self.gc1 = GATConv(nfeat, nhid)

	def forward(self, x, edge_index):
		"""Apply the attention convolution and return its raw output."""
		return self.gc1(x, edge_index)

In [19]:
# Baseline GAT (128 hidden units) and its Adam optimizer.
gat_model = GAT(
    data.num_node_features,
    nhid=128,
)
optimizer_gat_model = torch.optim.Adam(
    gat_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

# Train for 500 epochs with the plain NLL objective.
training(model=gat_model, data=data, optimizer=optimizer_gat_model, epochs=500, fairness=False)

Epoch 0 | Loss: 0.6486591696739197 | 
 AUC_ROC: 0.5046597023964011 | F1 Score: 0.14988978692138136 | SPD: 0.044061556458473206 | EOD: 0.03822830319404602
Epoch 10 | Loss: 0.5072559714317322 | 
 AUC_ROC: 0.4513836188251579 | F1 Score: 0.002771362586605081 | SPD: 0.007024255581200123 | EOD: 3.9512524381279945e-05
Epoch 20 | Loss: 0.4761475622653961 | 
 AUC_ROC: 0.5032413487325893 | F1 Score: 0.04045801526717557 | SPD: 0.0035303160548210144 | EOD: 0.0032547004520893097
Epoch 30 | Loss: 0.4680342376232147 | 
 AUC_ROC: 0.4997633662081495 | F1 Score: 0.009361702127659575 | SPD: 0.0005128607153892517 | EOD: 0.001727561466395855
Epoch 40 | Loss: 0.46490606665611267 | 
 AUC_ROC: 0.5189113677653776 | F1 Score: 0.018937834499794155 | SPD: 0.0017570611089468002 | EOD: 0.0015579024329781532
Epoch 50 | Loss: 0.4625028669834137 | 
 AUC_ROC: 0.5270663335928714 | F1 Score: 0.020618556701030927 | SPD: 0.001274801790714264 | EOD: 0.0014346800744533539
Epoch 60 | Loss: 0.4601212739944458 | 
 AUC_ROC: 0.53

In [20]:
print("Here are the values for the GAT model")

# test() is called with its defaults; with the definition from cell 2
# (val=True) these metrics are computed on the validation split.
metrics_gat_model = test(gat_model, data)

print_metrics(metrics_gat_model)

Here are the values for the GAT model
parity : 0.00443
equality : 0.00099
Accuracy : 0.84896


F1_Score : 0.11876
AUC_ROC : 0.73169


# Compare results

In [21]:
# Print the raw metric dictionaries of the four models, one per paragraph.
model_reports = (
    ("GCN model", metrics_gcn_model),
    ("Fair GCN model", metrics_Fair_gcn_model),
    ("GAT model", metrics_gat_model),
    ("Fair GAT model", metrics_Fair_gat_model),
)
for model_name, model_metrics in model_reports:
    print(f"{model_name}: {model_metrics}\n")

GCN model: {'parity': 0.001032860018312931, 'equality': 0.006028328090906143, 'Accuracy': 0.8502101924920716, 'F1_Score': 0.05490926012098651, 'AUC_ROC': 0.6298239034518556}

Fair GCN model: {'parity': 6.72023743391037e-05, 'equality': 0.0007230408955365419, 'Accuracy': 0.8522014897853823, 'F1_Score': 0.006937561942517344, 'AUC_ROC': 0.6165462842806472}

GAT model: {'parity': 0.004432922229170799, 'equality': 0.0009912922978401184, 'Accuracy': 0.8489564127148019, 'F1_Score': 0.1187607573149742, 'AUC_ROC': 0.7316886408858898}

Fair GAT model: {'parity': 7.177771476563066e-05, 'equality': 0.0004533776664175093, 'Accuracy': 0.8525702485434029, 'F1_Score': 0.0009995002498750627, 'AUC_ROC': 0.7514803183666406}



In [None]:
Values for the FairGAT model - same model specifications as the one from Carlos,
but with one A-FAME instead of one GCNConv layer

parity : 0.00706
equality : 0.00207
Accuracy : 0.68000
F1_Score : 0.80952
AUC_ROC : 0.67496