In [4]:
# ------------------------------------------------------------------
# Imports
# ------------------------------------------------------------------
# Basic data processing libraries
import pandas as pd
import numpy as np
import os
import torch

# Graph data processing libraries
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx

# Libraries for (G)NNs
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import torch.nn as nn
from sklearn.metrics import roc_auc_score, f1_score

# ------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------
def show_df_info(df):
    """Print a quick overview of a DataFrame.

    Shows, in order: the ``DataFrame.info()`` summary, whether any row is
    duplicated, the number of distinct values per column, and the first rows.

    Args:
        df: The pandas DataFrame to summarise.

    Returns:
        None. Everything is written to standard output.
    """
    # `info()` writes its summary itself and returns None, so wrapping it in
    # print() would emit a stray "None" line after the summary.
    df.info()
    print('####### Repeat ####### \n', df.duplicated().any())
    print('####### Count ####### \n', df.nunique())
    print('####### Example ####### \n', df.head())

def label_statics(label_df, label_list):
    """Print value counts, then value shares, for each requested column.

    Args:
        label_df: DataFrame holding the label columns.
        label_list: Iterable of column names in ``label_df`` to summarise.

    Returns:
        None. The absolute counts are printed first for every column,
        followed by the counts divided by the number of rows.
    """
    print("####### nCount #######")
    for column in label_list:
        absolute = label_df[column].value_counts()
        print(absolute)

    print("####### nPercent #######")
    for column in label_list:
        # Share of rows per value: counts divided by the total row count.
        relative = label_df[column].value_counts() / label_df.shape[0]
        print(relative)

# ------------------------------------------------------------------
# Data stuff
# ------------------------------------------------------------------
# Input files are resolved relative to the current working directory.
base_path = os.getcwd()
input_ali_data_path = base_path

# Load the data files
user_labels_path = os.path.join(input_ali_data_path, "german.csv")
user_edges_path = os.path.join(input_ali_data_path, "german_edges.csv")

# Create dataframes to store the information from the .csv files
user_labels = pd.read_csv(user_labels_path)
user_edges = pd.read_csv(user_edges_path)

# Encode the sensitive attribute (Female -> 1, Male -> 0) and the target
# (1 stays 1, -1 becomes 0).
# NOTE(review): newer pandas versions may warn about dtype downcasting in
# `replace`; confirm both columns end up numeric.
user_labels['Gender'] = user_labels['Gender'].replace({'Female': 1, 'Male': 0})
user_labels['GoodCustomer'] = user_labels['GoodCustomer'].replace({1: 1, -1: 0})
# The row index doubles as the node id.
user_labels.insert(0, 'user_id', user_labels.index)
user_labels = user_labels.drop('PurposeOfLoan', axis=1)

# Keep only edges whose two endpoints are known nodes.
user_edges = user_edges[user_edges['uid1'].isin(user_labels['user_id']) & user_edges['uid2'].isin(user_labels['user_id'])]
# Feature frame: everything except the prediction target.
user_labels_train = user_labels.drop(columns=['GoodCustomer'])

# Node features: every remaining column except `user_id` (column 0).
# Note that this still includes the sensitive attribute `Gender`.
node_features = user_labels_train.iloc[:, 1:]
node_features = torch.tensor(node_features.values, dtype=torch.float)

# Map node ids to their row position in `user_labels` (built once, used for
# both endpoints).
node_id_to_position = dict(zip(user_labels['user_id'], range(len(user_labels))))

# Extract edges from user_edges dataframe. `.copy()` makes `edges` an
# independent frame, so the column assignments below do not trigger
# SettingWithCopyWarning or depend on view/copy semantics of the slice.
edges = user_edges[['uid1', 'uid2']].copy()
edges['uid1'] = edges['uid1'].map(node_id_to_position)
edges['uid2'] = edges['uid2'].map(node_id_to_position)

# Convert edges dataframe to a (2, num_edges) tensor
edges_tensor = torch.tensor(edges.values, dtype=torch.long).t().contiguous()

# Create edge_index tensor
edge_index = edges_tensor

# Create torch-geometric data
data = Data(x=node_features, edge_index=edge_index)

num_nodes = node_features.size(0)
num_classes = 2
num_node_features = data.num_node_features

# Create masks for training, validation and testing
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask = torch.zeros(num_nodes, dtype=torch.bool)

# 60 / 20 / 20 train / validation / test split, taken in row order
# (no shuffling). `num_val` is the END index of the validation slice,
# not the number of validation nodes.
num_train = int(num_nodes * 0.6)
num_val = int(num_nodes * 0.8)
train_mask[:num_train] = True
val_mask[num_train:num_val] = True
test_mask[num_val:] = True


data.train_mask = train_mask
data.test_mask = test_mask
data.val_mask = val_mask

# Labels from the data (binary `GoodCustomer` target)
data.y = torch.tensor(user_labels['GoodCustomer'].values, dtype=torch.long)


# ------------------------------------------------------------------
# Set Device
# ------------------------------------------------------------------

def set_device():
    """Return the CUDA device when CUDA is available, otherwise the CPU device."""
    if torch.cuda.is_available():
        return torch.device('cuda')
    return torch.device('cpu')

# ------------------------------------------------------------------
# Loss
# ------------------------------------------------------------------

def fairness_aware_loss(output, data, sensitive_attr, alpha=0, beta=0, gamma=0, delta=0):
    """Negative log-likelihood loss blended with differentiable fairness penalties.

    The result is::

        (1 - (alpha + beta + gamma + delta)) * nll
            + alpha * sp_reg + beta * treatment_reg
            + gamma * eod_reg + delta * oaed_reg

    Args:
        output: Per-node scores for the training nodes, shape (n_train, 2).
            They are passed to ``F.nll_loss`` and exponentiated to obtain
            class probabilities, so they are expected to be log-probabilities
            (the models in this notebook end in ``log_softmax``).
        data: Object exposing ``y`` (labels) and ``train_mask``; the targets
            are ``data.y[data.train_mask]`` and must align with ``output``.
        sensitive_attr: Per-training-node group indicator compared against
            0 and 1.
        alpha: Weight of the statistical-parity penalty.
        beta: Weight of the treatment-equality penalty.
        gamma: Weight of the equal-opportunity penalty.
        delta: Weight of the penalty stored as ``oaed_reg``.

    Returns:
        A scalar tensor holding the combined loss.
    """
    # Use the mask stored on `data` (not a notebook-level global) so the
    # labels always match the object that was passed in.
    labels = data.y[data.train_mask]
    standard_loss = F.nll_loss(output, labels)

    # Class probabilities recovered from the log-probabilities.
    pos_prob = torch.exp(output[:, 1])
    neg_prob = torch.exp(output[:, 0])

    in_group_1 = sensitive_attr == 1
    in_group_0 = sensitive_attr == 0
    is_positive = labels == 1
    is_negative = labels == 0

    # Statistical Parity Regularization: gap between the groups' mean
    # positive probability. The mean of an empty selection is NaN, which
    # would poison the loss even with alpha == 0, so an empty group
    # contributes no penalty.
    if in_group_1.any() and in_group_0.any():
        sp_reg = torch.abs(pos_prob[in_group_1].mean() - pos_prob[in_group_0].mean())
    else:
        sp_reg = pos_prob.new_zeros(())

    # Treatment Equality Regularization. The masked products are averaged
    # over ALL training nodes, not over the members of each group.
    fp_diff = (neg_prob * is_negative * in_group_1).float().mean() - \
              (neg_prob * is_negative * in_group_0).float().mean()
    fn_diff = (pos_prob * is_positive * in_group_1).float().mean() - \
              (pos_prob * is_positive * in_group_0).float().mean()
    treatment_reg = torch.abs(fp_diff) + torch.abs(fn_diff)

    # Equal Opportunity Difference Regularization (same quantity as |fn_diff|).
    eod_reg = torch.abs(fn_diff)

    # Overall Accuracy Equality Difference Regularization.
    # NOTE(review): this compares masked positive probabilities, not
    # per-group accuracy -- confirm this is the intended surrogate.
    oaed_reg = torch.abs((pos_prob * in_group_1).float().mean() - \
                         (pos_prob * in_group_0).float().mean())

    penalty = alpha + beta + gamma + delta

    # Combine losses. The whole sum must live in ONE expression: a line that
    # merely starts with `+ term` is a separate statement whose value is
    # discarded, which silently drops every regulariser.
    combined_loss = (
        (1 - penalty) * standard_loss
        + alpha * sp_reg
        + beta * treatment_reg
        + gamma * eod_reg
        + delta * oaed_reg
    )

    return combined_loss

# ------------------------------------------------------------------
# Fairness Metrics
# ------------------------------------------------------------------

def calculate_fairness(label, predictions, sens_attr='Gender', balanced=False):
    """
    Compute group-fairness statistics for a set of binary predictions.

    Args:
    label: Name of the column holding the ground-truth binary labels.
    predictions: Tensor of binary predictions, aligned row by row with the
        label frame.
    sens_attr: Name of the column holding the binary sensitive attribute.
    balanced: When False the columns are read from the notebook-level
        `user_labels` frame, otherwise from `filtered_user_labels`.

    Returns:
    A dictionary with the SPD, EOD, OAED and TED gaps together with the
    underlying per-group values (S=0 and S=1).
    """
    source_frame = user_labels if balanced is False else filtered_user_labels

    device = set_device()
    y_true = torch.tensor(source_frame[label].values, dtype=torch.long).to(device).float()
    group = torch.tensor(source_frame[sens_attr].values, dtype=torch.long).to(device).float()
    y_hat = predictions.float()

    in_g1 = group == 1
    in_g0 = group == 0
    actual_pos = y_true == 1
    actual_neg = y_true == 0

    # Statistical parity: positive-prediction rate per group.
    sp_g1 = y_hat[in_g1].mean()
    sp_g0 = y_hat[in_g0].mean()
    spd = abs(sp_g1 - sp_g0)

    # Equal opportunity: positive-prediction rate among actual positives.
    eod_g1 = y_hat[actual_pos & in_g1].mean()
    eod_g0 = y_hat[actual_pos & in_g0].mean()
    eod = abs(eod_g1 - eod_g0)

    # Overall accuracy equality: accuracy per group.
    oaed_g1 = (y_hat[in_g1] == y_true[in_g1]).float().mean()
    oaed_g0 = (y_hat[in_g0] == y_true[in_g0]).float().mean()
    oaed = abs(oaed_g1 - oaed_g0)

    # Treatment equality: ratio of false negatives to false positives per
    # group; a group without false positives gets an infinite ratio.
    false_neg_g1 = ((y_hat == 0) & actual_pos & in_g1).sum()
    false_pos_g1 = ((y_hat == 1) & actual_neg & in_g1).sum()
    false_neg_g0 = ((y_hat == 0) & actual_pos & in_g0).sum()
    false_pos_g0 = ((y_hat == 1) & actual_neg & in_g0).sum()

    ted_g1 = false_neg_g1 / false_pos_g1 if false_pos_g1 != 0 else float('inf')
    ted_g0 = false_neg_g0 / false_pos_g0 if false_pos_g0 != 0 else float('inf')
    ted = abs(ted_g1 - ted_g0)

    return {
        'Statistical Parity Difference': spd,
        'Statistical Parity Group with S=0': sp_g0,
        'Statistical Parity Group S=1': sp_g1,
        'Equal Opportunity Difference': eod,
        'Equal Opportunity Group with S=0': eod_g0,
        'Equal Opportunity Group S=1': eod_g1,
        'Overall Accuracy Equality Difference': oaed,
        'Overall Accuracy Group with S=0': oaed_g0,
        'Overall Accuracy Group S=1': oaed_g1,
        'Treatment Equality Difference': ted,
        'Treatment Equality Group with S=0': ted_g0,
        'Treatment Equality Group S=1': ted_g1
    }

# ------------------------------------------------------------------
# Model Training
# ------------------------------------------------------------------

# Train the model
def training(model, data, optimizer, epochs=2000, fairness=False, alpha=0, beta=0, gamma=0, delta=0):
    """Train `model` full-batch on the nodes selected by `data.train_mask`.

    Args:
        model: Module called as ``model(data.x, data.edge_index)``; its output
            is fed to an NLL loss.
        data: Graph object providing ``x``, ``edge_index``, ``y`` and
            ``train_mask``.
        optimizer: Optimizer built over ``model``'s parameters.
        epochs: Number of full-batch optimisation steps.
        fairness: When True, optimise ``fairness_aware_loss`` instead of the
            plain NLL loss.
        alpha, beta, gamma, delta: Penalty weights forwarded to
            ``fairness_aware_loss`` (only used when ``fairness`` is True).

    Returns:
        None. Every 10th epoch the loss and the metrics returned by ``test``
        are printed.
    """
    device = set_device()
    model.to(device)
    data.to(device)

    # The criterion is stateless, so build it once instead of once per epoch.
    criterion = torch.nn.NLLLoss()

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)

        if fairness:
            # NOTE(review): the LAST feature column is taken as the sensitive
            # attribute -- confirm that this column really is `Gender`.
            loss = fairness_aware_loss(out[data.train_mask], data, data.x[data.train_mask, -1],
                                       alpha=alpha, beta=beta, gamma=gamma, delta=delta)
        else:
            loss = criterion(out[data.train_mask], data.y[data.train_mask])

        loss.backward()
        optimizer.step()

        # Evaluation is only needed for the epochs that are reported, so the
        # extra forward pass and metric computation are skipped otherwise.
        if epoch % 10 == 0:
            metrics = test(model, data)
            print(f'Epoch {epoch} | Loss: {loss.item()} | \n AUC_ROC: {metrics["AUC_ROC"]} | F1 Score: {metrics["F1_Score"]} | SPD: {metrics["parity"]} | EOD: {metrics["equality"]}')

    # Leave the model in evaluation mode once training is finished.
    model.eval()

# ------------------------------------------------------------------
# Model Testing
# ------------------------------------------------------------------

# Test the model
def test(model, data, balanced=False):
    """Evaluate `model` on the test mask and compute fairness statistics.

    NOTE(review): a later cell defines another `test` (with a `val`
    argument); when the notebook is run top to bottom that later definition
    replaces this one. `training` reads metric keys such as "AUC_ROC" that
    this version does not produce.

    Args:
        model: Module called as ``model(data.x, data.edge_index)``.
        data: Graph object providing ``x``, ``edge_index``, ``y`` and
            ``test_mask``.
        balanced: Forwarded to ``calculate_fairness``.

    Returns:
        The dictionary returned by ``calculate_fairness`` with an extra
        'Accuracy' entry (fraction of correct predictions on the test mask).
    """
    device = set_device()
    model.to(device)
    data.to(device)

    model.eval()
    with torch.inference_mode():
        out = model(data.x, data.edge_index)

    # Convert model outputs to binary predictions. The single forward pass
    # above is reused for both accuracy and fairness; a second pass outside
    # inference mode would only repeat the work and track gradients.
    predictions = out.argmax(dim=1)

    correct = int(predictions[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    accuracy = correct / int(data.test_mask.sum())

    fairness_metrics = calculate_fairness(label='GoodCustomer', predictions=predictions, sens_attr='Gender', balanced=balanced)
    fairness_metrics['Accuracy'] = accuracy

    return fairness_metrics

# ------------------------------------------------------------------
# Print Metrics
# ------------------------------------------------------------------

# def print_metrics(metrics):
#     for key, value in metrics.items():
#         print(f"\n{key} : {value:.5f}")

def print_metrics(metrics):
    """Print each metric as ``name : value`` with five decimals.

    The entries are shown in groups of three: every group after the first
    is preceded by two blank lines.

    Args:
        metrics: Mapping from metric name to a numeric value.
    """
    for position, (name, score) in enumerate(metrics.items()):
        # A new group starts at positions 3, 6, 9, ...
        opens_new_group = position > 0 and position % 3 == 0
        separator = "\n\n" if opens_new_group else ""
        print(f"{separator}{name} : {score:.5f}")

In [6]:
# def test(model, data, val=True, balanced=False):
#     model.to(set_device())
#     data.to(set_device())
    
#     model.eval()
#     with torch.inference_mode():
#       out = model(data.x, data.edge_index)

#     _, pred = model(data.x, data.edge_index).max(dim=1)
#     correct = int(pred[data.test_mask].eq(data.y[data.val_mask]).sum().item())
#     correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
#     accuracy = correct / int(data.test_mask.sum())
    
#     predictions = out.argmax(dim=1)

#     fairness_metrics = fair_metric('GoodCustomer', predictions, 'Gender')
#     fairness_metrics['Accuracy'] = accuracy
#     # val_auc_roc = roc_auc_score(data.y[data.test_mask].cpu().numpy(), out[data.test_mask].detach().cpu().numpy())
#     fairness_metrics['AUC_ROC'] = val_auc_roc

#     return fairness_metrics

def test(model, data, val=True, balanced=False):
    """Evaluate `model` on the validation or test mask.

    Args:
        model: Module called as ``model(data.x, data.edge_index)``.
        data: Graph object providing ``x``, ``edge_index``, ``y``,
            ``val_mask`` and ``test_mask``.
        val: When equal to True the validation mask is used, otherwise the
            test mask.
        balanced: Accepted but not used by this function.

    Returns:
        The dictionary produced by ``fair_metric`` extended with 'Accuracy',
        'F1_Score' and 'AUC_ROC' for the selected mask.

    NOTE(review): ``fair_metric`` receives the predictions for ALL nodes, so
    the fairness values are not restricted to the selected mask, unlike the
    other three metrics -- confirm this is intended.
    """
    device = set_device()
    model.to(device)
    data.to(device)

    # Comparison kept as `== True` so that only True-equal values pick the
    # validation mask.
    mask = data.val_mask if val == True else data.test_mask

    model.eval()
    with torch.no_grad():
        out = model(data.x, data.edge_index)
        predictions = out.argmax(dim=1)

    # Accuracy on the selected nodes.
    n_correct = int(predictions[mask].eq(data.y[mask]).sum().item())
    accuracy = n_correct / int(mask.sum())

    # F1 on hard predictions; AUC-ROC on the class-1 score of each node.
    y_true = data.y[mask].cpu().numpy()
    y_pred = predictions[mask].cpu().numpy()
    f1 = f1_score(y_true, y_pred, average='binary')

    class1_scores = out[mask][:, 1].cpu().numpy()
    auc_roc = roc_auc_score(y_true, class1_scores)

    fairness_metrics = fair_metric('GoodCustomer', predictions, 'Gender')
    fairness_metrics.update({
        'Accuracy': accuracy,
        'F1_Score': f1,
        'AUC_ROC': auc_roc,
    })

    return fairness_metrics

def fair_metric(labels, pred, sens):
    """Statistical-parity and equal-opportunity gaps between the two groups.

    Args:
        labels: Name of the column in the notebook-level `user_labels` frame
            that holds the ground-truth labels.
        pred: Binary predictions, one per row of `user_labels`.
        sens: Name of the column in `user_labels` that holds the binary
            sensitive attribute.

    Returns:
        A dict with
        "parity": |P(pred=1 | s=0) - P(pred=1 | s=1)| and
        "equality": the same gap restricted to rows whose label is 1.
        A group without members yields NaN.
    """
    label_values = user_labels[labels].values
    sens_values = user_labels[sens].values

    pred = torch.as_tensor(pred)

    def _to_mask(condition):
        # Boolean mask on the same device as the predictions.
        return torch.as_tensor(np.asarray(condition, dtype=bool), device=pred.device)

    in_s0 = _to_mask(sens_values == 0)
    in_s1 = _to_mask(sens_values == 1)
    is_positive = _to_mask(label_values == 1)

    scores = pred.float()

    def _positive_rate(mask):
        # Vectorised replacement for Python's builtin sum(), which would
        # iterate over the tensor one element at a time.
        return scores[mask].sum() / mask.sum()

    parity = (_positive_rate(in_s0) - _positive_rate(in_s1)).abs()
    equality = (_positive_rate(in_s0 & is_positive) - _positive_rate(in_s1 & is_positive)).abs()

    return {"parity": parity.item(), "equality": equality.item()}

In [7]:
class GCN(nn.Module):
    """Graph encoder (`GCN_Body`) followed by a linear classification head."""

    def __init__(self, nfeat, nhid=128, nclass=2, dropout=0):
        """Build the encoder and the head, then initialise the linear layers.

        Args:
            nfeat: Number of input features per node.
            nhid: Width of the hidden representation.
            nclass: Number of output classes.
            dropout: Forwarded to ``GCN_Body``.
        """
        super().__init__()
        self.body = GCN_Body(nfeat, nhid, dropout)
        self.fc = nn.Linear(nhid, nclass)

        for module in self.modules():
            self.weights_init(module)

    def weights_init(self, m):
        """Xavier-initialise the weight and zero the bias of `nn.Linear` modules."""
        if not isinstance(m, nn.Linear):
            return
        torch.nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            m.bias.data.fill_(0.0)

    def forward(self, x, edge_index):
        """Return per-node log-probabilities over the classes."""
        hidden = self.body(x, edge_index)
        logits = self.fc(hidden)
        return F.log_softmax(logits, dim=1)


In [8]:
class GCN_Body(nn.Module):
    """Encoder consisting of a single `GCNConv` layer."""

    def __init__(self, nfeat, nhid, dropout):
        """Create the convolution mapping `nfeat` inputs to `nhid` outputs.

        `dropout` is accepted but not used by this class.
        """
        super().__init__()
        self.gc1 = GCNConv(nfeat, nhid)

    def forward(self, x, edge_index):
        """Apply the graph convolution and return its output unchanged."""
        return self.gc1(x, edge_index)

In [9]:
# Per-node sensitive attribute (the `Gender` column), placed on the device
# returned by `set_device()`.
sens_attribute_tensor = torch.tensor(
    user_labels['Gender'].values,
    dtype=torch.long,
).to(set_device())

In [10]:
# Baseline model and its Adam optimizer.
gcn_model = GCN(nfeat=data.num_node_features, nhid=128, nclass=2)
optimizer_gcn_model = torch.optim.Adam(
    gcn_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

In [11]:
# Train the baseline GCN with the plain NLL loss (no fairness penalties).
training(gcn_model, data, optimizer_gcn_model, epochs=2000, fairness=False)

Epoch 0 | Loss: 51.73440933227539 | 
 AUC_ROC: 0.45511921458625526 | F1 Score: 0.0 | SPD: 0.0 | EOD: 0.0
Epoch 10 | Loss: 13.230936050415039 | 
 AUC_ROC: 0.5134992987377278 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 20 | Loss: 3.110295534133911 | 
 AUC_ROC: 0.4628330995792426 | F1 Score: 0.7450980392156864 | SPD: 0.04001861810684204 | EOD: 0.033041179180145264
Epoch 30 | Loss: 3.046635866165161 | 
 AUC_ROC: 0.4586255259467041 | F1 Score: 0.31693989071038253 | SPD: 0.062178581953048706 | EOD: 0.06382915377616882
Epoch 40 | Loss: 2.0159919261932373 | 
 AUC_ROC: 0.4600280504908836 | F1 Score: 0.5236051502145923 | SPD: 0.08092564344406128 | EOD: 0.07417821884155273
Epoch 50 | Loss: 1.0788615942001343 | 
 AUC_ROC: 0.4646446937821412 | F1 Score: 0.8 | SPD: 0.015567958354949951 | EOD: 0.009192466735839844
Epoch 60 | Loss: 1.0960620641708374 | 
 AUC_ROC: 0.4633006077606358 | F1 Score: 0.7074829931972789 | SPD: 0.056849002838134766 | EOD: 0.062343597412109375
Epoch 70 | Loss: 1.

In [12]:
# Report the baseline GCN's metrics. With the `test` defined in the cell
# above (val=True by default) these are computed on the validation mask.
print('Here are the values for the GCN model')
metrics_gcn_model = test(model=gcn_model, data=data)
print_metrics(metrics=metrics_gcn_model)

Here are the values for the GCN model
parity : 0.03469
equality : 0.02881
Accuracy : 0.69000


F1_Score : 0.81548
AUC_ROC : 0.73779


In [20]:
# NOTE(review): same call as the earlier training cell for `gcn_model`. Run
# top to bottom, it continues training the already-trained model instead of
# starting from fresh weights.
training(gcn_model, data, optimizer_gcn_model, epochs=2000, fairness=False)

Epoch 0 | Loss: 1730.3828125
Epoch 10 | Loss: 1586.6728515625
Epoch 20 | Loss: 1443.138916015625
Epoch 30 | Loss: 1299.7718505859375
Epoch 40 | Loss: 1156.541259765625
Epoch 50 | Loss: 1013.3958740234375
Epoch 60 | Loss: 870.273193359375
Epoch 70 | Loss: 727.1024780273438
Epoch 80 | Loss: 583.81201171875
Epoch 90 | Loss: 440.3348083496094
Epoch 100 | Loss: 296.6080322265625
Epoch 110 | Loss: 152.5740203857422
Epoch 120 | Loss: 8.214092254638672
Epoch 130 | Loss: 28.87628746032715
Epoch 140 | Loss: 19.84130096435547
Epoch 150 | Loss: 7.131844520568848
Epoch 160 | Loss: 5.081916809082031
Epoch 170 | Loss: 1.500609278678894
Epoch 180 | Loss: 1.5741924047470093
Epoch 190 | Loss: 1.0022238492965698
Epoch 200 | Loss: 0.9399318099021912
Epoch 210 | Loss: 0.9147445559501648
Epoch 220 | Loss: 0.8742117285728455
Epoch 230 | Loss: 0.849051296710968
Epoch 240 | Loss: 0.8311452269554138
Epoch 250 | Loss: 0.8118543028831482
Epoch 260 | Loss: 0.7938451170921326
Epoch 270 | Loss: 0.7771266102790833
Ep

In [25]:
# Report the baseline GCN's metrics again (repeats the earlier report cell).
print('Here are the values for the GCN model')
metrics_gcn_model = test(model=gcn_model, data=data)
print_metrics(metrics=metrics_gcn_model)

Here are the values for the GCN model
parity : 0.01599
equality : 0.01188
Accuracy : 0.71000


F1_Score : 0.82635
AUC_ROC : 0.65739


In [None]:
Values for a standard GCN model - same model specifications as the one from Carlos

parity : 0.01599
equality : 0.01188
Accuracy : 0.71000
F1_Score : 0.82635
AUC_ROC : 0.65739

In [None]:
Values for the FairGCN model - same model specifications as the one from Carlos,
but with one FAME layer instead of one GCNConv layer

parity : 0.00402
equality : 0.00297
Accuracy : 0.70000
F1_Score : 0.82249
AUC_ROC : 0.68935

In [None]:
Values for the FairGAT model - same model specifications as the one from Carlos,
but with one A-FAME layer instead of one GCNConv layer

parity : 0.00547
equality : 0.00200
Accuracy : 0.70000
F1_Score : 0.82143
AUC_ROC : 0.67248

In [13]:
class FairnessAwareMessagePassingLayer(MessagePassing):
    """Mean-aggregating message-passing layer with a learned group correction.

    Each message is the linearly transformed source feature, scaled by a
    symmetric degree normalisation and by
    ``1 + bias_correction * (s[row] - s[col])``, where ``s`` is the per-node
    sensitive attribute and ``bias_correction`` is a learned scalar.
    """

    def __init__(self, in_channels, out_channels, sensitive_attr=None):
        """
        Args:
            in_channels: Number of input features per node.
            out_channels: Number of output features per node.
            sensitive_attr: Optional per-node sensitive attribute. When
                omitted, the notebook-level `sens_attribute_tensor` is used.
        """
        super().__init__(aggr='mean')
        self.lin = nn.Linear(in_channels, out_channels)
        if sensitive_attr is None:
            sensitive_attr = sens_attribute_tensor
        # Registered as a non-persistent buffer: it follows the module in
        # `.to(device)` but does not appear in the state dict.
        self.register_buffer('sensitive_attr', torch.as_tensor(sensitive_attr), persistent=False)
        self.bias_correction = nn.Parameter(torch.rand(1))

    def forward(self, x, edge_index):
        """Add self-loops, transform the features and propagate messages."""
        # Add self-loops
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        x = self.lin(x)

        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x)

    def message(self, x_j, edge_index, size):
        """Build the normalised, fairness-adjusted message for every edge."""
        row, col = edge_index
        deg = degree(row, size[0], dtype=x_j.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        # A node with degree 0 would give inf here; treat it as 0 instead.
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Difference of the sensitive attribute across each edge
        # (0 when both endpoints belong to the same group).
        group_difference = self.sensitive_attr[row] - self.sensitive_attr[col]

        # Scale the message by the learned correction for cross-group edges.
        fairness_adjustment = (1 + self.bias_correction * group_difference.view(-1, 1))

        return fairness_adjustment * norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        """Return the aggregated messages unchanged."""
        return aggr_out

In [14]:
class FairMP_GCN(torch.nn.Module):
    """Stack of fairness-aware message-passing layers with a linear head."""

    def __init__(self, data, layers=1, hidden=128, dropout=0):
        """
        Args:
            data: Graph object; only ``data.num_node_features`` is read.
            layers: Total number of message-passing layers.
            hidden: Width of every hidden representation.
            dropout: Dropout probability applied after each layer beyond the
                first.
        """
        super().__init__()
        self.conv1 = FairnessAwareMessagePassingLayer(data.num_node_features, hidden)
        # The remaining `layers - 1` layers map hidden -> hidden.
        self.convs = torch.nn.ModuleList(
            [FairnessAwareMessagePassingLayer(hidden, hidden) for _ in range(layers - 1)]
        )
        self.fc = nn.Linear(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        """Return per-node log-probabilities over the two classes."""
        hidden_state = F.relu(self.conv1(x, edge_index))

        for layer in self.convs:
            hidden_state = F.relu(layer(hidden_state, edge_index))
            hidden_state = F.dropout(hidden_state, p=self.dropout, training=self.training)

        logits = self.fc(hidden_state)
        return F.log_softmax(logits, dim=1)

In [15]:
# FairGCN: one fairness-aware message-passing layer plus a linear head,
# optimised with the same Adam settings as the baseline.
Fair_gcn_model = FairMP_GCN(data=data, hidden=128)
optimizer_Fair_gcn_model = torch.optim.Adam(
    Fair_gcn_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

In [16]:
# Train the FairGCN with the plain NLL loss (fairness penalties disabled).
training(Fair_gcn_model, data, optimizer_Fair_gcn_model, epochs=2000, fairness=False)

Epoch 0 | Loss: 6.185372829437256 | 
 AUC_ROC: 0.624240299205236 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 10 | Loss: 5.22127628326416 | 
 AUC_ROC: 0.6256428237494156 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 20 | Loss: 4.260575771331787 | 
 AUC_ROC: 0.6319541841982235 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 30 | Loss: 3.305295467376709 | 
 AUC_ROC: 0.6370383356708742 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 40 | Loss: 2.3625898361206055 | 
 AUC_ROC: 0.636570827489481 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 50 | Loss: 1.4611992835998535 | 
 AUC_ROC: 0.6369798971482001 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 60 | Loss: 0.7461188435554504 | 
 AUC_ROC: 0.6392005610098177 | F1 Score: 0.8165680473372781 | SPD: 0.0 | EOD: 0.0
Epoch 70 | Loss: 0.7494667172431946 | 
 AUC_ROC: 0.38475923328658246 | F1 Score: 0.014388489208633093 | SPD: 0.0014492754125967622 | EOD: 0.0020040080416947603
E

In [17]:
# Report the metrics of the FairGCN model. The header names the model that
# is actually evaluated below (`Fair_gcn_model`), not the baseline GCN.
print("Here are the values for the FairGCN model")

metrics_Fair_gcn_model = test(Fair_gcn_model, data)

print_metrics(metrics_Fair_gcn_model)

Here are the values for the GCN model
parity : 0.00514
equality : 0.00104
Accuracy : 0.68500


F1_Score : 0.81081
AUC_ROC : 0.65802


In [33]:
# NOTE(review): same call as the earlier training cell for `Fair_gcn_model`.
# Run top to bottom, it continues training the already-trained model.
training(Fair_gcn_model, data, optimizer_Fair_gcn_model, epochs=2000, fairness=False)

Epoch 0 | Loss: 1.5836987495422363
Epoch 10 | Loss: 0.6697207689285278
Epoch 20 | Loss: 0.7156093716621399
Epoch 30 | Loss: 0.6384361982345581
Epoch 40 | Loss: 0.6393753290176392
Epoch 50 | Loss: 0.6395338177680969
Epoch 60 | Loss: 0.6352814435958862
Epoch 70 | Loss: 0.6339787840843201
Epoch 80 | Loss: 0.6336237788200378
Epoch 90 | Loss: 0.633116602897644
Epoch 100 | Loss: 0.6325951218605042
Epoch 110 | Loss: 0.6320903897285461
Epoch 120 | Loss: 0.6315902471542358
Epoch 130 | Loss: 0.6310877799987793
Epoch 140 | Loss: 0.630581796169281
Epoch 150 | Loss: 0.6300761103630066
Epoch 160 | Loss: 0.6295708417892456
Epoch 170 | Loss: 0.6290642619132996
Epoch 180 | Loss: 0.6285581588745117
Epoch 190 | Loss: 0.6280514597892761
Epoch 200 | Loss: 0.6275433301925659
Epoch 210 | Loss: 0.6270341277122498
Epoch 220 | Loss: 0.6265242099761963
Epoch 230 | Loss: 0.626014232635498
Epoch 240 | Loss: 0.6255046725273132
Epoch 250 | Loss: 0.6249959468841553
Epoch 260 | Loss: 0.624488353729248
Epoch 270 | Loss

In [34]:
# Report the metrics of the FairGCN model. The header names the model that
# is actually evaluated below (`Fair_gcn_model`), not the baseline GCN.
print("Here are the values for the FairGCN model")

metrics_Fair_gcn_model = test(Fair_gcn_model, data)

print_metrics(metrics_Fair_gcn_model)

Here are the values for the GCN model
parity : 0.00402
equality : 0.00297
Accuracy : 0.70000


F1_Score : 0.82249
AUC_ROC : 0.68935


In [18]:
import torch
from torch_geometric.nn import MessagePassing
from torch.nn import Linear, Parameter
from torch_geometric.utils import add_self_loops, softmax
import torch.nn.functional as F

class Attention_FairMessagePassing(MessagePassing):
    """Sum-aggregating attention layer with a learned group correction.

    The attention logit of an edge is a linear function of the concatenated
    endpoint features plus ``bias_correction * (s[row] - s[col])``, where
    ``s`` is the per-node sensitive attribute and ``bias_correction`` is a
    learned scalar.
    """

    def __init__(self, in_channels, out_channels, sensitive_attr=None):
        """
        Args:
            in_channels: Number of input features per node.
            out_channels: Number of output features per node.
            sensitive_attr: Optional per-node sensitive attribute. When
                omitted, the notebook-level `sens_attribute_tensor` is used.
        """
        super().__init__(aggr='add')
        self.lin = Linear(in_channels, out_channels)
        self.att = Linear(2 * out_channels, 1)

        if sensitive_attr is None:
            sensitive_attr = sens_attribute_tensor
        # Registered as a non-persistent buffer: it follows the module in
        # `.to(device)` but does not appear in the state dict.
        self.register_buffer('sensitive_attr', torch.as_tensor(sensitive_attr), persistent=False)
        self.bias_correction = Parameter(torch.rand(1))

    def forward(self, x, edge_index):
        """Add self-loops, transform the features and propagate messages."""
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        x = self.lin(x)

        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x)

    def message(self, edge_index, x_i, x_j, size_i):
        """Weight every source feature by its fairness-adjusted attention."""
        # Raw attention logit from the concatenated endpoint features.
        x_cat = torch.cat([x_i, x_j], dim=-1)
        alpha = self.att(x_cat)

        # Shift the logit of cross-group edges by the learned correction.
        row, col = edge_index
        group_difference = self.sensitive_attr[row] - self.sensitive_attr[col]

        fairness_adjustment = self.bias_correction * group_difference.view(-1, 1)
        alpha = alpha + fairness_adjustment

        # NOTE(review): the softmax is grouped by edge_index[0], while
        # messages are summed at the other endpoint -- confirm that
        # normalising over this index is intended.
        alpha = softmax(alpha, edge_index[0], num_nodes=size_i)

        return alpha * x_j

    def update(self, aggr_out):
        """Return the aggregated messages unchanged."""
        return aggr_out
    
# Network whose input width is taken from the `data` object it is built with.
class Fair_Attention_MP_GCN(torch.nn.Module):
    """Stack of fairness-aware attention layers with a linear head."""

    def __init__(self, data, layers=1, hidden=128, dropout=0):
        """
        Args:
            data: Graph object; only ``data.num_node_features`` is read.
            layers: Total number of attention layers.
            hidden: Width of every hidden representation.
            dropout: Stored on the module; ``forward`` does not apply it.
        """
        super().__init__()
        self.conv1 = Attention_FairMessagePassing(data.num_node_features, hidden)
        # The remaining `layers - 1` layers map hidden -> hidden.
        self.convs = torch.nn.ModuleList(
            [Attention_FairMessagePassing(hidden, hidden) for _ in range(layers - 1)]
        )
        self.fc = Linear(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        """Return per-node log-probabilities over the two classes."""
        hidden_state = F.relu(self.conv1(x, edge_index))

        for layer in self.convs:
            hidden_state = F.relu(layer(hidden_state, edge_index))

        logits = self.fc(hidden_state)
        return F.log_softmax(logits, dim=1)

In [19]:
# FairGAT: one fairness-aware attention layer plus a linear head, trained
# with the plain NLL loss and the same Adam settings as the other models.
Fair_gat_model = Fair_Attention_MP_GCN(data=data, hidden=128)
optimizer_Fair_gat_model = torch.optim.Adam(
    Fair_gat_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

training(Fair_gat_model, data, optimizer_Fair_gat_model, epochs=2000, fairness=False)

Epoch 0 | Loss: 12.548128128051758 | 
 AUC_ROC: 0.5091164095371669 | F1 Score: 0.602510460251046 | SPD: 0.03496965765953064 | EOD: 0.027328312397003174
Epoch 10 | Loss: 3.4877965450286865 | 
 AUC_ROC: 0.515135577372604 | F1 Score: 0.8130563798219584 | SPD: 0.0014492273330688477 | EOD: 0.002003967761993408
Epoch 20 | Loss: 1.8146874904632568 | 
 AUC_ROC: 0.47755960729312763 | F1 Score: 0.6615384615384615 | SPD: 0.0298270583152771 | EOD: 0.005972087383270264
Epoch 30 | Loss: 1.565403699874878 | 
 AUC_ROC: 0.48013090229079003 | F1 Score: 0.8012048192771084 | SPD: 0.008274972438812256 | EOD: 0.022662222385406494
Epoch 40 | Loss: 1.177954077720642 | 
 AUC_ROC: 0.4879032258064516 | F1 Score: 0.7902735562310031 | SPD: 0.0010284781455993652 | EOD: 0.007457733154296875
Epoch 50 | Loss: 1.103583574295044 | 
 AUC_ROC: 0.51986909770921 | F1 Score: 0.7712418300653594 | SPD: 0.02019631862640381 | EOD: 0.022263407707214355
Epoch 60 | Loss: 0.9573491215705872 | 
 AUC_ROC: 0.46797568957456753 | F1 Scor

In [20]:
# Report the FairGAT metrics (`test` uses the validation mask by default).
print('Here are the values for the FairGAT model')
metrics_Fair_gat_model = test(model=Fair_gat_model, data=data)
print_metrics(metrics=metrics_Fair_gat_model)

Here are the values for the FairGAT model
parity : 0.00706
equality : 0.00207
Accuracy : 0.68000


F1_Score : 0.80952
AUC_ROC : 0.67496


In [39]:
# NOTE(review): repeats the earlier FairGAT cell; a fresh model and optimizer
# are created here, so the previously trained `Fair_gat_model` is replaced.
Fair_gat_model = Fair_Attention_MP_GCN(data=data, hidden=128)
optimizer_Fair_gat_model = torch.optim.Adam(
    Fair_gat_model.parameters(),
    lr=1e-4,
    weight_decay=1e-5,
)

training(Fair_gat_model, data, optimizer_Fair_gat_model, epochs=2000, fairness=False)

Epoch 0 | Loss: 72.10214233398438
Epoch 10 | Loss: 58.275054931640625
Epoch 20 | Loss: 45.41349792480469
Epoch 30 | Loss: 32.867794036865234
Epoch 40 | Loss: 20.466737747192383
Epoch 50 | Loss: 8.136582374572754
Epoch 60 | Loss: 5.021334171295166
Epoch 70 | Loss: 3.2869226932525635
Epoch 80 | Loss: 2.451176404953003
Epoch 90 | Loss: 2.107731819152832
Epoch 100 | Loss: 1.943284273147583
Epoch 110 | Loss: 1.8548871278762817
Epoch 120 | Loss: 1.8105360269546509
Epoch 130 | Loss: 1.7649245262145996
Epoch 140 | Loss: 1.705875277519226
Epoch 150 | Loss: 1.6368602514266968
Epoch 160 | Loss: 1.5607243776321411
Epoch 170 | Loss: 1.5071605443954468
Epoch 180 | Loss: 1.458755373954773
Epoch 190 | Loss: 1.4107089042663574
Epoch 200 | Loss: 1.3624742031097412
Epoch 210 | Loss: 1.3142322301864624
Epoch 220 | Loss: 1.2660164833068848
Epoch 230 | Loss: 1.217871904373169
Epoch 240 | Loss: 1.1698365211486816
Epoch 250 | Loss: 1.121995210647583
Epoch 260 | Loss: 1.0744487047195435
Epoch 270 | Loss: 1.027

In [40]:
# Report the FairGAT metrics again (repeats the earlier report cell).
print('Here are the values for the FairGAT model')
metrics_Fair_gat_model = test(model=Fair_gat_model, data=data)
print_metrics(metrics=metrics_Fair_gat_model)

Here are the values for the FairGAT model
parity : 0.00547
equality : 0.00200
Accuracy : 0.70000


F1_Score : 0.82143
AUC_ROC : 0.67248


In [None]:
Values for a standard GCN model - same model specifications as the one from Carlos

parity : 0.03469
equality : 0.02881
Accuracy : 0.69000
F1_Score : 0.81548
AUC_ROC : 0.73779

In [None]:
Values for the FairGCN model - same model specifications as the one from Carlos,
but with one FAME layer instead of one GCNConv layer

parity : 0.00514
equality : 0.00104
Accuracy : 0.68500
F1_Score : 0.81081
AUC_ROC : 0.65802

In [None]:
Values for the FairGAT model - same model specifications as the one from Carlos,
but with one A-FAME layer instead of one GCNConv layer

parity : 0.00706
equality : 0.00207
Accuracy : 0.68000
F1_Score : 0.80952
AUC_ROC : 0.67496