# Necessities

In [1]:
# ------------------------------------------------------------------
# Imports
# ------------------------------------------------------------------

# Basic data processing libraries
import pandas as pd
import numpy as np
import os
import torch

# Graph data processing libraries
import networkx as nx
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx

# Libraries for (G)NNs
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
import torch.nn as nn


# ------------------------------------------------------------------
# Helper functions
# ------------------------------------------------------------------

def show_df_info(df):
    print(df.info())
    print('####### Repeat ####### \n', df.duplicated().any())
    print('####### Count ####### \n', df.nunique())
    print('####### Example ####### \n',df.head())

def label_statics(label_df, label_list):
    print("####### nCount #######")
    for label in label_list:
        print(label_df[label].value_counts())
    print("####### nPercent #######")
    for label in label_list:
        print(label_df[label].value_counts()/label_df.shape[0])

# ------------------------------------------------------------------
# Data stuff
# ------------------------------------------------------------------

base_path = os.getcwd()
input_ali_data_path = os.path.join(base_path, "input_ali_data")

# Load the data files
user_labels_path = os.path.join(input_ali_data_path, "user_labels.csv")
user_edges_path = os.path.join(input_ali_data_path, "user_edge.csv")

# Create dataframes to store the information from the .csv files
user_labels = pd.read_csv(user_labels_path)
user_edges = pd.read_csv(user_edges_path)

# Prepare the data for GNNs
node_features = torch.tensor(user_labels.iloc[:, 1:].values, dtype=torch.float)
edge_index = torch.tensor(user_edges.values, dtype=torch.long).t().contiguous()

# node_features = torch.tensor(filtered_user_labels.iloc[:, 1:].values, dtype=torch.float)
# edge_index = torch.tensor(filtered_edges.values, dtype=torch.long).t().contiguous()

# Create torch-geometric data
data = Data(x=node_features, edge_index=edge_index)

num_nodes = node_features.size(0)
num_classes = 2 # Binarised gender values from the data
num_node_features = data.num_node_features

# Create masks for training, and testing
train_mask = torch.zeros(num_nodes, dtype=torch.bool)
test_mask = torch.zeros(num_nodes, dtype=torch.bool)

# 80 - 20 Train and Test data split
num_train = int(num_nodes * 0.8)
train_mask[:num_train] = True
test_mask[num_train:] = True

data.train_mask = train_mask
data.test_mask = test_mask

# Labels from the data (in this case: Age Classification)
data.y = torch.tensor(user_labels['bin_age'].values, dtype=torch.long)

# ------------------------------------------------------------------
# Set Device
# ------------------------------------------------------------------

def set_device():
    return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ------------------------------------------------------------------
# Loss
# ------------------------------------------------------------------

def fairness_aware_loss(output, data, sensitive_attr, alpha=0, beta=0, gamma=0, delta=0):
    target = data.y[data.train_mask]
    # standard_loss = F.cross_entropy(output, target)
    standard_loss = F.nll_loss(output, target)

    labels = data.y[train_mask]
    pos_prob = torch.exp(output[:, 1])
    neg_prob = torch.exp(output[:, 0])
    # pos_prob = torch.sigmoid(output[:, 1])
    # neg_prob = 1 - pos_prob
    predictions = output.argmax(dim=1)

    # Statistical Parity Regularization
    sp_reg = torch.abs(pos_prob[sensitive_attr == 1].mean() - pos_prob[sensitive_attr == 0].mean())

    # # Calculating FPR and TPR for each group
    # fpr_group1 = ((predictions == 1) & (labels == 0) & (sensitive_attr == 1)).float().mean()
    # fpr_group0 = ((predictions == 1) & (labels == 0) & (sensitive_attr == 0)).float().mean()
    # tpr_group1 = ((predictions == 1) & (labels == 1) & (sensitive_attr == 1)).float().mean()
    # tpr_group0 = ((predictions == 1) & (labels == 1) & (sensitive_attr == 0)).float().mean()

    # Treatment Equality Regularization
    fp_diff = (neg_prob * (labels == 0) * (sensitive_attr == 1)).float().mean() - \
              (neg_prob * (labels == 0) * (sensitive_attr == 0)).float().mean()
    fn_diff = (pos_prob * (labels == 1) * (sensitive_attr == 1)).float().mean() - \
              (pos_prob * (labels == 1) * (sensitive_attr == 0)).float().mean()
    treatment_reg = torch.abs(fp_diff) + torch.abs(fn_diff)
    # treatment_reg = torch.abs(fn_diff)

    # fn_group_1 = ((predictions == 0) & (labels == 1) & (sensitive_attr == 1)).sum()
    # fp_group_1 = ((predictions == 1) & (labels == 0) & (sensitive_attr == 1)).sum()

    # fn_group_0 = ((predictions == 0) & (labels == 1) & (sensitive_attr == 0)).sum()
    # fp_group_0 = ((predictions == 1) & (labels == 0) & (sensitive_attr == 0)).sum()
    
    # ratio_group_1 = fn_group_1 / fp_group_1 if fp_group_1 != 0 else torch.tensor(float('inf'))
    # ratio_group_0 = fn_group_0 / fp_group_0 if fp_group_0 != 0 else torch.tensor(float('inf'))
    # treatment_reg = torch.abs(ratio_group_1 - ratio_group_0)

    # Equal Opportunity Difference Regularization
    eod_reg = torch.abs((pos_prob * (labels == 1) * (sensitive_attr == 1)).float().mean() - \
                        (pos_prob * (labels == 1) * (sensitive_attr == 0)).float().mean())

    # Overall Accuracy Equality Difference Regularization
    oaed_reg = torch.abs((pos_prob * (sensitive_attr == 1)).float().mean() - \
                         (pos_prob * (sensitive_attr == 0)).float().mean())

    penalty = alpha + beta + gamma + delta
    
    # Combine losses
    combined_loss = (1-penalty)*standard_loss
    + alpha * sp_reg
    + beta * treatment_reg
    + gamma * eod_reg
    + delta * oaed_reg
    
    return combined_loss

# ------------------------------------------------------------------
# Fairness Metrics
# ------------------------------------------------------------------

def calculate_fairness(label, predictions, sens_attr='gender', balanced=False):
    """
    Calculate various fairness metrics.

    Args:
    label: Actual labels (binary).
    predictions: Model predictions (binary).
    sens_attr: Binary sensitive attribute for fairness evaluation.

    Returns:
    A dictionary containing SPD, EOD, OAED, and TED values.
    """
    if balanced is False:
        labels = torch.tensor(user_labels[label].values, dtype=torch.long)
        sensitive_attribute = torch.tensor(user_labels[sens_attr].values, dtype=torch.long)
    else:
        labels = torch.tensor(filtered_user_labels[label].values, dtype=torch.long)
        sensitive_attribute = torch.tensor(filtered_user_labels[sens_attr].values, dtype=torch.long)
    
    labels = labels.to(set_device())
    sensitive_attribute = sensitive_attribute.to(set_device())

    predictions = predictions.float()
    labels = labels.float()
    sensitive_attribute = sensitive_attribute.float()

    def statistical_parity_difference():
        prob_group_1 = predictions[sensitive_attribute == 1].mean()
        prob_group_0 = predictions[sensitive_attribute == 0].mean()
        return abs(prob_group_1 - prob_group_0), prob_group_0, prob_group_1

    def equal_opportunity_difference():
        tpr_group_1 = predictions[(labels == 1) & (sensitive_attribute == 1)].mean()
        tpr_group_0 = predictions[(labels == 1) & (sensitive_attribute == 0)].mean()
        return abs(tpr_group_1 - tpr_group_0), tpr_group_0, tpr_group_1

    def overall_accuracy_equality_difference():
        acc_group_1 = (predictions[sensitive_attribute == 1] == labels[sensitive_attribute == 1]).float().mean()
        acc_group_0 = (predictions[sensitive_attribute == 0] == labels[sensitive_attribute == 0]).float().mean()
        return abs(acc_group_1 - acc_group_0), acc_group_0, acc_group_1

    def treatment_equality_difference():
        fn_group_1 = ((predictions == 0) & (labels == 1) & (sensitive_attribute == 1)).sum()
        fp_group_1 = ((predictions == 1) & (labels == 0) & (sensitive_attribute == 1)).sum()

        fn_group_0 = ((predictions == 0) & (labels == 1) & (sensitive_attribute == 0)).sum()
        fp_group_0 = ((predictions == 1) & (labels == 0) & (sensitive_attribute == 0)).sum()

        ratio_group_1 = fn_group_1 / fp_group_1 if fp_group_1 != 0 else float('inf')
        ratio_group_0 = fn_group_0 / fp_group_0 if fp_group_0 != 0 else float('inf')

        return abs(ratio_group_1 - ratio_group_0), ratio_group_0, ratio_group_1, fn_group_1, fp_group_1, fn_group_0, fp_group_0

    # Calculating each fairness metric
    spd, sp_g0, sp_g1 = statistical_parity_difference()
    eod, eod_g0, eod_g1 = equal_opportunity_difference()
    oaed, oaed_g0, oaed_g1 = overall_accuracy_equality_difference()
    ted, ted_g0, ted_g1, fn_group_1, fp_group_1, fn_group_0, fp_group_0 = treatment_equality_difference()

    return {
        'Statistical Parity Difference': spd,
        'Statistical Parity Group with S=0': sp_g0,
        'Statistical Parity Group S=1': sp_g1,
        'Equal Opportunity Difference': eod,
        'Equal Opportunity Group with S=0': eod_g0,
        'Equal Opportunity Group S=1': eod_g1,
        'Overall Accuracy Equality Difference': oaed,
        'Overall Accuracy Group with S=0': oaed_g0,
        'Overall Accuracy Group S=1': oaed_g1,
        'Treatment Equality Difference': ted,
        'Treatment Equality Group with S=0': ted_g0,
        'Treatment Equality Group S=1': ted_g1
        # 'False Negatives Group 1': fn_group_1,
        # 'False Positives Group 1': fp_group_1,
        # 'False Negatives Group 0': fn_group_0,
        # 'False Positives Group 0': fp_group_0
    }

# ------------------------------------------------------------------
# Model Training
# ------------------------------------------------------------------

# Train the model
def training(model, data, optimizer, epochs, fairness=False, alpha=0, beta=0, gamma=0, delta=0):
    model.to(set_device())
    data.to(set_device())
    
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        
        if fairness:
            loss = fairness_aware_loss(out[data.train_mask], data, data.x[data.train_mask, -1],
                                       alpha=alpha, beta=beta, gamma=gamma, delta=delta)
            
        else:
            # criterion = torch.nn.CrossEntropyLoss()
            # criterion = torch.nn.BCELoss()
            criterion = torch.nn.NLLLoss()
            loss = criterion(out[data.train_mask], data.y[data.train_mask])

        loss.backward()
        optimizer.step()

        if epoch % 10 == 0:
            print(f'Epoch {epoch} | Loss: {loss.item()}')

# ------------------------------------------------------------------
# Model Testing
# ------------------------------------------------------------------

# Test the model
def test(model, data, balanced=False):
    # model.to('cpu')
    # data.to('cpu')
    model.to(set_device())
    data.to(set_device())
    
    model.eval()
    with torch.inference_mode():
      out = model(data.x, data.edge_index)

    _, pred = model(data.x, data.edge_index).max(dim=1)
    correct = int(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
    accuracy = correct / int(data.test_mask.sum())
    # print(f'Accuracy: {accuracy}')

    # Convert model outputs to binary predictions
    predictions = out.argmax(dim=1)

    # Fairness calculated for gender-classification task with bin_age as the sensitive attribute
    fairness_metrics = calculate_fairness(label='bin_age', predictions=predictions, sens_attr='gender', balanced=balanced)
    fairness_metrics['Accuracy'] = accuracy

    return fairness_metrics

# ------------------------------------------------------------------
# Print Metrics
# ------------------------------------------------------------------

# def print_metrics(metrics):
#     for key, value in metrics.items():
#         print(f"\n{key} : {value:.5f}")

def print_metrics(metrics):
    count = -1

    for key, value in metrics.items():
        count += 1
        if count == 3:
            print(f"\n\n{key} : {value:.5f}")
            count = 0
        else:
            print(f"{key} : {value:.5f}")

# Baseline

In [2]:
# GCN class that takes in the data as an input for dimensions of the convolutions
class GCN(torch.nn.Module):
    def __init__(self, data, layers=2, hidden=16, dropout=0):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(data.num_node_features, hidden)
        self.convs = torch.nn.ModuleList()
        
        for i in range(layers - 1):
            self.convs.append(GCNConv(hidden, hidden))
        
        self.conv2 = GCNConv(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        x = F.relu(self.conv1(x, edge_index))
        # x = F.dropout(x, p=self.dropout, training=self.training)

        for conv in self.convs:
            x = F.relu(conv(x, edge_index))
            # x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)

In [3]:
from torch_geometric.nn import GATConv

# GAT class that takes in the data as an input for dimensions of the convolutions
class GAT(torch.nn.Module):
    def __init__(self, data, layers=2, hidden=16, dropout=0):
        super(GAT, self).__init__()
        self.conv1 = GATConv(data.num_node_features, hidden)
        self.convs = torch.nn.ModuleList()
        
        for i in range(layers - 1):
            self.convs.append(GATConv(hidden, hidden))
        
        self.conv2 = GATConv(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        x = F.relu(self.conv1(x, edge_index))
        # x = F.dropout(x, p=self.dropout, training=self.training)

        for conv in self.convs:
            x = F.relu(conv(x, edge_index))
            # x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)

# Fair MP

In [4]:
class FairnessAwareMessagePassingLayer(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(FairnessAwareMessagePassingLayer, self).__init__(aggr='mean')  
        self.lin = nn.Linear(in_channels, out_channels)
        # self.a_fair = nn.Parameter(torch.rand(out_channels)) 
        # self.sensitive_attr = torch.tensor(user_labels['bin_age'].values, dtype=torch.float) 
        self.sensitive_attr = data.x[:, 5]
        self.bias_correction = nn.Parameter(torch.rand(1))

    def forward(self, x, edge_index):        
        # Add self-loops 
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        x = self.lin(x)

        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x)
    
    def message(self, x_j, edge_index, size):
        row, col = edge_index
        deg = degree(row, size[0], dtype=x_j.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]
        # print("sensitive attribute is on device: ", self.sensitive_attr.device)
        # Compute statistical parity difference for each edge
        group_difference = self.sensitive_attr[row] - self.sensitive_attr[col]
        
        # Adjust messages based on statistical parity
        fairness_adjustment = (1 + self.bias_correction * group_difference.view(-1, 1))

        return fairness_adjustment * norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        return aggr_out

In [5]:
# GCN class that takes in the data as an input for dimensions of the convolutions
class FairMP_GCN(torch.nn.Module):
    def __init__(self, data, layers=2, hidden=16, dropout=0):
        super(FairMP_GCN, self).__init__()
        self.conv1 = FairnessAwareMessagePassingLayer(data.num_node_features, hidden)
        self.convs = torch.nn.ModuleList()
        
        for i in range(layers - 1):
            self.convs.append(FairnessAwareMessagePassingLayer(hidden, hidden))
        
        self.conv2 = FairnessAwareMessagePassingLayer(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        x = F.relu(self.conv1(x, edge_index))
        # x = F.dropout(x, p=self.dropout, training=self.training)

        for conv in self.convs:
            x = F.relu(conv(x, edge_index))
            # x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)

# Fair Attention MP

In [6]:
class Attention_FairMessagePassing(MessagePassing):
    def __init__(self, in_channels, out_channels):
        super(Attention_FairMessagePassing, self).__init__(aggr='mean')
        self.lin = nn.Linear(in_channels, out_channels)
        self.att = nn.Linear(out_channels, 1)
        # self.sensitive_attr = torch.tensor(user_labels['bin_age'].values, dtype=torch.float)
        self.bias_correction = nn.Parameter(torch.rand(1))

    def forward(self, x, edge_index):
        # Add self-loops
        edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        x = self.lin(x)
        x = self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x)

        return x

    def message(self, x_i, x_j):
        # Calculate attention weights
        alpha = self.att(torch.abs(x_i - x_j))
        alpha = torch.exp(alpha) / (torch.exp(alpha).sum(dim=1, keepdim=True) + self.bias_correction)

        # Apply attention weights to messages
        return x_j * alpha

    def update(self, aggr_out):
        return aggr_out

In [7]:
# GCN class that takes in the data as an input for dimensions of the convolutions
class Fair_Attention_MP_GCN(torch.nn.Module):
    def __init__(self, data, layers=2, hidden=16, dropout=0):
        super(Fair_Attention_MP_GCN, self).__init__()
        self.conv1 = Attention_FairMessagePassing(data.num_node_features, hidden)
        self.convs = torch.nn.ModuleList()
        
        for i in range(layers - 1):
            self.convs.append(Attention_FairMessagePassing(hidden, hidden))
        
        self.conv2 = Attention_FairMessagePassing(hidden, 2)
        self.dropout = dropout

    def forward(self, x, edge_index, *args, **kwargs):
        x = F.relu(self.conv1(x, edge_index))
        # x = F.dropout(x, p=self.dropout, training=self.training)

        for conv in self.convs:
            x = F.relu(conv(x, edge_index))
            # x = F.dropout(x, p=self.dropout, training=self.training)

        x = self.conv2(x, edge_index)
        
        return F.log_softmax(x, dim=1)

# HParam Tuning

## Non-Attention-Based Models

In [8]:
from sklearn.model_selection import ParameterGrid
from copy import deepcopy
import sys

# Define the hyperparameters
hyperparameters = {
    'learning_rate': [0.001, 0.01, 0.1],
    'layers': [2],
    'dimensions': [4, 8, 16],
    'Fairness': [True, False],
    'epochs': [50, 100],
    'model': [GCN, FairMP_GCN]
}

# Create a list to store the results
non_attention_results = []

# Create a parameter grid
param_grid = ParameterGrid(hyperparameters)

data.to(set_device())

# For each combination of hyperparameters
for params in param_grid:
    
    print("*" * 50)
    print(f"Model: {params['model'].__name__}\n")
    print("Parameters:")
    for key, value in params.items():
        print(f"{key}: {value}")
    print("*" * 50)

    # Create a new model with the current parameters
    model = params['model'](data, layers=params['layers'], hidden=params['dimensions'])
    
    optimizer = torch.optim.Adam(model.parameters(), lr=params['learning_rate'])

    model.to(set_device())
    # data.to(set_device())
    
    # Train the model
    training(model=model, data=data, optimizer=optimizer, epochs=params['epochs'], 
             fairness=params['Fairness'], alpha=0.1, beta=0, gamma=0.1, delta=0.1)
    
    with torch.inference_mode():
        # Test the model
        metrics = test(model, data)
        
        # Store the results
        non_attention_results.append((params, metrics))


    # Delete the variable
    del model
    del optimizer
    torch.cuda.empty_cache()
   

**************************************************
Model: GCN

Parameters:
Fairness: True
dimensions: 4
epochs: 50
layers: 2
learning_rate: 0.001
model: <class '__main__.GCN'>
**************************************************
Epoch 0 | Loss: 0.4674922227859497
Epoch 10 | Loss: 0.46662721037864685
Epoch 20 | Loss: 0.4658626616001129
Epoch 30 | Loss: 0.46512094140052795
Epoch 40 | Loss: 0.46436476707458496
**************************************************
Model: FairMP_GCN

Parameters:
Fairness: True
dimensions: 4
epochs: 50
layers: 2
learning_rate: 0.001
model: <class '__main__.FairMP_GCN'>
**************************************************
Epoch 0 | Loss: 0.485199511051178
Epoch 10 | Loss: 0.4851743280887604
Epoch 20 | Loss: 0.4851522445678711
Epoch 30 | Loss: 0.48513263463974
Epoch 40 | Loss: 0.48511382937431335
**************************************************
Model: GCN

Parameters:
Fairness: True
dimensions: 4
epochs: 50
layers: 2
learning_rate: 0.01
model: <class '__main__.GCN'

## Attention-Based Models

In [9]:
hyperparameters = {
    'learning_rate': [0.001, 0.01, 0.1],
    'layers': [2],
    'dimensions': [4, 8, 16],
    'Fairness': [True, False],
    'epochs': [50, 75, 100],
    'model': [GAT, Fair_Attention_MP_GCN]
}

# Create a list to store the results
attention_results = []

# Create a parameter grid
param_grid = ParameterGrid(hyperparameters)

data.to(set_device())

# For each combination of hyperparameters
for params in param_grid:
    
    print("*" * 50)
    print(f"Model: {params['model'].__name__}\n")
    print("Parameters:")
    for key, value in params.items():
        print(f"{key}: {value}")
    print("*" * 50)

    # Create a new model with the current parameters
    model = params['model'](data, layers=params['layers'], hidden=params['dimensions'])
    
    optimizer = torch.optim.Adam(model.parameters(), lr=params['learning_rate'])

    model.to(set_device())
    # data.to(set_device())
    
    # Train the model
    training(model=model, data=data, optimizer=optimizer, epochs=params['epochs'], 
             fairness=params['Fairness'], alpha=0.1, beta=0, gamma=0.1, delta=0.1)
    
    # Test the model
    metrics = test(model, data)
    
    # Store the results
    attention_results.append((params, metrics))

    # Delete the variable
    del model
    del optimizer

**************************************************
Model: GAT

Parameters:
Fairness: True
dimensions: 4
epochs: 50
layers: 2
learning_rate: 0.001
model: <class '__main__.GAT'>
**************************************************
Epoch 0 | Loss: 0.5010011792182922
Epoch 10 | Loss: 0.49330952763557434
Epoch 20 | Loss: 0.48790794610977173
Epoch 30 | Loss: 0.4842440187931061
Epoch 40 | Loss: 0.48180729150772095
**************************************************
Model: Fair_Attention_MP_GCN

Parameters:
Fairness: True
dimensions: 4
epochs: 50
layers: 2
learning_rate: 0.001
model: <class '__main__.Fair_Attention_MP_GCN'>
**************************************************
Epoch 0 | Loss: 0.47772616147994995
Epoch 10 | Loss: 0.4772789776325226
Epoch 20 | Loss: 0.4768955111503601
Epoch 30 | Loss: 0.4764362871646881
Epoch 40 | Loss: 0.4758746922016144
**************************************************
Model: GAT

Parameters:
Fairness: True
dimensions: 4
epochs: 50
layers: 2
learning_rate: 0.01
mod

# Export

In [10]:
import torch

def flatten_data(data):
    flat_data = {}
    config, metrics = data  # Unpack the tuple into configuration and results

    # Add configuration items to flat_data
    for key, value in config.items():
        if isinstance(value, type):  # Converting class type to string representation
            flat_data[key] = value.__name__
        else:
            flat_data[key] = value

    # Add metric items to flat_data
    for key, value in metrics.items():
        if torch.is_tensor(value):
            flat_data[key] = value.item()  # Convert tensors to Python numbers
        else:
            flat_data[key] = value

    return flat_data


In [11]:
import csv

def write_to_csv(data, filename):
    with open(filename, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=data[0].keys())
        writer.writeheader()
        for item in data:
            writer.writerow(item)

flattened_na_data = [flatten_data(item) for item in non_attention_results]
flattened_data = [flatten_data(item) for item in attention_results]

# Write the flattened data to a CSV file
write_to_csv(flattened_na_data, 'ALIBABA_G_SENS_results.csv')
write_to_csv(flattened_data, 'ALIBABA_G_SENS_results_ATTENTION.csv')