In [1]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, to_hetero
from torch_geometric.data import HeteroData
from torch_geometric.loader import DataLoader
from torch import nn
from torch_geometric.utils import from_dgl
from tqdm import tqdm

In [2]:
import data.fraud_dataset as fraud_dataset

DATASET_NAME = "yelp"
TRAIN_SIZE = 0.4
VAL_SIZE = 0.1
RANDOM_SEED = 42
FORCE_RELOAD = False

In [3]:
fraud_data = fraud_dataset.FraudDataset(
    DATASET_NAME, 
    train_size=TRAIN_SIZE, 
    val_size=VAL_SIZE, 
    random_seed=RANDOM_SEED, 
    force_reload=FORCE_RELOAD
)
graph = fraud_data[0]

data = from_dgl(graph)
data.metadata

Done loading data from cached files.


<bound method HeteroData.metadata of HeteroData(
  review={
    test_mask=[45954],
    val_mask=[45954],
    train_mask=[45954],
    label=[45954],
    feature=[45954, 32],
  },
  (review, net_rsr, review)={ edge_index=[2, 6805486] },
  (review, net_rtr, review)={ edge_index=[2, 1147232] },
  (review, net_rur, review)={ edge_index=[2, 98630] }
)>

In [4]:
class GAT(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, heads=1, dropout=0.6):
        super(GAT, self).__init__()
        # First GAT convolution layer
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, concat=True, dropout=dropout, bias=False)
        # Second GAT convolution layer
        self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1, concat=True, bias=False)
        self.dropout = dropout

    def forward(self, x, edge_index):
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.elu(self.conv1(x, edge_index))  # Apply the first GATConv layer
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)  # Apply the second GATConv layer
        return x

In [5]:
# from torch_geometric.nn.models import GAT

# Creating a model instance covering heterogeneity
# model = GAT(in_channels=32 ,hidden_channels=32, num_layers=2, out_channels=2)

model = GAT(in_channels=32 ,hidden_channels=32, heads=2, out_channels=2)
model = to_hetero(model, data.metadata(), aggr='sum', debug=True)

opcode         name        target                                args                     kwargs
-------------  ----------  ------------------------------------  -----------------------  ----------------------------------------------
placeholder    x           x                                     ()                       {}
placeholder    edge_index  edge_index                            ()                       {}
call_function  dropout     <function dropout at 0x7f35d19f3ce0>  (x,)                     {'p': 0.6, 'training': True, 'inplace': False}
call_module    conv1       conv1                                 (dropout, edge_index)    {}
call_function  elu         <function elu at 0x7f35d1a00400>      (conv1,)                 {'alpha': 1.0, 'inplace': False}
call_function  dropout_1   <function dropout at 0x7f35d19f3ce0>  (elu,)                   {'p': 0.6, 'training': True, 'inplace': False}
call_module    conv2       conv2                                 (dropout_1, edge_index)  



In [6]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = model.to(device)
data = data.to(device)

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.feature_dict, data.edge_index_dict)

    train_mask = data['review'].train_mask.to(device)
    label = data['review'].label.to(device)

    logits = out['review'][train_mask.bool()]
    targets = label[train_mask.bool()].long()

    loss = F.cross_entropy(logits, targets)

    loss.backward()
    optimizer.step()
    return loss.item()

n_epochs = 300
progress_bar = tqdm(range(n_epochs), desc='Training')

for epoch in progress_bar:
    loss = train()
    progress_bar.set_postfix({'Loss': f'{loss:.4f}'})

Training: 100%|██████████| 300/300 [00:30<00:00,  9.81it/s, Loss=0.3751]


In [None]:
import torch
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score


def test(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.feature_dict, data.edge_index_dict)
        scores = torch.softmax(out['review'], dim=1)  # Convert logits to probabilities

    labels = data['review'].label.cpu()
    pred = scores.argmax(dim=1).cpu()

    def calc_metrics(target_mask):
        mask_indices = target_mask.cpu()
        masked_labels = labels[mask_indices.bool()]
        masked_pred = pred[mask_indices.bool()]
        masked_scores = scores[mask_indices.bool()][:, 1].cpu()

        f1 = f1_score(masked_labels, masked_pred, average='macro')
        try:
            auc = roc_auc_score(masked_labels, masked_scores)
            ap = average_precision_score(masked_labels, masked_scores)
        except Exception as e:
            print(e)
            auc, ap = float('nan'), float('nan')  # In case of an exception (like only one class present), return NaN
        return f1, auc, ap

    train_metrics = calc_metrics(data['review'].train_mask)
    val_metrics = calc_metrics(data['review'].val_mask)
    test_metrics = calc_metrics(data['review'].test_mask)

    print('--- Training Metrics ---')
    print(f'F1 Score: {train_metrics[0]:.4f}, AUC: {train_metrics[1]:.4f}, AP: {train_metrics[2]:.4f}')
    
    print('--- Validation Metrics ---')
    print(f'F1 Score: {val_metrics[0]:.4f}, AUC: {val_metrics[1]:.4f}, AP: {val_metrics[2]:.4f}')
    
    print('--- Test Metrics ---')
    print(f'F1 Score: {test_metrics[0]:.4f}, AUC: {test_metrics[1]:.4f}, AP: {test_metrics[2]:.4f}')

    return {'train': train_metrics, 'val': val_metrics, 'test': test_metrics}

_ = test(model, data)

--- Training Metrics ---
F1 Score: 0.4602, AUC: 0.7145, AP: 0.3041
--- Validation Metrics ---
F1 Score: 0.4629, AUC: 0.7165, AP: 0.2910
--- Test Metrics ---
F1 Score: 0.4615, AUC: 0.7146, AP: 0.2907


{'train': (0.4601920648439106, 0.7145413732230615, 0.30406614726013415),
 'val': (0.4629499766245909, 0.7164793646571422, 0.2910083500944018),
 'test': (0.46150989110166585, 0.7145653532011383, 0.2906736540234121)}

In [8]:
# There is multi-step label leakage in training, might not learn anything, might learn something
# This way can avoid using ego network (which cannot be run efficiently)

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Parameter
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.nn.inits import glorot, reset, zeros
from torch_geometric.utils import (add_remaining_self_loops, add_self_loops,
                                   remove_self_loops, softmax)
from torch_scatter import scatter_add


class LAGATConvLayer(MessagePassing):
    def __init__(self,
                 in_channels,
                 out_channels,
                 num_labels,
                 label_embedding_dim,
                 heads=1,
                 concat=True,
                 negative_slope=0.2,
                 dropout=0,
                 bias=False,
                 **kwargs):
        super(LAGATConvLayer, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        # add label related params
        self.num_labels = num_labels
        self.label_embedding_dim = label_embedding_dim
        self.heads = heads
        self.concat = concat
        self.negative_slope = negative_slope
        self.dropout = dropout

        self.weight = Parameter(torch.Tensor(in_channels,
                                             heads * out_channels))
        # Add learnable label embedding 
        self.label_embs = Parameter(torch.Tensor(self.num_labels, self.label_embedding_dim))

        # add Label Embedding into attention calculation
        self.att = Parameter(torch.Tensor(1, heads, 2 * out_channels + self.label_embedding_dim))

        if bias and concat:
            self.bias = Parameter(torch.Tensor(heads * out_channels))
        elif bias and not concat:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        glorot(self.weight)
        glorot(self.att)
        zeros(self.bias)

    def forward(self, x, edge_index, label_mask, size=None):
        """"""
        if size is None and torch.is_tensor(x):
            edge_index, _ = remove_self_loops(edge_index)
            edge_index, _ = add_self_loops(edge_index,
                                           num_nodes=x.size(self.node_dim))
        if torch.is_tensor(x):
            x = torch.matmul(x, self.weight)
        else:
            x = (None if x[0] is None else torch.matmul(x[0], self.weight),
                None if x[1] is None else torch.matmul(x[1], self.weight))

        # print(f"x: {x.shape}, edge_index: {edge_index.shape}, label_mask: {label_mask.shape}")
        return self.propagate(edge_index, size=size, x=x, label_mask=label_mask)

    def message(self, edge_index_i, edge_index_j, x_i, x_j, size_i, label_mask):
        # Compute attention coefficients.
        x_j = x_j.view(-1, self.heads, self.out_channels)
        # index and expanded label_emb to be concatenated
        loop_edge_mask = edge_index_i == edge_index_j
        label_j = label_mask[edge_index_j]
        # Replace labels for loop edges with 0 (index for self.label_embs[0])
        label_j = torch.where(loop_edge_mask, torch.zeros_like(label_j), label_j)
        label_emb = self.label_embs[label_j]
        label_emb = label_emb.unsqueeze(1).repeat(1, self.heads, 1)
        if x_i is None:
            alpha = (torch.cat([x_j, label_emb], dim=-1) * self.att[:, :, self.out_channels:]).sum(dim=-1)
        else:
            x_i = x_i.view(-1, self.heads, self.out_channels)
            alpha = (torch.cat([x_i, x_j, label_emb], dim=-1) * self.att).sum(dim=-1)


        alpha = F.leaky_relu(alpha, self.negative_slope)
        alpha = softmax(alpha, edge_index_i, num_nodes=size_i)

        # Sample attention coefficients stochastically.
        alpha = F.dropout(alpha, p=self.dropout, training=self.training)

        # return (x_j * alpha.view(-1, self.heads, 1)).view(-1, self.heads*self.out_channels)
        output = (x_j * alpha.view(-1, self.heads, 1)).view(-1, self.heads*self.out_channels)
        # print(f"x_i: {x_i.shape}, x_j: {x_j.shape}, label_mask: {label_mask.shape}, alpha: {alpha.shape}, output: {output.shape}")
        return output

    def update(self, aggr_out):
        # if self.concat is True:
        #     aggr_out = aggr_out.view(-1, self.heads * self.out_channels)
        # else:
        #     aggr_out = aggr_out.mean(dim=1)

        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {}, heads={})'.format(self.__class__.__name__,
                                             self.in_channels,
                                             self.out_channels, self.heads)