In [1]:
import data.fraud_dataset as fraud_dataset

In [22]:
DATASET_NAME = "amazon"
TRAIN_SIZE = 0.4
VAL_SIZE = 0.1
RANDOM_SEED = 42
FORCE_RELOAD = False

if DATASET_NAME == "yelp":
    NODE_TYPE = "review"
    FEATURE_SIZE = 32
else:
    NODE_TYPE = "user"
    FEATURE_SIZE = 25

In [13]:
fraud_data = fraud_dataset.FraudDataset(
    DATASET_NAME, 
    train_size=TRAIN_SIZE, 
    val_size=VAL_SIZE, 
    random_seed=RANDOM_SEED, 
    force_reload=FORCE_RELOAD
)
graph = fraud_data[0]

Done loading data from cached files.


In [14]:
from torch_geometric.utils import from_dgl

data = from_dgl(graph)
data.metadata

<bound method HeteroData.metadata of HeteroData(
  user={
    test_mask=[11944],
    val_mask=[11944],
    train_mask=[11944],
    label=[11944],
    feature=[11944, 25],
  },
  (user, net_upu, user)={ edge_index=[2, 351216] },
  (user, net_usu, user)={ edge_index=[2, 7132958] },
  (user, net_uvu, user)={ edge_index=[2, 2073474] }
)>

In [15]:
data.feature_dict

{'user': tensor([[ 1.0000, 26.0000,  0.0000,  ...,  1.0000, 13.0000,  1.0000],
         [ 4.0000, 17.0000,  0.0000,  ...,  0.0000, 45.0000,  1.0000],
         [ 2.0000, 15.0000,  0.0000,  ...,  1.0000, 24.5000,  1.0000],
         ...,
         [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 15.0000,  1.0000],
         [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 88.0000,  1.0000],
         [ 1.0000, 10.0000,  0.0000,  ...,  1.0000, 31.0000,  1.0000]])}

In [16]:
import torch

def mask_label(data, observed_pct=1):
    # Ensure observed_pct is a value between 0 and 1
    assert 0 <= observed_pct <= 1, "observed_pct must be between 0 and 1"
    
    # Create a copy of the labels to modify
    label_mask = data[NODE_TYPE].label.clone()
    unknown_encoding = -1

    # Mask all validation and test labels
    label_mask[data[NODE_TYPE].val_mask.bool()] = unknown_encoding
    label_mask[data[NODE_TYPE].test_mask.bool()] = unknown_encoding

    # Identify the indices of the training data
    train_indices = data[NODE_TYPE].train_mask.nonzero(as_tuple=False).squeeze()

    # Calculate the number of training labels to mask
    num_train_labels = train_indices.size(0)
    num_to_mask = int((1 - observed_pct) * num_train_labels)

    # Randomly select indices to mask
    mask_indices = train_indices[torch.randperm(num_train_labels)[:num_to_mask]]
    print(mask_indices)
    label_mask[mask_indices] = unknown_encoding

    return label_mask

# Example usage
masked_labels = mask_label(data, 0.7)
print(masked_labels)
print((masked_labels == -1).float().mean())  # Prints the fraction of labels that are masked

tensor([10696, 11310,  4330,  ...,  7641, 10028,  9184])
tensor([ 0,  0,  0,  ..., -1, -1, -1])
tensor(0.5208)


In [17]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, to_hetero
from torch_geometric.data import HeteroData
from torch_geometric.loader import DataLoader

In [23]:
from torch_geometric.nn.models import GAT

# Creating a model instance covering heterogeneity
model = GAT(in_channels=FEATURE_SIZE ,hidden_channels=32, num_layers=2, out_channels=2)
model = to_hetero(model, data.metadata(), aggr='sum', debug=True)

opcode         name                       target                       args                                                                                      kwargs
-------------  -------------------------  ---------------------------  ----------------------------------------------------------------------------------------  ------------------------
placeholder    x                          x                            ()                                                                                        {}
placeholder    edge_index                 edge_index                   ()                                                                                        {}
placeholder    edge_weight                edge_weight                  (None,)                                                                                   {}
placeholder    edge_attr                  edge_attr                    (None,)                                                                            

In [24]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)

def train():
    model.train()
    optimizer.zero_grad()
    out = model(data.feature_dict, data.edge_index_dict)
    loss = F.cross_entropy(out[NODE_TYPE][data[NODE_TYPE].train_mask], data[NODE_TYPE].label[data[NODE_TYPE].train_mask])
    loss.backward()
    optimizer.step()
    return float(loss)

for epoch in range(100):
    loss = train()
    print(f'Epoch {epoch+1}, Loss: {loss:.4f}')

  loss = F.cross_entropy(out[NODE_TYPE][data[NODE_TYPE].train_mask], data[NODE_TYPE].label[data[NODE_TYPE].train_mask])
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch 1, Loss: 96.4808
Epoch 2, Loss: 79.7190
Epoch 3, Loss: 268.6375
Epoch 4, Loss: 127.8856
Epoch 5, Loss: 84.8860
Epoch 6, Loss: 97.7253
Epoch 7, Loss: 99.8022
Epoch 8, Loss: 108.9555
Epoch 9, Loss: 114.6783
Epoch 10, Loss: 116.4046
Epoch 11, Loss: 114.2737
Epoch 12, Loss: 109.1056
Epoch 13, Loss: 101.4709
Epoch 14, Loss: 91.6444
Epoch 15, Loss: 79.8691
Epoch 16, Loss: 66.4446
Epoch 17, Loss: 51.7923
Epoch 18, Loss: 36.7540
Epoch 19, Loss: 32.5217
Epoch 20, Loss: 51.5389
Epoch 21, Loss: 59.3634
Epoch 22, Loss: 41.8961
Epoch 23, Loss: 26.1862
Epoch 24, Loss: 26.2061
Epoch 25, Loss: 31.5052
Epoch 26, Loss: 35.1098
Epoch 27, Loss: 36.0087
Epoch 28, Loss: 34.2883
Epoch 29, Loss: 30.2484
Epoch 30, Loss: 24.1128
Epoch 31, Loss: 17.0306
Epoch 32, Loss: 17.3687
Epoch 33, Loss: 25.7753
Epoch 34, Loss: 20.0646
Epoch 35, Loss: 11.8743
Epoch 36, Loss: 13.7569
Epoch 37, Loss: 16.0990
Epoch 38, Loss: 16.3863
Epoch 39, Loss: 14.6222
Epoch 40, Loss: 11.8070
Epoch 41, Loss: 11.0573
Epoch 42, Loss: 1

In [27]:
import torch
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score

def test(model, data):
    model.eval()
    with torch.no_grad():
        out = model(data.feature_dict, data.edge_index_dict)
        scores = torch.softmax(out[NODE_TYPE], dim=1)  # Convert logits to probabilities

    labels = data[NODE_TYPE].label.cpu()
    pred = scores.argmax(dim=1).cpu()

    def calc_metrics(target_mask):
        mask_indices = target_mask.cpu().bool()
        masked_labels = labels[mask_indices]
        masked_pred = pred[mask_indices]
        masked_scores = scores[mask_indices]

        f1 = f1_score(masked_labels, masked_pred, average='macro')
        print(masked_scores.shape)
        try:
            binary_labels = torch.nn.functional.one_hot(masked_labels, num_classes=scores.size(-1)).numpy()
            auc = roc_auc_score(binary_labels, masked_scores.numpy())
            ap = average_precision_score(binary_labels, masked_scores.numpy(), average='macro')
        except Exception as e:
            auc, ap = float('nan'), float('nan')  # In case of an exception (like only one class present), return NaN
        return f1, auc, ap

    train_metrics = calc_metrics(data[NODE_TYPE].train_mask)
    val_metrics = calc_metrics(data[NODE_TYPE].val_mask)
    test_metrics = calc_metrics(data[NODE_TYPE].test_mask)

    print('--- Training Metrics ---')
    print(f'F1 Score: {train_metrics[0]:.4f}, AUC: {train_metrics[1]:.4f}, AP: {train_metrics[2]:.4f}')
    
    print('--- Validation Metrics ---')
    print(f'F1 Score: {val_metrics[0]:.4f}, AUC: {val_metrics[1]:.4f}, AP: {val_metrics[2]:.4f}')
    
    print('--- Test Metrics ---')
    print(f'F1 Score: {test_metrics[0]:.4f}, AUC: {test_metrics[1]:.4f}, AP: {test_metrics[2]:.4f}')

    return {'train': train_metrics, 'val': val_metrics, 'test': test_metrics}


test(model, data)

torch.Size([3455, 2])
torch.Size([863, 2])
torch.Size([4321, 2])
--- Training Metrics ---
F1 Score: 0.6937, AUC: 0.7932, AP: 0.6719
--- Validation Metrics ---
F1 Score: 0.6617, AUC: 0.7612, AP: 0.6328
--- Test Metrics ---
F1 Score: 0.6756, AUC: 0.7651, AP: 0.6431


{'train': (0.693680656054028, 0.7932156993563724, 0.6719451908941052),
 'val': (0.6616604843330619, 0.7612467628560858, 0.6327724655386606),
 'test': (0.6756249530816005, 0.7651458617706912, 0.6430612805404439)}

In [None]:
class GAT(BasicGNN):
    r"""The Graph Neural Network from `"Graph Attention Networks"
    <https://arxiv.org/abs/1710.10903>`_ or `"How Attentive are Graph Attention
    Networks?" <https://arxiv.org/abs/2105.14491>`_ papers, using the
    :class:`~torch_geometric.nn.GATConv` or
    :class:`~torch_geometric.nn.GATv2Conv` operator for message passing,
    respectively.

    Args:
        in_channels (int or tuple): Size of each input sample, or :obj:`-1` to
            derive the size from the first input(s) to the forward method.
            A tuple corresponds to the sizes of source and target
            dimensionalities.
        hidden_channels (int): Size of each hidden sample.
        num_layers (int): Number of message passing layers.
        out_channels (int, optional): If not set to :obj:`None`, will apply a
            final linear transformation to convert hidden node embeddings to
            output size :obj:`out_channels`. (default: :obj:`None`)
        v2 (bool, optional): If set to :obj:`True`, will make use of
            :class:`~torch_geometric.nn.conv.GATv2Conv` rather than
            :class:`~torch_geometric.nn.conv.GATConv`. (default: :obj:`False`)
        dropout (float, optional): Dropout probability. (default: :obj:`0.`)
        act (str or Callable, optional): The non-linear activation function to
            use. (default: :obj:`"relu"`)
        act_first (bool, optional): If set to :obj:`True`, activation is
            applied before normalization. (default: :obj:`False`)
        act_kwargs (Dict[str, Any], optional): Arguments passed to the
            respective activation function defined by :obj:`act`.
            (default: :obj:`None`)
        norm (str or Callable, optional): The normalization function to
            use. (default: :obj:`None`)
        norm_kwargs (Dict[str, Any], optional): Arguments passed to the
            respective normalization function defined by :obj:`norm`.
            (default: :obj:`None`)
        jk (str, optional): The Jumping Knowledge mode. If specified, the model
            will additionally apply a final linear transformation to transform
            node embeddings to the expected output feature dimensionality.
            (:obj:`None`, :obj:`"last"`, :obj:`"cat"`, :obj:`"max"`,
            :obj:`"lstm"`). (default: :obj:`None`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.GATConv` or
            :class:`torch_geometric.nn.conv.GATv2Conv`.
    """
    supports_edge_weight: Final[bool] = False
    supports_edge_attr: Final[bool] = True
    supports_norm_batch: Final[bool]

    def init_conv(self, in_channels: Union[int, Tuple[int, int]],
                  out_channels: int, **kwargs) -> MessagePassing:

        v2 = kwargs.pop('v2', False)
        heads = kwargs.pop('heads', 1)
        concat = kwargs.pop('concat', True)

        # Do not use concatenation in case the layer `GATConv` layer maps to
        # the desired output channels (out_channels != None and jk != None):
        if getattr(self, '_is_conv_to_out', False):
            concat = False

        if concat and out_channels % heads != 0:
            raise ValueError(f"Ensure that the number of output channels of "
                             f"'GATConv' (got '{out_channels}') is divisible "
                             f"by the number of heads (got '{heads}')")

        if concat:
            out_channels = out_channels // heads

        Conv = GATConv if not v2 else GATv2Conv
        return Conv(in_channels, out_channels, heads=heads, concat=concat,
                    dropout=self.dropout.p, **kwargs)
