Fraud detection often involves identifying suspicious activity in transaction networks, where entities (such as users and accounts) and their interactions (such as money transfers) form a graph. Graph Neural Networks (GNNs) can capture complex relationships between entities and detect fraud by learning structural patterns. In this project, I’ll train a Graph Convolutional Network (GCN) on a synthetic transaction graph to classify each node as fraudulent or legitimate.

In [1]:
pip install torch-geometric networkx matplotlib

Collecting torch-geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.1/1.1 MB 56.3 MB/s eta 0:00:00
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1


In [2]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import networkx as nx
import matplotlib.pyplot as plt
import random


In [3]:
# 1. Simulate a small transaction network
# ------------------------------------------------------------------------
# Barabási–Albert (scale-free) graph with 50 nodes ("accounts").
# Each new node attaches to 2 existing nodes with probability proportional
# to their degree; edges stand in for "transactions" between accounts.
# The fixed seed makes the generated topology identical on every run, so
# results obtained on this graph can be reproduced.
G = nx.barabasi_albert_graph(50, 2, seed=42)

In [4]:
# 2. Create synthetic labels for fraud detection
# ------------------------------------------------------------------------

# Start with every node labelled 0 ("legit"): shape (50,), dtype long so the
# values can be used as integer class indices.
labels = torch.zeros(50, dtype=torch.long)

# Pick 5 of the 50 nodes (10%) uniformly at random to be "fraudulent" (label=1).
# A dedicated, seeded generator keeps the selection reproducible across runs
# without touching the global `random` state.
# NOTE: the selection does not look at the graph, so these labels carry no
# structural signal by construction.
label_rng = random.Random(42)
fraud_nodes = label_rng.sample(range(50), 5)

labels[fraud_nodes] = 1

In [5]:
# 3. Convert the NetworkX graph into PyTorch Geometric format
# NetworkX lists each undirected edge once as a (u, v) pair; transposing the
# pair list gives a tensor of shape (2, num_edges).
one_way_edges = torch.tensor(list(G.edges), dtype=torch.long).t().contiguous()
# Append the (v, u) copy of every edge (rows swapped) so that each connection
# is present in both directions, i.e. the graph is undirected for the GCN.
edge_index = torch.cat((one_way_edges, one_way_edges.flip(0)), dim=1)

In [6]:
# 4. Create node features
# ------------------------------------------------------------------------

# For simplicity: random 16-dimensional features per node, uniform in [0, 1).
# In a real system, these could be transaction volumes, activity counts, etc.
# A dedicated seeded generator makes the features reproducible without
# altering PyTorch's global RNG state.
feature_rng = torch.Generator().manual_seed(42)
x = torch.rand((50, 16), generator=feature_rng)

# ------------------------------------------------------------------------
# 5. Define train/test splits
# ------------------------------------------------------------------------

# Boolean masks of shape (50,): train_mask is True for training nodes and
# test_mask is True for test nodes. The masks are disjoint and together
# cover every node.
# First 35 nodes for training, remaining 15 for testing.
train_mask = torch.arange(50) < 35
test_mask = ~train_mask

In [7]:
# 6. Bundle data into a single PyG Data object
# ------------------------------------------------------------------------

# Keep features, connectivity, labels and both split masks in one container
# so they travel together (e.g. when moved to a device).
data = Data(
    x=x,
    edge_index=edge_index,
    y=labels,
    train_mask=train_mask,
    test_mask=test_mask,
)

In [8]:
# 7. Define a simple 2-layer Graph Convolutional Network (GCN)
# -----------------------------------------------------------------------
class GCN(torch.nn.Module):
    """Two-layer graph convolutional network producing per-node logits.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Width of the hidden representation produced by the
            first convolution.
        out_channels: Number of output logits per node (one per class).
        dropout: Probability of zeroing a hidden activation while the module
            is in training mode. Defaults to 0.3, the value that used to be
            hard-coded in ``forward``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, *, dropout=0.3):
        super().__init__()
        # First GCN layer:  in_channels → hidden_channels
        self.conv1 = GCNConv(in_channels, hidden_channels)
        # Second GCN layer: hidden_channels → out_channels
        self.conv2 = GCNConv(hidden_channels, out_channels)
        # Stored so forward() reads the configured rate instead of a literal.
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Compute class logits for every node.

        Args:
            x: Node feature tensor passed to the first convolution.
            edge_index: Graph connectivity passed to both convolutions.

        Returns:
            The output of the second convolution (raw, un-normalised logits).
        """
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active in training mode; model.eval() disables it.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.conv2(hidden, edge_index)

In [9]:
# 8. Set up device, model, optimizer, and loss
# ------------------------------------------------------------------------
# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The graph data and the model weights must live on the same device.
data = data.to(device)
# 16 input features per node -> 32 hidden units -> 2 output logits.
model = GCN(in_channels=16, hidden_channels=32, out_channels=2).to(device)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [10]:
# 9. Train function
def train():
    """Run one full-graph optimisation step and return the training loss.

    Operates on the notebook-level ``model``, ``data``, ``optimizer`` and
    ``loss_fn`` objects.

    Returns:
        float: The loss over the training nodes, computed before the
        parameter update of this step.
    """
    optimizer.zero_grad()  # drop gradients left over from the previous step
    model.train()  # training mode, so dropout is active

    # Logits are computed for every node; only the training nodes
    # contribute to the loss.
    logits = model(data.x, data.edge_index)
    mask = data.train_mask
    step_loss = loss_fn(logits[mask], data.y[mask])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

In [11]:
# 10. Test function
def test():
    """Return the model's accuracy on the test nodes.

    Operates on the notebook-level ``model`` and ``data`` objects.

    Returns:
        float: Fraction of test nodes whose predicted class equals the label.

    Note:
        With only about 10% of nodes labelled as fraud, plain accuracy can
        look high for a model that always predicts the majority class, so
        read this number with care.
    """
    model.eval()   # evaluation mode (disables dropout)
    # Evaluation needs no gradients; skipping autograd bookkeeping saves
    # memory and time without changing the predictions.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)     # pick class with highest logit
    # Count correct predictions on the test set
    correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
    total = data.test_mask.sum()
    return int(correct) / int(total)

In [12]:
# 11. Run the training loop
# One optimisation step per epoch for 30 epochs; after each step, report the
# training loss and the current test accuracy.
NUM_EPOCHS = 30
for epoch in range(1, NUM_EPOCHS + 1):
    loss, acc = train(), test()  # evaluated left to right: train, then test
    print(f"Epoch {epoch:02d}, Loss: {loss:.4f}, Test Accuracy: {acc:.4f}")

Epoch 01, Loss: 1.0517, Test Accuracy: 0.0667
Epoch 02, Loss: 0.7578, Test Accuracy: 0.9333
Epoch 03, Loss: 0.6470, Test Accuracy: 0.9333
Epoch 04, Loss: 0.5610, Test Accuracy: 0.9333
Epoch 05, Loss: 0.4761, Test Accuracy: 0.9333
Epoch 06, Loss: 0.4408, Test Accuracy: 0.9333
Epoch 07, Loss: 0.4168, Test Accuracy: 0.9333
Epoch 08, Loss: 0.3996, Test Accuracy: 0.9333
Epoch 09, Loss: 0.4257, Test Accuracy: 0.9333
Epoch 10, Loss: 0.4293, Test Accuracy: 0.9333
Epoch 11, Loss: 0.4129, Test Accuracy: 0.9333
Epoch 12, Loss: 0.4159, Test Accuracy: 0.9333
Epoch 13, Loss: 0.4223, Test Accuracy: 0.9333
Epoch 14, Loss: 0.4520, Test Accuracy: 0.9333
Epoch 15, Loss: 0.4604, Test Accuracy: 0.9333
Epoch 16, Loss: 0.4643, Test Accuracy: 0.9333
Epoch 17, Loss: 0.4647, Test Accuracy: 0.9333
Epoch 18, Loss: 0.4446, Test Accuracy: 0.9333
Epoch 19, Loss: 0.4331, Test Accuracy: 0.9333
Epoch 20, Loss: 0.4420, Test Accuracy: 0.9333
Epoch 21, Loss: 0.4184, Test Accuracy: 0.9333
Epoch 22, Loss: 0.4284, Test Accur