In [1]:
# install libraries
!pip install torch-scatter torch-sparse torch-geometric networkx pandas scikit-learn

Collecting torch-scatter
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch-sparse
  Downloading torch_sparse-0.6.18.tar.gz (209 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m210.0/210.0 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torch-geometric
  Downloading torch_geometric-2.7.0-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.7/63.7 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.7.0-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: torch-scatter, torch-sparse
  Building wheel for to

In [2]:
# Creating DataSet
import pandas as pd

data = {
    "user_id": ["U1","U1","U2","U2","U3","U3"],
    "account_id": ["A1","A1","A2","A2","A3","A3"],
    "transaction_id": ["T1","T2","T3","T4","T5","T6"],
    "amount": [2000, 50000, 3000, 45000, 1500, 60000],
    "is_fraud": [0,1,0,1,0,1]
}

df = pd.DataFrame(data)
df

Unnamed: 0,user_id,account_id,transaction_id,amount,is_fraud
0,U1,A1,T1,2000,0
1,U1,A1,T2,50000,1
2,U2,A2,T3,3000,0
3,U2,A2,T4,45000,1
4,U3,A3,T5,1500,0
5,U3,A3,T6,60000,1


In [3]:
#Build Transaction Graph (NetworkX)
import networkx as nx

G = nx.Graph()

for _, row in df.iterrows():
    user = f"user_{row['user_id']}"
    account = f"account_{row['account_id']}"
    txn = f"txn_{row['transaction_id']}"

    G.add_node(user, node_type="user")
    G.add_node(account, node_type="account")
    G.add_node(txn, node_type="transaction",
               amount=row["amount"],
               label=row["is_fraud"])

    G.add_edge(user, account)
    G.add_edge(account, txn)

print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())

Number of nodes: 12
Number of edges: 9


In [4]:
#PyTorch Geometric
import torch
from torch_geometric.data import Data

node_map = {node: i for i, node in enumerate(G.nodes())}

edges = []
for u, v in G.edges():
    edges.append([node_map[u], node_map[v]])
    edges.append([node_map[v], node_map[u]])

edge_index = torch.tensor(edges, dtype=torch.long).t()

x = []
y = []

for node, attr in G.nodes(data=True):
    if attr["node_type"] == "transaction":
        x.append([attr["amount"]])
        y.append(attr["label"])
    else:
        x.append([0.0])
        y.append(-1)

x = torch.tensor(x, dtype=torch.float)
y = torch.tensor(y, dtype=torch.long)

data = Data(x=x, edge_index=edge_index, y=y)
data

Data(x=[12, 1], edge_index=[2, 18], y=[12])

In [5]:
#GNN Model (GCN)
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class FraudGCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(1, 16)
        self.conv2 = GCNConv(16, 2)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return x

In [6]:
#Train the Model
model = FraudGCN()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

mask = data.y != -1

for epoch in range(100):
    model.train()
    optimizer.zero_grad()

    out = model(data)
    loss = criterion(out[mask], data.y[mask])

    loss.backward()
    optimizer.step()

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 2623.2312
Epoch 10, Loss: 482.3283
Epoch 20, Loss: 178.6290
Epoch 30, Loss: 44.6407
Epoch 40, Loss: 10.9455
Epoch 50, Loss: 77.7170
Epoch 60, Loss: 33.4998
Epoch 70, Loss: 56.1180
Epoch 80, Loss: 10.6700
Epoch 90, Loss: 20.6648


In [7]:
#Evaluate Model
from sklearn.metrics import precision_score, recall_score, roc_auc_score
model.eval()
with torch.no_grad():
    logits = model(data)
    preds = logits[mask].argmax(dim=1)
    probs = torch.softmax(logits[mask], dim=1)[:, 1]
print("Precision:", precision_score(data.y[mask], preds))
print("Recall:", recall_score(data.y[mask], preds))
print("AUC:", roc_auc_score(data.y[mask], probs))

Precision: 0.5
Recall: 1.0
AUC: 0.5


In [8]:
embeddings = model.conv1(data.x, data.edge_index)
embeddings

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  3.6448e-01, -4.1056e-01,
         -8.1204e-03, -1.8575e-01,  0.0000e+00,  0.0000e+00,  0.0000e+00,
          0.0000e+00, -4.0070e-01,  3.7577e-01,  0.0000e+00, -3.8857e-01,
          0.0000e+00],
        [-6.5272e+02, -1.0311e+04, -9.1484e+03,  6.9045e+03,  9.8595e+03,
         -1.5648e+02, -1.5992e+03, -2.9215e+03, -8.6133e+03, -8.7418e+03,
         -7.1596e+03,  7.5821e+03,  7.5280e+03, -4.2300e+03,  1.6989e+03,
         -3.8134e+03],
        [-3.5503e+01, -5.6085e+02, -4.9761e+02,  3.7590e+02,  5.3590e+02,
         -8.5189e+00, -8.7163e+01, -1.5891e+02, -4.6850e+02, -4.7549e+02,
         -3.8943e+02,  4.1203e+02,  4.0982e+02, -2.3008e+02,  9.2040e+01,
         -2.0742e+02],
        [-8.8758e+02, -1.4021e+04, -1.2440e+04,  9.3888e+03,  1.3407e+04,
         -2.1278e+02, -2.1746e+03, -3.9728e+03, -1.1712e+04, -1.1887e+04,
         -9.7357e+03,  1.0310e+04,  1.0237e+04, -5.7520e+03,  2.3103e+03,
         -5.1855e+03],
        [ 0.0000e+00