In [2]:
import torch

# Record which PyTorch build produced the outputs saved in this notebook.
torch_version = torch.__version__
print(torch_version)

2.6.0


✅ What this script does

✅ Creates a transaction graph from a small, hard-coded set of example transactions.
✅ Assigns example node features.
✅ Builds a PyTorch Geometric Data object.
✅ Defines and trains a small GraphSAGE GNN.
✅ Prints all key tables (features, edge index, final suspicion scores).

In [5]:
import torch
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import SAGEConv
import pandas as pd
import networkx as nx
from sklearn.preprocessing import LabelEncoder

# ---------------------------------
# Step 1: Create transactions data
# ---------------------------------
# One (sender, receiver, amount) tuple per transfer; each tuple becomes one
# row of the transactions table and, later, one directed edge of the graph.
transactions = [
    ("A", "B", 500),
    ("A", "C", 200),
    ("B", "D", 700),
    ("C", "D", 300),
]
df = pd.DataFrame(
    transactions,
    columns=['from_account', 'to_account', 'amount'],
)

print("🔎 Transactions table:")
print(df, '\n')



🔎 Transactions table:
  from_account to_account  amount
0            A          B     500
1            A          C     200
2            B          D     700
3            C          D     300 



In [6]:
# ---------------------------------
# Step 2: Create graph and account list
# ---------------------------------
# Every account that appears as a sender or a receiver becomes one graph node.
accounts = pd.unique(df[['from_account', 'to_account']].values.ravel())
account_df = pd.DataFrame({'account': accounts})

# Assign dummy node features (hand-written placeholders, one value per account
# in the order the accounts first appear in the transactions table).
# NOTE(review): avg_amount is in the hundreds while risk_score lies in [0, 1];
# consider standardising the features before training on real data.
account_df['avg_amount'] = [350, 500, 200, 0]  # Placeholder features
account_df['num_tx'] = [2, 1, 1, 0]
account_df['risk_score'] = [0.2, 0.3, 0.3, 0.9]

print("📄 Account features before encoding:")
print(account_df, '\n')

# Encode account names to integer node indices.
encoder = LabelEncoder()
account_df['account_idx'] = encoder.fit_transform(account_df['account'])

# LabelEncoder numbers accounts in sorted order, which is not necessarily the
# order of first appearance used above.  Sorting by the encoded index makes
# row i of the table (and therefore row i of `x` and `labels`) describe node i
# of `edge_index`.
account_df = account_df.sort_values('account_idx').reset_index(drop=True)

# Build edge index: row 0 holds source nodes, row 1 holds target nodes.
# The fitted encoder is applied column by column instead of going through
# DataFrame.replace, which pandas warns about when it has to downcast object
# columns to integers.
edge_index = torch.tensor(
    df[['from_account', 'to_account']].apply(encoder.transform).to_numpy().T,
    dtype=torch.long
)

# Create node feature matrix (one row per node, in node-index order).
x = torch.tensor(
    account_df[['avg_amount', 'num_tx', 'risk_score']].to_numpy(),
    dtype=torch.float
)

# Labels (1 = suspicious, 0 = normal), derived from the account name so they
# stay attached to the right node regardless of row order.
suspicious_accounts = ['D']  # D is suspicious
labels = torch.tensor(
    account_df['account'].isin(suspicious_accounts).astype('int64').to_numpy(),
    dtype=torch.long
)

print("🟢 Node features matrix (x):")
print(x, '\n')

print("🔗 Edge index:")
print(edge_index, '\n')

📄 Account features before encoding:
  account  avg_amount  num_tx  risk_score
0       A         350       2         0.2
1       B         500       1         0.3
2       C         200       1         0.3
3       D           0       0         0.9 

🟢 Node features matrix (x):
tensor([[3.5000e+02, 2.0000e+00, 2.0000e-01],
        [5.0000e+02, 1.0000e+00, 3.0000e-01],
        [2.0000e+02, 1.0000e+00, 3.0000e-01],
        [0.0000e+00, 0.0000e+00, 9.0000e-01]]) 

🔗 Edge index:
tensor([[0, 0, 1, 2],
        [1, 2, 3, 3]]) 



  .replace(dict(zip(encoder.classes_, encoder.transform(encoder.classes_))))


In [7]:

# ---------------------------------
# Step 3: Create PyG data object
# ---------------------------------
# Check that features, labels and edges describe the same set of nodes before
# bundling them; a mismatch here would otherwise surface later as an opaque
# indexing error (or silently wrong neighbourhoods) inside the model.
num_nodes = x.size(0)
assert labels.size(0) == num_nodes, "need exactly one label per node"
assert edge_index.dim() == 2 and edge_index.size(0) == 2, \
    "edge_index must have shape (2, num_edges)"
assert edge_index.numel() == 0 or (
    int(edge_index.min()) >= 0 and int(edge_index.max()) < num_nodes
), "edge_index refers to a node that has no feature row"

# Bundle node features, connectivity and labels into a single graph object.
data = Data(x=x, edge_index=edge_index, y=labels)

# ---------------------------------
# Step 4: Define a simple GraphSAGE model
# ---------------------------------
class GNN(torch.nn.Module):
    """Two-layer GraphSAGE network for node classification.

    Args:
        in_channels: Number of input features per node (default 3).
        hidden_channels: Width of the hidden layer (default 8).
        out_channels: Number of output classes (default 2).
    """

    def __init__(self, in_channels: int = 3, hidden_channels: int = 8,
                 out_channels: int = 2):
        super().__init__()
        self.conv1 = SAGEConv(in_channels, hidden_channels)   # input → hidden
        self.conv2 = SAGEConv(hidden_channels, out_channels)  # hidden → classes

    def forward(self, x, edge_index):
        """Return one row of raw class scores (logits) per node.

        Args:
            x: Node feature matrix passed to the first convolution.
            edge_index: Graph connectivity passed to both convolutions.

        Returns:
            The output of the second convolution; no softmax is applied here.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)


model = GNN()

In [8]:

# ---------------------------------
# Step 4 (continued): Instantiate the model reproducibly
# ---------------------------------
# The GNN class is already defined in the previous cell; defining it a second
# time here only shadowed that definition, so this cell now just creates the
# model instance that the training step uses.
# Seeding the global RNG first makes the random weight initialisation, and
# therefore the losses and suspicion scores below, repeatable across runs.
SEED = 42
torch.manual_seed(SEED)

model = GNN()

In [9]:

# ---------------------------------
# Step 5: Train the GNN
# ---------------------------------
NUM_EPOCHS = 50   # full-graph gradient steps
LOG_EVERY = 10    # report the loss once every this many epochs

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()
for epoch in range(NUM_EPOCHS):
    # Forward pass over the whole graph, then one optimiser update.
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = criterion(out, data.y)
    loss.backward()
    optimizer.step()

    is_log_epoch = (epoch % LOG_EVERY == 0)
    if is_log_epoch:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 1.1621
Epoch 10, Loss: 0.0000
Epoch 20, Loss: 0.0000
Epoch 30, Loss: 0.0000
Epoch 40, Loss: 0.0000


In [10]:
# ---------------------------------
# Step 6: Get predictions and scores
# ---------------------------------
model.eval()
# Inference only: disabling gradient tracking avoids building an autograd
# graph for this forward pass, so the result can be converted to NumPy
# directly.
with torch.no_grad():
    out = model(data.x, data.edge_index)

# Softmax turns the two logits into class probabilities; column 1 is the
# probability of the "suspicious" class (label 1).
probs = F.softmax(out, dim=1)[:, 1].numpy()

# Row i of the model output belongs to row i of the feature matrix, which was
# built from account_df in row order, so a positional assignment lines up.
account_df['suspicion_score'] = probs

print("\n✅ Final account table with suspicion scores:")
print(account_df[['account', 'suspicion_score']])


✅ Final account table with suspicion scores:
  account  suspicion_score
0       A     1.797408e-15
1       B     0.000000e+00
2       C     3.054020e-15
3       D     1.000000e+00
