BLOCK 1: Mount Google Drive & Load Dataset

In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Load dataset using pandas
import pandas as pd

# Dataset
csv_path = '/content/drive/MyDrive/TGAT Model/clean_combined_dataset.csv'
df = pd.read_csv(csv_path)

# Show first few rows
print(df.head())

# Show useful info about columns & data types
print(df.info())


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
   Unnamed: 0.1                                        from  \
0      10159229  0x219c5355f7496c47e743f5a6d98527509ea42444   
1      10010990  0x21a1662d90d163f79f9e71fda42c60926e80699c   
2      10159517  0xcbe64fb9fdee1eb4172d2bc375c12ace497ac253   
3      13323550  0x007077061537f25eaf485a1e6fa4af64e883be98   
4      13323549  0x7a44dbe0d1823cd177a9b4c35899046190811fb3   

                                           to  amount     timestamp  \
0  0xaaaf91d9b90df800df4f55c205fd6989c977e73a     0.0  1.494145e+09   
1  0xaaaf91d9b90df800df4f55c205fd6989c977e73a     0.0  1.494196e+09   
2  0xaaaf91d9b90df800df4f55c205fd6989c977e73a     0.0  1.494145e+09   
3  0xf0f8b0b8dbb1124261fc8d778e2287e3fd2cf4f5     0.0  1.494185e+09   
4  0xf0f8b0b8dbb1124261fc8d778e2287e3fd2cf4f5     0.0  1.494184e+09   

   fromIsPhi  toIsPhi                 date  Unnamed: 0  
0       

BLOCK 2: Preprocess Data & Split by Month

In [2]:
def preprocess(df):
    # Drop unwanted index columns
    df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], inplace=True, errors='ignore')

    # Convert 'date' string column to datetime format
    df['date'] = pd.to_datetime(df['date'], format='mixed', errors='coerce')

    # Drop rows with invalid dates
    df.dropna(subset=['date'], inplace=True)

    # Sort by timestamp for time consistency
    df.sort_values(by='timestamp', inplace=True)
    df.reset_index(drop=True, inplace=True)  # Optional but clean

    # Create 'month' column for splitting
    df['month'] = df['date'].dt.to_period("M")

    # Split DataFrame into dictionary of monthly splits
    graph_splits = dict(tuple(df.groupby('month')))

    # Debug prints
    print("‚úÖ Dataset Preprocessed")
    print("üìÜ Total months found:", len(graph_splits))
    print(f"üóìÔ∏è Found {len(graph_splits)} months:", list(graph_splits.keys()))
    print(f"üßæ Total transactions: {len(df)}")

    return graph_splits

graph_splits = preprocess(df)

‚úÖ Dataset Preprocessed
üìÜ Total months found: 23
üóìÔ∏è Found 23 months: [Period('2016-11', 'M'), Period('2017-03', 'M'), Period('2017-05', 'M'), Period('2017-06', 'M'), Period('2017-07', 'M'), Period('2017-08', 'M'), Period('2017-09', 'M'), Period('2017-10', 'M'), Period('2017-11', 'M'), Period('2017-12', 'M'), Period('2018-01', 'M'), Period('2018-02', 'M'), Period('2018-03', 'M'), Period('2018-04', 'M'), Period('2018-05', 'M'), Period('2018-06', 'M'), Period('2018-07', 'M'), Period('2018-08', 'M'), Period('2018-09', 'M'), Period('2018-10', 'M'), Period('2018-11', 'M'), Period('2018-12', 'M'), Period('2019-01', 'M')]
üßæ Total transactions: 69486


BLOCK 3: Build Directed Temporal Graph

In [3]:
import networkx as nx

def build_graph_from_month(df_month):
    G = nx.DiGraph()

    for _, row in df_month.iterrows():
        # Add edge with attributes
        G.add_edge(row['from'], row['to'], amount=row['amount'], timestamp=row['timestamp'])

        for node in [row['from'], row['to']]:
            G.nodes[node]['is_phishing'] = max(G.nodes[node].get('is_phishing', 0), row['fromIsPhi'] if node == row['from'] else row['toIsPhi'])
            G.nodes[node]['timestamp'] = row['timestamp']

    return G


BLOCK 4: Add 14 Node + 3 Edge Features (PDTGA Section 4.4)

In [4]:
def add_features(G):
    for node in G.nodes():
        # 1Ô∏è‚É£ Inbound and outbound edges
        in_edges = list(G.in_edges(node))
        out_edges = list(G.out_edges(node))

        in_deg = len(in_edges)
        out_deg = len(out_edges)

        recv_amt = [G[u][v]['amount'] for u, v in in_edges]
        sent_amt = [G[u][v]['amount'] for u, v in out_edges]

        total_recv = sum(recv_amt)
        total_sent = sum(sent_amt)

        avg_recv = total_recv / len(recv_amt) if recv_amt else 0.0
        avg_sent = total_sent / len(sent_amt) if sent_amt else 0.0

        balance = total_recv - total_sent

        # 2Ô∏è‚É£ Additional temporal edge features
        all_edges = in_edges + out_edges
        edge_amounts = [G[u][v]['amount'] for u, v in all_edges]
        timestamps = [G[u][v]['timestamp'] for u, v in all_edges]

        mean_amt = sum(edge_amounts) / len(edge_amounts) if edge_amounts else 0.0
        min_ts = min(timestamps) if timestamps else 0.0
        max_ts = max(timestamps) if timestamps else 0.0

        # 3Ô∏è‚É£ Assemble all 17 features: 14 static + 3 temporal edge-based
        G.nodes[node]['x'] = [
            in_deg, out_deg,
            len(recv_amt), len(sent_amt),
            total_recv, total_sent,
            avg_recv, avg_sent,
            balance,
            0, 0, 0, 0, 0,  # üß© Padding (features 10‚Äì14)
            mean_amt, min_ts, max_ts  # üïí Temporal edge-derived (15‚Äì17)
        ]

        # Needed for TGAT's œÜ(t)
        G.nodes[node]['timestamp'] = max_ts


Info 1:

So the training process should be:

In [5]:
# [Graph Data] ‚ûù Temporal Graph Attention Layer ‚ûù Node Embeddings ‚ûù MLP ‚ûù Phishing Classification


In [6]:
!pip install torch-geometric



BLOCK 5: Convert NetworkX ‚ûù PyTorch Geometric Data Object

In [7]:
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
import torch

def convert_to_pyg(G):
    # Extract node features and timestamp
    pyg_data = from_networkx(G, group_node_attrs=['x', 'timestamp'])
    pyg_data.x = pyg_data.x.float()  # [N, 17]
    pyg_data.x = (pyg_data.x - pyg_data.x.mean(dim=0)) / (pyg_data.x.std(dim=0) + 1e-5)   # Normalize Features

    # Build edge_index and edge timestamps manually
    node_list = list(G.nodes())
    node_id_map = {node: i for i, node in enumerate(node_list)}

    # Build edge_index manually
    edge_index = []
    edge_timestamps = []

    for u, v, attr in G.edges(data=True):
        edge_index.append([node_id_map[u], node_id_map[v]])
        edge_timestamps.append(attr['timestamp'])

    pyg_data.edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
    pyg_data.edge_timestamp = torch.tensor(edge_timestamps, dtype=torch.float)

    # Label tensor
    pyg_data.y = torch.tensor(
        [G.nodes[n]['is_phishing'] for n in node_list],
        dtype=torch.long
    )

    # Timestamp tensor
    pyg_data.timestamp = torch.tensor(
        [G.nodes[n]['timestamp'] for n in node_list],
        dtype=torch.float
    )

    return pyg_data


BLOCK 6.1: Temporal Encoding œÜ(t) (Section 3.2)

In [8]:
import torch
import numpy as np

def time_encoding(t_diff, d_model=16, max_time_scale=10000.0):
    """
    Sinusoidal time encoding as described in PDTGA (TGAT-based).

    Args:
        t_diff (Tensor): Tensor of shape [num_edges] or [batch_size]
        d_model (int): Dimension of the time embedding (should match TGAT time_dim)
        max_time_scale (float): Scaling constant for frequency range

    Returns:
        Tensor: Time embedding of shape [num_edges, d_model]
    """
    # Ensure float32 for precision
    t_diff = t_diff.float().unsqueeze(1)  # [E, 1]

    # Generate frequency base (same as in transformers)
    div_term = torch.exp(
        torch.arange(0, d_model, 2, dtype=torch.float32) * (-np.log(max_time_scale) / d_model)
    )  # shape: [d_model // 2]

    # Compute sinusoidal encoding
    sinusoid = torch.zeros((t_diff.shape[0], d_model), device=t_diff.device)
    sinusoid[:, 0::2] = torch.sin(t_diff * div_term)  # even indices
    sinusoid[:, 1::2] = torch.cos(t_diff * div_term)  # odd indices
    return sinusoid  # [num_edges, d_model]


BLOCK 6.2: 2-Layer TGAT Module

In [9]:
from torch_geometric.nn import GATConv
import torch.nn as nn
import torch.nn.functional as F

class TGATLayer(nn.Module):
    def __init__(self, in_dim, out_dim, time_dim=16):
        super().__init__()
        self.time_dim = time_dim
        self.attn = GATConv(in_dim + time_dim, out_dim)

    def forward(self, x, edge_index, src_ts, dst_ts):
        t_diff = dst_ts - src_ts  # [E]
        t_enc = time_encoding(t_diff, d_model=self.time_dim)  # [E, time_dim]
        x_src = x[edge_index[0]]  # Only source node features for each edge: [E, F]

        # üü¶ Augment only the source node features
        x_aug = torch.cat([x, torch.zeros(x.size(0), self.time_dim, device=x.device)], dim=1)
        x_aug[edge_index[0]] = torch.cat([x_src, t_enc], dim=1)

        return self.attn(x_aug, edge_index)

class PDTGA(nn.Module):
    def __init__(self, node_dim, time_dim=16, hidden_dim=50):
        super().__init__()
        self.dropout = nn.Dropout(0.2)
        self.layer1 = TGATLayer(node_dim, hidden_dim, time_dim)
        self.layer2 = TGATLayer(hidden_dim, hidden_dim, time_dim)

    def forward(self, x, edge_index, timestamps):
        src_ts = timestamps[edge_index[0]]
        dst_ts = timestamps[edge_index[1]]
        x = self.dropout(F.relu(self.layer1(x, edge_index, src_ts, dst_ts)))
        x = self.dropout(F.relu(self.layer2(x, edge_index, src_ts, dst_ts)))
        return x


BLOCK 7: MLP Classifier

In [10]:
import torch.nn as nn

class PhishingClassifier(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(in_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(32, 1),     # Only 1 output neuron for binary classification
            nn.Sigmoid()          # Sigmoid activation
        )

    def forward(self, x):
        return self.fc(x)  # Output shape: [N, 1]


TGAT Model Pipeline (PDTGA)

In [11]:
# üìÅ CSV ‚ûù üßº Clean ‚ûù üåê Graph ‚ûù üßÆ Features ‚ûù üß† Time Encoding œÜ(t) ‚ûù üß≤ TGAT (2 layers) ‚ûù üéØ MLP ‚ûù üìä Evaluation


BLOCK 8: Train TGAT Model (BCELoss + Adam + Early Stop + Flood)

In [12]:
import torch
import torch.nn as nn
from sklearn.metrics import f1_score

# üß† Define model and classifier
model = PDTGA(node_dim=18, time_dim=16, hidden_dim=50)  # 17 static + 1 timestamp = 18
clf = PhishingClassifier(in_dim=50)      # input: output from TGAT, dim=50

# üéØ Combine parameters
params = list(model.parameters()) + list(clf.parameters())
optimizer = torch.optim.Adam(params, lr=0.01)

# üåä Flooding + Early Stop
flood_level = 0.3
early_stop = 10
no_improve = 0
best_f1 = 0.0
tolerance = 0.001
threshold = 0.7

# üîÅ Training loop
month_list = list(graph_splits.keys())
train_months = month_list[:18]
print(f"üß™ Test months ({len(train_months)}): {train_months}")

for month in train_months:
    print(f"\nüü¶ Training on: {month}")
    G = build_graph_from_month(graph_splits[month])
    add_features(G)
    data = convert_to_pyg(G)

    for epoch in range(50):
        model.train()
        clf.train()

        out = model(data.x, data.edge_index, data.timestamp)
        logits = clf(out).squeeze()  # Shape: [N]

        # Prepare labels for BCELoss
        targets = data.y.float()  # Shape: [N]

        # üìå Apply weights manually
        weights = torch.ones_like(targets)
        weights[data.y == 1] = 5.0  # phishing = 1 ‚ûù weight 30.0

        bce = nn.BCELoss(weight=weights)

        # üí• Apply BCE + Flooding
        loss_raw = bce(logits, targets)
        # loss = (loss_raw - flood_level).abs() + flood_level  # flooding trick
        loss = loss_raw

        # Backward pass
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # üß™ Monitor basic performance
        with torch.no_grad():
            pred = (logits >= threshold).long()  # Convert probabilities to class (0 or 1)
            f1 = f1_score(data.y.cpu(), pred.cpu(), average='macro')
            correct = (pred == data.y).sum().item()
            acc = correct / len(data.y)

        if f1 > best_f1 + tolerance:
            best_f1 = f1
            no_improve = 0
        else:
            no_improve += 1

        if no_improve >= early_stop:
            print(f"‚õî Early stopping at epoch {epoch} due to no F1 improvement")
            break

        if epoch % 5 == 0:
            print(f"Epoch {epoch:02d} | Loss: {loss.item():.4f} | Acc: {acc:.2%} | F1: {f1:.4f}")

    print(f"üéØ Classifier on {month}: Loss {loss.item():.4f} | Acc {acc:.2%}")


üß™ Test months (18): [Period('2016-11', 'M'), Period('2017-03', 'M'), Period('2017-05', 'M'), Period('2017-06', 'M'), Period('2017-07', 'M'), Period('2017-08', 'M'), Period('2017-09', 'M'), Period('2017-10', 'M'), Period('2017-11', 'M'), Period('2017-12', 'M'), Period('2018-01', 'M'), Period('2018-02', 'M'), Period('2018-03', 'M'), Period('2018-04', 'M'), Period('2018-05', 'M'), Period('2018-06', 'M'), Period('2018-07', 'M'), Period('2018-08', 'M')]

üü¶ Training on: 2016-11
Epoch 00 | Loss: 1.5250 | Acc: 66.67% | F1: 0.4000
Epoch 05 | Loss: 1.1551 | Acc: 66.67% | F1: 0.4000
Epoch 10 | Loss: 1.1117 | Acc: 100.00% | F1: 1.0000
Epoch 15 | Loss: 0.3596 | Acc: 100.00% | F1: 1.0000
‚õî Early stopping at epoch 17 due to no F1 improvement
üéØ Classifier on 2016-11: Loss 0.0866 | Acc 100.00%

üü¶ Training on: 2017-03
‚õî Early stopping at epoch 0 due to no F1 improvement
üéØ Classifier on 2017-03: Loss 0.2235 | Acc 100.00%

üü¶ Training on: 2017-05
‚õî Early stopping at epoch 0 due to n

BLOCK 9: Evaluate on Test Months

In [13]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_recall_fscore_support
import numpy as np

# 1Ô∏è‚É£ Prepare storage
all_true, all_pred, all_probs = [], [], []

# ‚úÖ Define your months
month_list = list(graph_splits.keys())
test_months = month_list[18:]
print(f"üß™ Test months ({len(test_months)}): {test_months}")

# 2Ô∏è‚É£ Loop over test months
for month in test_months:
    print(f"\nüü• Testing on: {month}")

    # Build test graph
    G = build_graph_from_month(graph_splits[month])
    add_features(G)
    data = convert_to_pyg(G)

    # Predict
    with torch.no_grad():
        model.eval()
        clf.eval()

        out = model(data.x, data.edge_index, data.timestamp)
        logits = clf(out).squeeze()  # [N]
        pred = (logits >= 0.5).long()

    # Store for summary
    all_true.extend(data.y.cpu().numpy())
    all_pred.extend(pred.cpu().numpy())
    all_probs.extend(logits.cpu().numpy())

# 3Ô∏è‚É£ Combined summary
print("\nTest Summary:")

all_true = np.array(all_true)
all_pred = np.array(all_pred)
all_probs = np.array(all_probs)

print("Total nodes in test data:", len(all_true))
print("Total NORMAL nodes (true):", np.sum(all_true == 0))
print("Total PHISHING nodes (true):", np.sum(all_true == 1))

# Confusion matrix
cm = confusion_matrix(all_true, all_pred)
print("\nüî¢ Confusion Matrix:")
print(cm)

# Classification Report
print("\nüìä Classification Report:")
print(classification_report(all_true, all_pred, target_names=["Normal", "Phishing"], zero_division=0))

# AUC Score
try:
    auc = roc_auc_score(all_true, all_probs)
    print(f"üìà AUC Score: {auc:.4f}")
except ValueError:
    print("‚ö†Ô∏è AUC Score cannot be computed (only one class present in predictions).")

# Precision, Recall, F1 (macro)
prec, rec, f1, _ = precision_recall_fscore_support(all_true, all_pred, average='macro', zero_division=0)
print(f"\n‚öôÔ∏è Macro Precision: {prec:.4f} | Recall: {rec:.4f} | F1: {f1:.4f}")

# Detailed Metrics
if confusion_matrix(all_true, all_pred).shape == (2, 2):
    tn, fp, fn, tp = confusion_matrix(all_true, all_pred).ravel()
    print(f"\n‚úÖ Normal correctly classified: {tn} / {tn + fp} = {tn / (tn + fp):.2%}")
    print(f"‚úÖ Phishing correctly classified: {tp} / {tp + fn} = {tp / (tp + fn):.2%}")


üß™ Test months (5): [Period('2018-09', 'M'), Period('2018-10', 'M'), Period('2018-11', 'M'), Period('2018-12', 'M'), Period('2019-01', 'M')]

üü• Testing on: 2018-09

üü• Testing on: 2018-10

üü• Testing on: 2018-11

üü• Testing on: 2018-12

üü• Testing on: 2019-01

Test Summary:
Total nodes in test data: 1003
Total NORMAL nodes (true): 800
Total PHISHING nodes (true): 203

üî¢ Confusion Matrix:
[[635 165]
 [ 50 153]]

üìä Classification Report:
              precision    recall  f1-score   support

      Normal       0.93      0.79      0.86       800
    Phishing       0.48      0.75      0.59       203

    accuracy                           0.79      1003
   macro avg       0.70      0.77      0.72      1003
weighted avg       0.84      0.79      0.80      1003

üìà AUC Score: 0.7940

‚öôÔ∏è Macro Precision: 0.7041 | Recall: 0.7737 | F1: 0.7213

‚úÖ Normal correctly classified: 635 / 800 = 79.38%
‚úÖ Phishing correctly classified: 153 / 203 = 75.37%
