# CS5284 Group07 - XGBoost

In [1]:
!pip install torch

Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
Collecting nvidia-cublas-cu12==12.9.1.4.* (from cuda-toolkit[cublas,curand,cusolver,cusparse]==12.*->libraft-cu12==25.10.*->libcugraph-cu12==25.10.*->pylibcugraph-cu12==25.10.*->nx-cugraph-cu12)
  Downloading https://pypi.nvidia.com/nvidia-cublas-cu12/nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl (581.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.2/581.2 MB[0m [31m58.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-curand-cu12==10.3.10.19.* (from cuda-toolkit[cublas,curand,cusolver,cusparse]==12.*->libraft-cu12==25.10.*->libcugraph-cu12==25.10.*->pylibcugraph-cu12==25.10.*->nx-cugraph-cu12)
  Downloading https://pypi.nvidia.com/nvidia-curand-cu12/nvidia_curand_cu12-10.3.10.19-py3-none-manylinux_2_27_x86_64.whl (68.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.3/68.3 MB[0m [31m66.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuso

In [58]:
import pandas as pd
import networkx as nx
from xgboost import DMatrix, train
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np
import torch
import torch.nn as nn

nx.config.warnings_to_ignore.add("cache") # Ignore networkx cugraph warnings

In [59]:
def load_elliptic_splits(include_unknowns=True, combine_train_val=True):
  """Load pre-computed splits from splits folder"""
  splits_dir = 'data/splits/full_dataset' if include_unknowns else 'data/splits/labeled_only'
  print(f"Loading splits from {splits_dir} with include_unknowns={include_unknowns}")

  # Load features and classes for each split
  def load_split_data(split_name):
      features_df = pd.read_csv(f'{splits_dir}/{split_name}_features.csv')  # Has header
      classes_df = pd.read_csv(f'{splits_dir}/{split_name}_classes.csv')
      edges_df = pd.read_csv(f'{splits_dir}/{split_name}_edges.csv')

      # Process features - txId is first column, timestep is second, features start from 3rd
      node_ids = features_df['txId'].values
      timesteps = features_df['timestep'].values  # Extract timestep information
      features = features_df.iloc[:, 1:].values.astype(np.float32)  # Skip only txId, include timestep and all features

      # Process labels - handle both string and integer class values
      labels = []
      for _, row in classes_df.iterrows():
          class_val = row['class']

          # Handle both string and integer class values
          if class_val == '1' or class_val == 1:  # illicit
              labels.append(0)
          elif class_val == '2' or class_val == 2:  # licit
              labels.append(1)
          else:  # unknown (string 'unknown' or any other value)
              labels.append(-1 if include_unknowns else None)

      # Filter out None labels if not including unknowns
      if not include_unknowns:
          valid_mask = [l is not None for l in labels]
          valid_indices = [i for i, valid in enumerate(valid_mask) if valid]
          node_ids = node_ids[valid_indices]
          timesteps = timesteps[valid_indices]
          features = features[valid_indices]
          labels = [labels[i] for i in valid_indices]

      # Create node mapping for edges
      node_to_idx = {node_id: idx for idx, node_id in enumerate(node_ids)}
      node_index = [node_to_idx[node_id] for node_id in node_ids]

      # Process edges
      edge_list = []
      for _, row in edges_df.iterrows():
          if row['txId1'] in node_to_idx and row['txId2'] in node_to_idx:
              edge_list.append([node_to_idx[row['txId1']], node_to_idx[row['txId2']]])

      edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous() if edge_list else torch.empty((2, 0), dtype=torch.long)

      return {
          'features': torch.tensor(features, dtype=torch.float),
          'edge_index': edge_index,
          'labels': torch.tensor(labels, dtype=torch.long),
          'timesteps': torch.tensor(timesteps, dtype=torch.long),
          'node_ids': node_ids,
          'node_index': torch.tensor(node_index, dtype=torch.long)
      }

  # Load train, val, test splits
  train_data = load_split_data('train')
  val_data = load_split_data('val')
  test_data = load_split_data('test')

  print(f"Split sizes:")
  print(f"  Train: {train_data['features'].shape[0]} nodes, {train_data['edge_index'].shape[1]} edges")
  print(f"  Val: {val_data['features'].shape[0]} nodes, {val_data['edge_index'].shape[1]} edges")
  print(f"  Test: {test_data['features'].shape[0]} nodes, {test_data['edge_index'].shape[1]} edges")

  # Combine train and val if requested (paper setup)
  if combine_train_val:
      print("Combining train and val into single training set (paper setup)")

      # Concatenate features and labels
      combined_features = torch.cat([train_data['features'], val_data['features']], dim=0)
      combined_labels = torch.cat([train_data['labels'], val_data['labels']], dim=0)
      combined_timesteps = torch.cat([train_data['timesteps'], val_data['timesteps']], dim=0)
      combined_node_ids = np.concatenate([train_data['node_ids'], val_data['node_ids']])

      # Adjust edge indices for val data
      val_edges_adjusted = val_data['edge_index'] + train_data['features'].shape[0]
      combined_edge_index = torch.cat([train_data['edge_index'], val_edges_adjusted], dim=1)

      # Adjust node index for val data
      val_nodes_adjusted = val_data['node_index'] + train_data['features'].shape[0]
      combined_node_index = torch.cat([train_data['node_index'], val_nodes_adjusted], dim=0)

      train_data = {
          'features': combined_features,
          'edge_index': combined_edge_index,
          'labels': combined_labels,
          'timesteps': combined_timesteps,
          'node_ids': combined_node_ids,
          'node_index': combined_node_index
      }

      print(f"Combined train: {train_data['features'].shape[0]} nodes, {train_data['edge_index'].shape[1]} edges")

  # Count labels for each split
  for split_name, split_data in [('Train', train_data), ('Test', test_data)]:
      labels = split_data['labels']
      if include_unknowns:
          illicit_count = (labels == 0).sum().item()
          licit_count = (labels == 1).sum().item()
          unknown_count = (labels == -1).sum().item()
          print(f"  {split_name}: illicit={illicit_count}, licit={licit_count}, unknown={unknown_count}")
      else:
          illicit_count = (labels == 0).sum().item()
          licit_count = (labels == 1).sum().item()
          print(f"  {split_name}: illicit={illicit_count}, licit={licit_count}")

  return train_data, test_data

train_data, test_data = load_elliptic_splits(include_unknowns=False, combine_train_val=True)
print(train_data['features'].shape)

Loading splits from data/splits/labeled_only with include_unknowns=False
Split sizes:
  Train: 26381 nodes, 20151 edges
  Val: 2989 nodes, 2375 edges
  Test: 16670 nodes, 13726 edges
Combining train and val into single training set (paper setup)
Combined train: 29370 nodes, 22526 edges
  Train: illicit=3379, licit=25991
  Test: illicit=1083, licit=15587
torch.Size([29370, 166])


# Original Features Only

## Data Preparation

In [49]:
X_train = train_data["features"]
Y_train = train_data["labels"]

X_test = test_data["features"]
Y_test = test_data["labels"]

In [50]:
# Create DMatrix from X and Y
dtrain = DMatrix(X_train, Y_train)
dtest = DMatrix(X_test, Y_test)

## Train Model

In [51]:
# Set XGBoost parameters
params = {
    'objective': 'binary:logistic',
    'learning_rate': 0.1,
    'random_state': 1
}

In [52]:
# Train the model
model = train(params, dtrain)

## Test Model

In [53]:
# Predict
preds = model.predict(dtest) # output a probability
preds_binary = (preds > 0.5).astype(int) # convert back to binary for comparison

# Calculate metrics
acc = accuracy_score(Y_test, preds_binary)
prec = precision_score(Y_test, preds_binary, pos_label=0, zero_division=0)
rec = recall_score(Y_test, preds_binary, pos_label=0, zero_division=0)
f1  = f1_score(Y_test, preds_binary, pos_label=0, zero_division=0)
print("Accuracy: ", acc)
print("Precision: ", prec)
print("Recall: ", rec)
print("F1 Score: ", f1)

Accuracy:  0.9755848830233953
Precision:  0.9082125603864735
Recall:  0.6943674976915974
F1 Score:  0.7870225013082156


# With Node Features

## Prepare Node Features
- Betweeness Centrality
- In-degree

### Add in-degree

In [66]:
train_data_w_node_feature = train_data.copy()
in_degree = torch.bincount(train_data['edge_index'][1], minlength=train_data['features'].shape[0])
in_degree = in_degree.unsqueeze(1).float()
train_data_w_node_feature["features"] = torch.cat([train_data['features'], in_degree], dim=1)

test_data_w_node_feature = test_data.copy()
in_degree = torch.bincount(test_data['edge_index'][1], minlength=test_data['features'].shape[0])
in_degree = in_degree.unsqueeze(1).float()
test_data_w_node_feature["features"] = torch.cat([test_data['features'], in_degree], dim=1)

print(train_data_w_node_feature["features"].shape)
print(test_data_w_node_feature["features"].shape)

torch.Size([29370, 167])
torch.Size([16670, 167])


### Add betweenness

In [67]:
# Train
graph = nx.from_pandas_edgelist(train_data["edge_index"], source=0, target=1, create_using=nx.DiGraph())

print("Calculating betweenness")
k = 1000
betweenness_dict = nx.betweenness_centrality(graph, k=k, seed=1)
betweenness_list = [betweenness_dict.get(i, 0.0) for i in range(train_data['features'].shape[0])]
betweenness_tensor = torch.tensor(betweenness_list).unsqueeze(1).float()

# Add Betweenness
train_data_w_node_feature["features"] = torch.cat([train_data_w_node_feature['features'], betweenness_tensor], dim=1)
print(train_data_w_node_feature["features"].shape)

# Test
graph = nx.from_pandas_edgelist(test_data["edge_index"], source=0, target=1, create_using=nx.DiGraph())

print("Calculating betweenness")
k = 1000
betweenness_dict = nx.betweenness_centrality(graph, k=k, seed=1)
betweenness_list = [betweenness_dict.get(i, 0.0) for i in range(test_data['features'].shape[0])]
betweenness_tensor = torch.tensor(betweenness_list).unsqueeze(1).float()

# Add Betweenness
test_data_w_node_feature["features"] = torch.cat([test_data_w_node_feature['features'], betweenness_tensor], dim=1)
print(test_data_w_node_feature["features"].shape)

Calculating betweenness
torch.Size([29370, 168])
Calculating betweenness
torch.Size([16670, 168])


In [77]:
X_train = train_data_w_node_feature["features"]
Y_train = train_data_w_node_feature["labels"]

X_test = test_data_w_node_feature["features"]
Y_test = test_data_w_node_feature["labels"]

## Train Model

In [69]:
# Create DMatrix from X and Y
dtrain_graph_features = DMatrix(X_train, Y_train)
dtest_graph_features = DMatrix(X_test, Y_test)

In [70]:
# Set XGBoost parameters
params = {
    'objective': 'binary:logistic',
    'learning_rate': 0.1,
    'random_state': 1
}

In [71]:
# Train the model
model_graph_features = train(params, dtrain_graph_features)

## Test Model

In [78]:
# Predict
preds = model_graph_features.predict(dtest_graph_features) # output a probability
preds_binary = (preds > 0.5).astype(int) # convert back to binary for comparison

# Calculate error
acc = accuracy_score(Y_test, preds_binary)
prec = precision_score(Y_test, preds_binary, pos_label=0, zero_division=0)
rec = recall_score(Y_test, preds_binary, pos_label=0, zero_division=0)
f1  = f1_score(Y_test, preds_binary, pos_label=0, zero_division=0)
print("Accuracy: ", acc)
print("Precision: ", prec)
print("Recall: ", rec)
print("F1 Score: ", f1)

Accuracy:  0.9758848230353929
Precision:  0.913730255164034
Recall:  0.6943674976915974
F1 Score:  0.789087093389297


## Evaluate timestep

### Normal

In [75]:
illicit_f1_score_arr = []
for t in range(35, 50):
  print(f"Timestep: {t}")
  mask = test_data["features"][:,0] == t
  features_t = test_data["features"][mask]
  labels_t = test_data["labels"][mask]

  X_test = features_t
  Y_test = labels_t
  dtest = DMatrix(X_test, Y_test)

  preds = model.predict(dtest)
  preds_binary = (preds > 0.5).astype(int) # convert back to binary for comparison

  # Calculate metrics
  acc = accuracy_score(Y_test, preds_binary)
  prec = precision_score(Y_test, preds_binary, pos_label=0, zero_division=0)
  rec = recall_score(Y_test, preds_binary, pos_label=0, zero_division=0)
  f1  = f1_score(Y_test, preds_binary, pos_label=0, zero_division=0)
  illicit_f1_score_arr.append(f1)
  print("Accuracy: ", acc)
  print('-------------')
  print('ILLICIT METRICS')
  print("Precision: ", prec)
  print("Recall: ", rec)
  print("F1 Score: ", f1)

  prec = precision_score(Y_test, preds_binary, pos_label=1, zero_division=0)
  rec = recall_score(Y_test, preds_binary, pos_label=1, zero_division=0)
  f1  = f1_score(Y_test, preds_binary, pos_label=1, zero_division=0)

  print('-------------')
  print('LICIT METRICS')
  print("Precision: ", prec)
  print("Recall: ", rec)
  print("F1 Score: ", f1)
  print('-------------')

print(illicit_f1_score_arr)

Timestep: 35
Accuracy:  0.9873228933631618
-------------
ILLICIT METRICS
Precision:  0.9940119760479041
Recall:  0.9120879120879121
F1 Score:  0.9512893982808023
-------------
LICIT METRICS
Precision:  0.9863713798977853
Recall:  0.999137187230371
F1 Score:  0.9927132447492499
-------------
Timestep: 36
Accuracy:  0.9964871194379391
-------------
ILLICIT METRICS
Precision:  0.9090909090909091
Recall:  0.9090909090909091
F1 Score:  0.9090909090909091
-------------
LICIT METRICS
Precision:  0.9982089552238806
Recall:  0.9982089552238806
F1 Score:  0.9982089552238806
-------------
Timestep: 37
Accuracy:  0.9618473895582329
-------------
ILLICIT METRICS
Precision:  1.0
Recall:  0.525
F1 Score:  0.6885245901639344
-------------
LICIT METRICS
Precision:  0.960167714884696
Recall:  1.0
F1 Score:  0.9796791443850268
-------------
Timestep: 38
Accuracy:  0.9814814814814815
-------------
ILLICIT METRICS
Precision:  1.0
Recall:  0.8738738738738738
F1 Score:  0.9326923076923077
-------------
LICIT

### With node features

In [79]:
illicit_f1_score_arr = []
for t in range(35, 50):
  print(f"Timestep: {t}")
  mask = test_data_w_node_feature["features"][:,0] == t
  features_t = test_data_w_node_feature["features"][mask]
  labels_t = test_data_w_node_feature["labels"][mask]

  X_test = features_t
  Y_test = labels_t
  dtest = DMatrix(X_test, Y_test)

  preds = model_graph_features.predict(dtest)
  preds_binary = (preds > 0.5).astype(int) # convert back to binary for comparison

  # Calculate metrics
  acc = accuracy_score(Y_test, preds_binary)
  prec = precision_score(Y_test, preds_binary, pos_label=0, zero_division=0)
  rec = recall_score(Y_test, preds_binary, pos_label=0, zero_division=0)
  f1  = f1_score(Y_test, preds_binary, pos_label=0, zero_division=0)
  illicit_f1_score_arr.append(f1)
  print("Accuracy: ", acc)
  print('-------------')
  print('ILLICIT METRICS')
  print("Precision: ", prec)
  print("Recall: ", rec)
  print("F1 Score: ", f1)

  prec = precision_score(Y_test, preds_binary, pos_label=1, zero_division=0)
  rec = recall_score(Y_test, preds_binary, pos_label=1, zero_division=0)
  f1  = f1_score(Y_test, preds_binary, pos_label=1, zero_division=0)

  print('-------------')
  print('LICIT METRICS')
  print("Precision: ", prec)
  print("Recall: ", rec)
  print("F1 Score: ", f1)
  print('-------------')

print(illicit_f1_score_arr)

Timestep: 35
Accuracy:  0.9873228933631618
-------------
ILLICIT METRICS
Precision:  0.9940119760479041
Recall:  0.9120879120879121
F1 Score:  0.9512893982808023
-------------
LICIT METRICS
Precision:  0.9863713798977853
Recall:  0.999137187230371
F1 Score:  0.9927132447492499
-------------
Timestep: 36
Accuracy:  0.9964871194379391
-------------
ILLICIT METRICS
Precision:  0.9090909090909091
Recall:  0.9090909090909091
F1 Score:  0.9090909090909091
-------------
LICIT METRICS
Precision:  0.9982089552238806
Recall:  0.9982089552238806
F1 Score:  0.9982089552238806
-------------
Timestep: 37
Accuracy:  0.9618473895582329
-------------
ILLICIT METRICS
Precision:  1.0
Recall:  0.525
F1 Score:  0.6885245901639344
-------------
LICIT METRICS
Precision:  0.960167714884696
Recall:  1.0
F1 Score:  0.9796791443850268
-------------
Timestep: 38
Accuracy:  0.9814814814814815
-------------
ILLICIT METRICS
Precision:  1.0
Recall:  0.8738738738738738
F1 Score:  0.9326923076923077
-------------
LICIT