# Stage 0: Elliptic++ EDA

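This notebook performs an initial exploration of the Elliptic++ dataset: it installs the required packages, loads the preprocessed graph (`ellipticpp.pt`), plots basic statistics, and runs a tiny GCN forward pass as a smoke test.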

In [None]:
# Basic libs
!pip install -q numpy pandas scipy scikit-learn networkx matplotlib tqdm pyarrow pytest

# PyTorch (choose CPU or CUDA matching runtime)
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

# Install PyG - if CUDA present, install matching wheel from PyG instructions.
# For many Colab runtimes, a simple pip works, otherwise consult PyG install helper
!pip install -q torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric

# If PyG fails, fallback to DGL (dgl-cpu) or install specific CUDA wheels manually.
# !pip install -q dgl

## Load the processed data

In [None]:
import torch

# Make sure to upload the ellipticpp.pt file to your Colab environment
# (the path is resolved relative to the notebook's working directory).
#
# The file stores a whole pickled graph object (later cells use `data.node_types`,
# `data.x_dict`, ...), not a plain tensor/state dict. PyTorch >= 2.6 defaults to
# `weights_only=True`, which refuses to unpickle such objects, so the flag is
# passed explicitly.
# NOTE(security): `weights_only=False` runs the full pickle machinery and can
# execute arbitrary code -- only load files you created or otherwise trust.
#
# `map_location='cpu'` keeps the load working with the CPU-only PyTorch build
# installed above even if the tensors were saved from a GPU.
data = torch.load('ellipticpp.pt', map_location='cpu', weights_only=False)
print(data)

## Basic EDA

In [None]:
import matplotlib.pyplot as plt

# Plot the number of nodes per node type (these are node counts, not degrees).
node_counts = {
    node_type: data[node_type].num_nodes for node_type in data.node_types
}
# Old, misleading name kept as an alias in case another cell still refers to it.
node_degrees = node_counts

fig, ax = plt.subplots()
ax.bar(list(node_counts.keys()), list(node_counts.values()))
ax.set_title("Node Counts per Type")
ax.set_xlabel("Node type")
ax.set_ylabel("Number of nodes")
plt.show()

# Plot edge time distribution: one histogram per edge type that carries `time`.
# The check is done per edge type only. Requiring a graph-level `data.time` as
# well would silently skip every histogram whenever `time` is stored solely on
# the individual edge types.
for edge_type in data.edge_types:
    edge_store = data[edge_type]
    if not hasattr(edge_store, 'time'):
        continue

    # detach()/cpu() make the conversion safe for tensors that track gradients
    # or live on an accelerator; for plain CPU tensors they are no-ops.
    edge_times = edge_store.time.detach().cpu().numpy()

    fig, ax = plt.subplots()
    ax.hist(edge_times, bins=50)
    ax.set_title(f"Time Distribution for {edge_type}")
    ax.set_xlabel("Time")
    ax.set_ylabel("Count")
    plt.show()

## Tiny GCN Forward Pass (Smoke Test)

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, HeteroConv, SAGEConv

class ToyGCN(torch.nn.Module):
    """Single-layer heterogeneous GNN used only as a forward-pass smoke test.

    One convolution is created per edge type and wrapped in a ``HeteroConv``
    that sums the messages arriving at each destination node type. Input
    feature sizes are given as ``-1`` so they are inferred lazily on the first
    forward call.

    Args:
        hidden_channels: Output feature size of every convolution.
        edge_types: Iterable of ``(src_type, relation, dst_type)`` tuples to
            build convolutions for. Defaults to ``data.edge_types`` of the
            graph loaded earlier in the notebook, which preserves the original
            behaviour of ``ToyGCN(hidden_channels=...)``.
    """

    def __init__(self, hidden_channels, edge_types=None):
        super().__init__()
        if edge_types is None:
            # Backward-compatible default: the notebook-level graph object.
            edge_types = data.edge_types

        convs = {}
        for edge_type in edge_types:
            src_type, dst_type = edge_type[0], edge_type[-1]
            if src_type == dst_type:
                convs[edge_type] = GCNConv(-1, hidden_channels)
            else:
                # GCNConv does not support bipartite message passing (source and
                # destination node types differ), so such relations use SAGEConv
                # with separate lazy input sizes for the two sides.
                # NOTE(review): whether ellipticpp.pt contains such relations is
                # not visible from this notebook -- confirm against the data.
                convs[edge_type] = SAGEConv((-1, -1), hidden_channels)

        self.conv1 = HeteroConv(convs, aggr='sum')

    def forward(self, x_dict, edge_index_dict):
        """Apply the heterogeneous convolution.

        Args:
            x_dict: Mapping from node type to its node feature tensor.
            edge_index_dict: Mapping from edge type to its edge index tensor.

        Returns:
            The dictionary produced by ``HeteroConv`` (one tensor per
            destination node type).
        """
        return self.conv1(x_dict, edge_index_dict)

# Smoke test: build the toy model and push the full graph through it once.
HIDDEN_CHANNELS = 16
model = ToyGCN(hidden_channels=HIDDEN_CHANNELS)

# Only the output shapes are of interest, so skip autograd bookkeeping for the
# whole graph. This first call is also what materialises the lazily-sized
# (`in_channels=-1`) layers.
with torch.no_grad():
    output = model(data.x_dict, data.edge_index_dict)

# Every returned node type must carry `HIDDEN_CHANNELS` features.
assert all(val.shape[-1] == HIDDEN_CHANNELS for val in output.values()), \
    "unexpected feature size in Toy GCN output"

print("Toy GCN output shape:", {key: val.shape for key, val in output.items()})