In [1]:
import pandas as pd
import networkx as nx

In [None]:
FEATURE_FILE = "../data/elliptic_txs_features.csv"
CLASS_FILE = "../data/elliptic_txs_classes.csv"
EDGE_FILE = "../data/elliptic_txs_edgelist.csv"

# OUTPUT_CSV = "../output/elliptic_mini.csv"
# OUTPUT_GRAPH = "../output/elliptic_graph.gpickle"

# how many features to keep
N_FEATURES = 15
# maximum time_step (optional filter to shrink dataset)
MAX_TIMESTEP = 5

## combine dataset

In [12]:
print("📄 Loading features...")
df_features = pd.read_csv(FEATURE_FILE, header=None)

# assign column names
n_cols = df_features.shape[1]
columns = ['txId', 'time_step'] + [f'V{i}' for i in range(1, n_cols - 1)]
df_features.columns = columns

df_features['txId'] = df_features['txId'].astype(str)

print("📄 Loading labels...")
df_labels = pd.read_csv(CLASS_FILE, header=None, names=['txId', 'class'])
df_labels['txId'] = df_labels['txId'].astype(str)

print("📄 Merging features & labels...")
df = df_features.merge(df_labels, on='txId', how='left')

📄 Loading features...
📄 Loading labels...
📄 Merging features & labels...
📄 Loading labels...
📄 Merging features & labels...


In [13]:
df.shape

(203769, 168)

## Small elliptic - without unknown pattern

In [14]:
df = df[df['class'].isin(['1', '2'])]
print(f"📊 After filtering, shape: {df.shape}")
print(df['class'].value_counts())

📊 After filtering, shape: (46564, 168)
class
2    42019
1     4545
Name: count, dtype: int64


In [15]:
import os

# make sure output folder exists
os.makedirs("output", exist_ok=True)

# save cleaned dataset
OUTPUT_CSV = os.path.join("output", "elliptic_mini.csv")
df.to_csv(OUTPUT_CSV, index=False)

print(f"✅ Saved cleaned dataset to: {OUTPUT_CSV}")

✅ Saved cleaned dataset to: output\elliptic_mini.csv


---

## Generate graph and save

In [9]:
import pandas as pd
import networkx as nx
import os
import pickle

EDGE_FILE = "../data/elliptic_txs_edgelist.csv"
OUTPUT_GRAPH = os.path.join("output", "elliptic_graph.gpickle")

# Load the cleaned dataset that was saved earlier
print("📄 Loading cleaned dataset...")
df = pd.read_csv("output/elliptic_mini.csv")
print(f"✅ Loaded dataset: {df.shape}")

print("📄 Loading edge list...")
df_edges = pd.read_csv(EDGE_FILE, header=None, names=['source', 'target'])

# ✅ Remove invalid header row if present
df_edges = df_edges[df_edges['source'] != 'txId1']
df_edges = df_edges[df_edges['target'] != 'txId2']

print(f"✅ Cleaned edges: {df_edges.shape}")

print("🕸️ Building graph...")
G = nx.from_pandas_edgelist(
    df_edges,
    source='source',
    target='target',
    create_using=nx.DiGraph()
)

print(f"✅ Graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

# attach node features & labels
print("📝 Attaching node features & labels...")
feature_dict = df.set_index('txId').to_dict(orient='index')
nx.set_node_attributes(G, feature_dict)

# optional: remove nodes with no features
nodes_with_attrs = set(feature_dict.keys())
nodes_to_remove = [n for n in G if n not in nodes_with_attrs]
G.remove_nodes_from(nodes_to_remove)

print(f"✅ Final graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
print(f"📄 Sample node: {list(G.nodes(data=True))[:1]}")

os.makedirs("output", exist_ok=True)
with open("output/elliptic_graph.pkl", "wb") as f:
    pickle.dump(G, f)

print(f"✅ Saved cleaned graph to: {OUTPUT_GRAPH}")

📄 Loading cleaned dataset...
✅ Loaded dataset: (46564, 168)
📄 Loading edge list...
✅ Cleaned edges: (234355, 2)
🕸️ Building graph...
✅ Loaded dataset: (46564, 168)
📄 Loading edge list...
✅ Cleaned edges: (234355, 2)
🕸️ Building graph...
✅ Graph: 203769 nodes, 234355 edges
📝 Attaching node features & labels...
✅ Graph: 203769 nodes, 234355 edges
📝 Attaching node features & labels...
✅ Final graph: 0 nodes, 0 edges
📄 Sample node: []
✅ Saved cleaned graph to: output\elliptic_graph.gpickle
✅ Final graph: 0 nodes, 0 edges
📄 Sample node: []
✅ Saved cleaned graph to: output\elliptic_graph.gpickle


In [10]:
with open("output/elliptic_graph.pkl", "rb") as f:
    G = pickle.load(f)

print(f"✅ Graph loaded: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")

✅ Graph loaded: 0 nodes, 0 edges


In [7]:
import networkx as nx
import pickle

# Load the graph using pickle
with open("output/elliptic_graph.gpickle", 'rb') as f:
    G = pickle.load(f)

print(f"Graph loaded: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
print("\nSample nodes with data:")
for i, (node, data) in enumerate(G.nodes(data=True)):
    if i < 3:  # Show first 3 nodes
        print(f"Node {node}: {data}")
    else:
        break

print("\nSample edges with data:")
for i, (u, v, data) in enumerate(G.edges(data=True)):
    if i < 3:  # Show first 3 edges
        print(f"Edge {u} -> {v}: {data}")
    else:
        break

Graph loaded: 0 nodes, 0 edges

Sample nodes with data:

Sample edges with data:
