In [2]:
import networkx as nx
import pandas as pd

In [3]:
edgelists = pd.read_csv('data/elliptic_bitcoin_dataset/elliptic_txs_edgelist.csv')
features = pd.read_csv('data/elliptic_bitcoin_dataset/elliptic_txs_features.csv')
classes = pd.read_csv('data/elliptic_bitcoin_dataset/elliptic_txs_classes.csv')

In [6]:
features.columns = ['txId'] + [f"V{i + 1}" for i in range(len(features.columns) - 1)]

In [9]:
print("Number of nodes:", len(features))
print("Number of edges:", len(edgelists))

Number of nodes: 203768
Number of edges: 234355


In [12]:
classes['class_mapped'] = classes['class'].replace({'1': 'illicit', '2': 'licit'})

percentage_distribution = round(100 * classes['class_mapped'].value_counts(normalize=True), 2)
class_counts = classes['class_mapped'].value_counts()

emoji_mapping = {
    'licit': '✅', 
    'illicit': '❌', 
    'unknown': '🤷'
}
classes['emoji'] = classes['class_mapped'].map(emoji_mapping)

classes_df = pd.DataFrame({
    'Class Mapped': classes['class_mapped'].unique(),
    'Class Raw': classes['class'].unique(),    
    'Counts': class_counts.values,
    'Percentage': percentage_distribution.values,
    'Emoji': [emoji_mapping[class_label] for class_label in classes['class_mapped'].unique()]
})

assert len(classes_df) == 3, "There should be 3 unique classes"
assert sum(classes_df['Counts']) == len(classes), "Total counts should match the number of rows in classes"

In [13]:
classes_df

Unnamed: 0,Class Mapped,Class Raw,Counts,Percentage,Emoji
0,unknown,unknown,157205,77.15,🤷
1,licit,2,42019,20.62,✅
2,illicit,1,4545,2.23,❌
