In [1]:
import pandas as pd


### Remap the dataset's transaction IDs to contiguous node indices (0 to N-1) for use in GNNs

In [2]:
# Remap the raw transaction IDs in the classes, edge-list and features tables
# to contiguous integer node IDs (0 to N-1) and save the remapped copies
# next to the originals.

# Single source of truth for the dataset directory (inputs and outputs live here)
DATA_DIR = "../elliptic_bitcoin_dataset"

# Paths of the CSV files
classes_path = f"{DATA_DIR}/elliptic_txs_classes.csv"
edges_path = f"{DATA_DIR}/elliptic_txs_edgelist.csv"
features_path = f"{DATA_DIR}/elliptic_txs_features.csv"

# Column names for the features file, which is read without a header row:
# the ID, the time step, then 93 'trans_feat_*' and 72 'agg_feat_*' columns
feat_cols = ['txId', 'time_step'] + [f'trans_feat_{i}' for i in range(93)] + [f'agg_feat_{i}' for i in range(72)]

# Step 1: Load the data
classes = pd.read_csv(classes_path)
edges = pd.read_csv(edges_path)
features = pd.read_csv(features_path, header=None, names=feat_cols)

# Step 2: Collect every unique ID that appears in any of the three tables.
# `unique()` keeps order of first appearance, so IDs are numbered in the order
# they occur in edges['txId1'], then edges['txId2'], then classes, then features.
all_ids = pd.concat([edges['txId1'], edges['txId2'], classes['txId'], features['txId']]).unique()

# Step 3: Create the mapping between the original IDs and numeric IDs from 0 to N-1
# (where N is the total number of nodes)
id_mapping = {old_id: new_id for new_id, old_id in enumerate(all_ids)}

# Step 4: Replace the original IDs with the new numeric IDs in every table.
# `Series.map` silently produces NaN (and a float column) for any value it
# cannot look up, so fail loudly here instead of writing broken IDs to disk.
for frame, id_cols in ((classes, ['txId']), (edges, ['txId1', 'txId2']), (features, ['txId'])):
    for col in id_cols:
        frame[col] = frame[col].map(id_mapping)
        assert frame[col].notna().all(), f"column {col!r} contains missing or unmapped IDs"

# NOTE(review): because numbering follows the edge list first, the row position
# in the classes/features tables is NOT the node ID (e.g. the second features
# row gets ID 5577). Consumers that need row i == node i must sort or index
# by 'txId' after loading.

# Step 5: Save the modified files (the features file stays header-less, like its source)
classes.to_csv(f"{DATA_DIR}/modified_elliptic_txs_classes.csv", index=False)
edges.to_csv(f"{DATA_DIR}/modified_elliptic_txs_edgelist.csv", index=False)
features.to_csv(f"{DATA_DIR}/modified_elliptic_txs_features.csv", index=False, header=False)

# Quick preview of the remapped tables
print(classes.head())
print(edges.head())
print(features.head())





   txId    class
0     0  unknown
1  5577  unknown
2     1  unknown
3   537        2
4     2  unknown
   txId1  txId2
0      0   5577
1      1    537
2      2   1885
3      3   2807
4      4   4133
   txId  time_step  trans_feat_0  trans_feat_1  trans_feat_2  trans_feat_3  \
0     0          1     -0.171469     -0.184668     -1.201369     -0.121970   
1  5577          1     -0.171484     -0.184668     -1.201369     -0.121970   
2     1          1     -0.172107     -0.184668     -1.201369     -0.121970   
3   537          1      0.163054      1.963790     -0.646376     12.409294   
4     2          1      1.011523     -0.081127     -1.201369      1.153668   

   trans_feat_4  trans_feat_5  trans_feat_6  trans_feat_7  ...  agg_feat_62  \
0     -0.043875     -0.113002     -0.061584     -0.162097  ...    -0.562153   
1     -0.043875     -0.113002     -0.061584     -0.162112  ...     0.947382   
2     -0.043875     -0.113002     -0.061584     -0.162749  ...     0.670883   
3     -0.063725  