In [1]:
from __future__ import print_function, division
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import networkx as nx
import matplotlib.pyplot as plt
%matplotlib inline
import plotly.offline as py 
import plotly.graph_objs as go 
py.init_notebook_mode(connected=True)
import plotly.io as pio

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV

In [2]:
# Input files, resolved relative to the notebook's working directory.
EDGES_CSV = "elliptic_txs_edgelist.csv"
FEATURES_CSV = "elliptic_txs_features.csv"
CLASSES_CSV = "elliptic_txs_classes.csv"
WALLETS_CSV = "wallets_features_classes_combined.csv"

# Transaction-level tables. The features file is read with header=None, so its
# columns are positional integers until they are renamed in a later cell.
edges = pd.read_csv(EDGES_CSV)
features = pd.read_csv(FEATURES_CSV, header=None)
classes = pd.read_csv(CLASSES_CSV)

# Wallet-level table: keep only the first row encountered for each address.
features_wallets = (
    pd.read_csv(WALLETS_CSV)
    .drop_duplicates(subset='address', keep='first')
)

In [3]:
# Show the first five rows of every loaded table, in load order.
for frame in (edges, features, classes, features_wallets):
    display(frame.head(5))

Unnamed: 0,txId1,txId2
0,230425980,5530458
1,232022460,232438397
2,230460314,230459870
3,230333930,230595899
4,232013274,232029206


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,157,158,159,160,161,162,163,164,165,166
0,230425980,1,-0.171469,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.162097,...,-0.562153,-0.600999,1.46133,1.461369,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792
1,5530458,1,-0.171484,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.162112,...,0.947382,0.673103,-0.979074,-0.978556,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792
2,232022460,1,-0.172107,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.162749,...,0.670883,0.439728,-0.979074,-0.978556,-0.098889,-0.106715,-0.131155,-0.183671,-0.120613,-0.119792
3,232438397,1,0.163054,1.96379,-0.646376,12.409294,-0.063725,9.782742,12.414558,-0.163645,...,-0.577099,-0.613614,0.241128,0.241406,1.072793,0.08553,-0.131155,0.677799,-0.120613,-0.119792
4,230460314,1,1.011523,-0.081127,-1.201369,1.153668,0.333276,1.312656,-0.061584,-0.163523,...,-0.511871,-0.400422,0.517257,0.579382,0.018279,0.277775,0.326394,1.29375,0.178136,0.179117


Unnamed: 0,txId,class
0,230425980,unknown
1,5530458,unknown
2,232022460,unknown
3,232438397,2
4,230460314,unknown


Unnamed: 0,address,Time step,class,num_txs_as_sender,num_txs_as receiver,first_block_appeared_in,last_block_appeared_in,lifetime_in_blocks,total_txs,first_sent_block,...,blocks_btwn_output_txs_min,blocks_btwn_output_txs_max,blocks_btwn_output_txs_mean,blocks_btwn_output_txs_median,num_addr_transacted_multiple,transacted_w_address_total,transacted_w_address_min,transacted_w_address_max,transacted_w_address_mean,transacted_w_address_median
0,111112TykSw72ztDN2WJger4cynzWYC5w,25,2,0.0,1.0,439586.0,439586.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,24.0,1.0,1.0,1.0,1.0
1,1111DAYXhoxZx2tsRnzimfozo783x1yC2,25,3,0.0,8.0,439589.0,485959.0,46370.0,8.0,0.0,...,0.0,20164.0,6624.285714,8060.0,0.0,8.0,1.0,1.0,1.0,1.0
9,1111VHuXEzHaRCgXbVwojtaP7Co3QABb,21,2,0.0,1.0,431522.0,431522.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0
10,111218KKkh1JJFRHbwM16AwCiVCc4m7he1,17,3,1.0,1.0,423456.0,423456.0,0.0,2.0,423456.0,...,0.0,0.0,0.0,0.0,0.0,4.0,1.0,1.0,1.0,1.0
12,1115LWW3xsD9jT9VRY7viCN9S34RVAAuA,20,2,0.0,1.0,429513.0,429513.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,5.0,1.0,1.0,1.0,1.0


In [4]:
# Name the header-less feature columns: txId, time_step, then 93 "tx_feat_*"
# columns (2..94) and 72 "agg_feat_*" columns (1..72) -- 167 names in total.
tx_features = ["tx_feat_{}".format(i) for i in range(2, 95)]
agg_features = ["agg_feat_{}".format(i) for i in range(1, 73)]
features.columns = ["txId", "time_step"] + tx_features + agg_features

# Attach the class label to every transaction (left join keeps all feature rows),
# then recode the "unknown" label as the string '0'.
features = features.merge(classes, on="txId", how='left')
features['class'] = features['class'].mask(features['class'] == "unknown", '0')

In [5]:
# Number of transactions per (time_step, class) pair. The size column produced
# by to_frame() is unnamed, so it ends up under the integer label 0.
count_by_class = (
    features.loc[:, ["time_step", 'class']]
    .groupby(['time_step', 'class'])
    .size()
    .to_frame()
    .reset_index()
)

# One frame per class label ('1', '2' and the recoded "0").
illicit_count = count_by_class.loc[count_by_class['class'].eq('1')]
licit_count = count_by_class.loc[count_by_class['class'].eq('2')]
unknown_count = count_by_class.loc[count_by_class['class'].eq("0")]

In [6]:
import plotly.graph_objects as go

# Time steps shown on the x axis.
x_list = list(range(1, 50))

# (legend label, per-class count frame, bar colour) for each stacked series.
class_series = [
    ("Unknown", unknown_count, '#7AA6CF'),
    ("Licit", licit_count, '#0D3E6B'),
    ("Illicit", illicit_count, '#DC143C'),
]

fig = go.Figure(data=[
    go.Bar(
        name=label,
        x=x_list,
        # Align the counts to x_list by their time step. Passing the raw count
        # column would shift every later bar to the wrong step whenever a class
        # has no rows at some time step; missing steps are drawn as 0 instead.
        y=counts.set_index('time_step')[0].reindex(x_list, fill_value=0),
        marker=dict(color=color, line=dict(color=color, width=1)),
    )
    for label, counts, color in class_series
])

# Bold fonts shared by both axes.
axis_title_font = dict(size=22, family='Arial', color='black', weight='bold')
axis_tick_font = dict(size=18, family='Arial', color='black', weight='bold')

fig.update_layout(
    barmode='stack',
    # The axis title is given as a nested dict (text + font) rather than through
    # the deprecated `titlefont` shorthand.
    xaxis=dict(
        title=dict(text='Time Step', font=axis_title_font),  # bold x-axis label
        tickfont=axis_tick_font,  # bold x-axis tick numbers
    ),
    yaxis=dict(
        title=dict(text='Count', font=axis_title_font),  # bold y-axis label
        tickfont=axis_tick_font,  # bold y-axis tick numbers
    ),
    legend=dict(
        x=0.5,
        y=1.0,
        font=dict(size=20, color='black'),
    ),
    # The figure title is drawn as an annotation positioned in paper
    # coordinates, slightly above the plotting area.
    annotations=[
        dict(
            x=0.5,
            y=1.10,
            xref='paper',
            yref='paper',
            text='Count of Classes Over Time Steps (Transactions)',
            showarrow=False,
            font=dict(
                size=22,
                family='Arial',
                color='black',
                weight='bold',  # bold title
            ),
        )
    ],
)

# Render the chart.
fig.show()


In [7]:
# Number of wallet rows per ("Time step", class) pair. The size column produced
# by to_frame() is unnamed, so it ends up under the integer label 0.
actors_count_by_class = (
    features_wallets.loc[:, ["Time step", 'class']]
    .groupby(['Time step', 'class'])
    .size()
    .to_frame()
    .reset_index()
)

# One frame per integer class label; the variable names give the meaning this
# notebook assigns to labels 1, 2 and 3.
actors_illicit_count = actors_count_by_class.loc[actors_count_by_class['class'].eq(1)]
actors_licit_count = actors_count_by_class.loc[actors_count_by_class['class'].eq(2)]
actors_unknown_count = actors_count_by_class.loc[actors_count_by_class['class'].eq(3)]

In [8]:
import plotly.graph_objects as go

# Time steps shown on the x axis.
x_list = list(range(1, 50))

# (legend label, per-class count frame, bar colour) for each stacked series.
actor_series = [
    ("Unknown", actors_unknown_count, '#7AA6CF'),
    ("Licit", actors_licit_count, '#0D3E6B'),
    ("Illicit", actors_illicit_count, '#DC143C'),
]

fig = go.Figure(data=[
    go.Bar(
        name=label,
        x=x_list,
        # Align the counts to x_list by their time step. Passing the raw count
        # column would shift every later bar to the wrong step whenever a class
        # has no rows at some time step; missing steps are drawn as 0 instead.
        y=counts.set_index('Time step')[0].reindex(x_list, fill_value=0),
        marker=dict(color=color, line=dict(color=color, width=1)),
    )
    for label, counts, color in actor_series
])

# Bold fonts shared by both axes.
axis_title_font = dict(size=22, family='Arial', color='black', weight='bold')
axis_tick_font = dict(size=18, family='Arial', color='black', weight='bold')

fig.update_layout(
    barmode='stack',
    # The axis title is given as a nested dict (text + font) rather than through
    # the deprecated `titlefont` shorthand.
    xaxis=dict(
        title=dict(text='Time Step', font=axis_title_font),  # bold x-axis label
        tickfont=axis_tick_font,  # bold x-axis tick numbers
    ),
    yaxis=dict(
        title=dict(text='Count', font=axis_title_font),  # bold y-axis label
        tickfont=axis_tick_font,  # bold y-axis tick numbers
    ),
    legend=dict(
        x=0.5,
        y=1.0,
        font=dict(size=20, color='black'),
    ),
    # The figure title is drawn as an annotation positioned in paper
    # coordinates, slightly above the plotting area.
    annotations=[
        dict(
            x=0.5,
            y=1.10,
            xref='paper',
            yref='paper',
            text='Count of Classes Over Time Steps (Actors)',
            showarrow=False,
            font=dict(
                size=22,
                family='Arial',
                color='black',
                weight='bold',  # bold title
            ),
        )
    ],
)

# Render the chart.
fig.show()


In [9]:
import networkx as nx
import plotly.graph_objects as go
import pandas as pd

def prepare_and_visualize_illicit_graph(features, edges, time_step=32):
    """Plot the directed graph of edges whose source is an illicit transaction.

    Parameters
    ----------
    features : pandas.DataFrame
        Transaction table. The columns 'txId', 'time_step' and 'class' are read;
        rows whose 'class' equals the string '1' are treated as illicit, and the
        'class' values are converted with ``pd.to_numeric`` before being looked
        up in the colour map.
    edges : pandas.DataFrame
        Edge list with a 'txId1' (source) and a 'txId2' (target) column.
    time_step : int, optional
        Time step whose illicit transactions seed the graph (default 32).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Notes
    -----
    Node colours come from the module-level ``node_color_map`` (numeric class
    -> colour), which must be defined before this function is called.
    The layout comes from ``nx.spring_layout`` called without a seed, so node
    positions can differ between runs.
    """
    # Edges leaving an illicit transaction of the requested time step.
    illicit_mask = (features['time_step'] == time_step) & (features['class'] == '1')
    illicit_ids = features.loc[illicit_mask, 'txId']
    illicit_edges = edges[edges['txId1'].isin(illicit_ids)]
    illicit_graph = nx.from_pandas_edgelist(
        illicit_edges, source='txId1', target='txId2', create_using=nx.DiGraph())
    pos = nx.spring_layout(illicit_graph)

    # Edge trace: each segment is (start, end, None); None breaks the line so
    # separate edges are not joined together.
    edge_x, edge_y = [], []
    for src, dst in illicit_graph.edges():
        x0, y0 = pos[src]
        x1, y1 = pos[dst]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#DC143C'),  # edge colour for the illicit graph
        hoverinfo='none',
        mode='lines')

    # Node trace: coordinates, hover text and colours all follow the same
    # node order.
    nodes = list(illicit_graph.nodes())
    node_x = [pos[node][0] for node in nodes]
    node_y = [pos[node][1] for node in nodes]

    # Look the class up per node. Filtering `features` with isin() alone yields
    # classes in table row order, which does not match the graph's node order
    # and would paint the colours onto the wrong markers.
    node_rows = features[features['txId'].isin(nodes)].drop_duplicates(subset='txId')
    class_by_tx = pd.to_numeric(node_rows.set_index('txId')['class'])
    node_colors = class_by_tx.reindex(nodes).map(node_color_map).tolist()

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        text=nodes,
        marker=dict(
            size=10,
            color=node_colors,
            line=dict(width=2)))

    # Assemble the figure: hidden axes, centred bold title, transparent plot area.
    fig = go.Figure(data=[edge_trace, node_trace], layout=go.Layout(
        title=dict(
            text="Illicit Transactions (Time Step = {})".format(time_step),
            font=dict(size=24, color='black', family='Arial', weight='bold'),
            x=0.5,
            xanchor='center'
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
    ))

    fig.show()

# Marker colour per numeric class label; read as a global by the plotting function.
node_color_map = {
    0: '#7AA6CF',
    1: '#DC143C',
    2: '#0D3E6B',
}

# Draw the graph from the labelled transaction table and the edge list.
prepare_and_visualize_illicit_graph(features=features, edges=edges)


In [10]:
import networkx as nx
import plotly.graph_objects as go
import pandas as pd

def prepare_and_visualize_licit_graph(features, edges, time_step=32):
    """Plot the directed graph of edges whose source is a licit transaction.

    Parameters
    ----------
    features : pandas.DataFrame
        Transaction table. The columns 'txId', 'time_step' and 'class' are read;
        rows whose 'class' equals the string '2' are treated as licit, and the
        'class' values are converted with ``pd.to_numeric`` before being looked
        up in the colour map.
    edges : pandas.DataFrame
        Edge list with a 'txId1' (source) and a 'txId2' (target) column.
    time_step : int, optional
        Time step whose licit transactions seed the graph (default 32).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.

    Notes
    -----
    Node colours come from the module-level ``node_color_map`` (numeric class
    -> colour), which must be defined before this function is called.
    The layout comes from ``nx.spring_layout`` called without a seed, so node
    positions can differ between runs.
    """
    # Edges leaving a licit transaction of the requested time step.
    licit_mask = (features['time_step'] == time_step) & (features['class'] == '2')
    licit_ids = features.loc[licit_mask, 'txId']
    licit_edges = edges[edges['txId1'].isin(licit_ids)]
    licit_graph = nx.from_pandas_edgelist(
        licit_edges, source='txId1', target='txId2', create_using=nx.DiGraph())
    pos = nx.spring_layout(licit_graph)

    # Edge trace: each segment is (start, end, None); None breaks the line so
    # separate edges are not joined together.
    edge_x, edge_y = [], []
    for src, dst in licit_graph.edges():
        x0, y0 = pos[src]
        x1, y1 = pos[dst]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#4682B4'),  # edge colour for the licit graph
        hoverinfo='none',
        mode='lines')

    # Node trace: coordinates, hover text and colours all follow the same
    # node order.
    nodes = list(licit_graph.nodes())
    node_x = [pos[node][0] for node in nodes]
    node_y = [pos[node][1] for node in nodes]

    # Look the class up per node. Filtering `features` with isin() alone yields
    # classes in table row order, which does not match the graph's node order
    # and would paint the colours onto the wrong markers.
    node_rows = features[features['txId'].isin(nodes)].drop_duplicates(subset='txId')
    class_by_tx = pd.to_numeric(node_rows.set_index('txId')['class'])
    node_colors = class_by_tx.reindex(nodes).map(node_color_map).tolist()

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        text=nodes,
        marker=dict(
            size=10,
            color=node_colors,
            line=dict(width=2)))

    # Assemble the figure: hidden axes, centred bold title, transparent plot area.
    fig = go.Figure(data=[edge_trace, node_trace], layout=go.Layout(
        title=dict(
            text="Licit Transactions (Time Step = {})".format(time_step),
            font=dict(size=24, color='black', family='Arial', weight='bold'),
            x=0.5,
            xanchor='center'
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='rgba(0,0,0,0)',  # transparent plot background
    ))

    fig.show()


# Marker colour per numeric class label; read as a global by the plotting function.
node_color_map = {
    0: '#7AA6CF',
    1: '#DC143C',
    2: '#0D3E6B',
}

# Draw the graph from the labelled transaction table and the edge list.
prepare_and_visualize_licit_graph(features=features, edges=edges)

