# 说明：以下代码的运行环境均为 Kaggle 平台

1. 首先导入所需的库、读取相关文件，并确认 Python 与 networkx 的版本是否符合要求

In [None]:
import platform
import pickle
import networkx as nx

In [None]:
# Report the interpreter and NetworkX versions in use so they can be
# compared against the versions this notebook expects.
for label, version in (
    ("python version:\t\t", platform.python_version()),
    ("networkx version:\t", nx.__version__),
):
    print(label, version)

In [None]:
def load_pickle(fname):
    """Deserialize and return the object stored in the pickle file *fname*.

    The file is opened in binary mode and read with ``pickle.load``.

    NOTE(review): unpickling can execute arbitrary code, so only call this
    on files that come from a trusted source.
    """
    with open(fname, mode='rb') as pickle_file:
        loaded = pickle.load(pickle_file)
    return loaded

# Load the pickled transaction graph from the Kaggle dataset directory.
G = load_pickle('../input/ethereum-phishing-transaction-network/Ethereum Phishing Transaction Network/MulDiGraph.pkl')

# nx.info() was removed in NetworkX 3.0. Use it when the installed release
# still provides it, otherwise fall back to str(G) so the cell does not
# fail with AttributeError on newer releases.
graph_summary = nx.info(G) if hasattr(nx, 'info') else str(G)
print(graph_summary)

2. 提取节点数据并进行初步筛选：按每个节点的交易数（入边与出边数量之和）过滤，交易数大于1000或小于10的节点将被剔除

In [None]:
# Collect (node, transaction_count) pairs for every node whose total number
# of edges (outgoing plus incoming, parallel edges counted individually)
# lies in the inclusive range 10..1000.
legal_node_set = set()
for nd in G.nodes:
    outgoing = sum(G.number_of_edges(nd, succ) for succ in G.successors(nd))
    incoming = sum(G.number_of_edges(pred, nd) for pred in G.predecessors(nd))
    count = outgoing + incoming
    if 10 <= count <= 1000:
        legal_node_set.add((nd, count))
print(len(legal_node_set))

3. 生成label的csv文件并统计信息

In [None]:
import csv

# Freeze the set into a list once; the following cells index into this list
# and rely on its order staying the same.
legal_node_list = list(legal_node_set)

# Write one row per retained node: the account and its 'isp' node attribute.
with open('label.csv', 'w', newline='') as label_file:
    label_writer = csv.writer(label_file)
    label_writer.writerow(['account', 'isp'])
    label_writer.writerows(
        [account, G.nodes[account]['isp']] for account, _ in legal_node_list
    )

In [None]:
# Count the retained nodes whose 'isp' attribute equals 1, then print that
# count, its share of all retained nodes, and the number of remaining nodes.
phishingCount = sum(
    1 for account, _ in legal_node_list if G.nodes[account]['isp'] == 1
)
retained_total = len(legal_node_list)
print(phishingCount)
print(phishingCount / retained_total)
print(retained_total - phishingCount)

4. 生成对应的node_data特征文件

In [None]:
import statistics


def _direction_features(timestamps, amounts, unique_peers):
    """Return the ten per-direction features for one node.

    Args:
        timestamps: 'timestamp' attribute of every edge in this direction.
        amounts: 'amount' attribute of the same edges, in the same order.
        unique_peers: number of distinct neighbours in this direction.

    Returns:
        A list ``[block_std, time_ptp, amount_sum, amount_max, amount_min,
        amount_mean, amount_std, count, unique, unique_ratio]``. Every entry
        is 0 when there are no edges in this direction.
    """
    if not timestamps:
        return [0] * 10
    tx_count = len(timestamps)
    return [
        statistics.pstdev(timestamps),
        # Use max()/min() rather than fixed sentinels: a sentinel minimum of
        # 99999999 is never replaced when timestamps are at or above it.
        max(timestamps) - min(timestamps),
        sum(amounts),
        max(amounts),
        min(amounts),
        statistics.mean(amounts),
        statistics.pstdev(amounts),
        tx_count,
        unique_peers,
        unique_peers / tx_count,
    ]


# One row per retained node: [account, 10 outgoing features, 10 incoming
# features], in the same order as legal_node_list.
node_data = []
for node, _ in legal_node_list:
    # Outgoing edges: G[node] maps each successor to its parallel edges.
    out_neighbours = G[node]
    out_times = []
    out_amounts = []
    for parallel_edges in out_neighbours.values():
        for attrs in parallel_edges.values():
            out_times.append(attrs['timestamp'])
            out_amounts.append(attrs['amount'])

    # Incoming edges: walk every predecessor's parallel edges into node.
    in_neighbours = list(G.predecessors(node))
    in_times = []
    in_amounts = []
    for sender in in_neighbours:
        for attrs in G[sender][node].values():
            # Timestamps go to the time list and amounts to the amount list
            # (these two were previously swapped).
            in_times.append(attrs['timestamp'])
            in_amounts.append(attrs['amount'])

    node_data.append(
        [node]
        + _direction_features(out_times, out_amounts, len(out_neighbours))
        # in_unique counts distinct predecessors, not successors.
        + _direction_features(in_times, in_amounts, len(in_neighbours))
    )




In [None]:
import csv

# Column names, in the same order as the values in each node_data row.
feature_header = [
    'account',
    'out_block_std', 'out_time_ptp', 'out_amount_sum', 'out_amount_max',
    'out_amount_min', 'out_amount_mean', 'out_amount_std', 'out_count',
    'out_unique', 'out_unique_ratio',
    'in_block_std', 'in_time_ptp', 'in_amount_sum', 'in_amount_max',
    'in_amount_min', 'in_amount_mean', 'in_amount_std', 'in_count',
    'in_unique', 'in_unique_ratio',
]

# Write the header followed by one row per entry of node_data.
with open('node_data_features.csv', 'w', newline='') as feature_file:
    feature_writer = csv.writer(feature_file)
    feature_writer.writerow(feature_header)
    feature_writer.writerows(node_data)

5. 验证生成是否有误

In [None]:
# Cross-check node_data against legal_node_list: each row must carry the
# same account, and its out_count (column 8) plus in_count (column 18) must
# equal the transaction count recorded during filtering.
mismatches = 0

# zip() stops at the shorter sequence, so a length difference is checked
# explicitly instead of being silently ignored.
if len(legal_node_list) != len(node_data):
    mismatches += 1
    print("出现错误")

for (account, total_edges), row in zip(legal_node_list, node_data):
    if account != row[0] or total_edges != row[8] + row[18]:
        mismatches += 1
        print("出现错误")

# Report success only when no mismatch was found; previously this message
# was printed unconditionally, even after errors had been reported.
if mismatches == 0:
    print("没问题")