Label 1 -> illicit
Label 2 -> licit

In [10]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import scipy as sp

import community as louvain
from networkx.algorithms.community import girvan_newman

In [2]:
classes = pd.read_csv('elliptic_bitcoin_dataset/elliptic_txs_classes.csv')
edges = pd.read_csv('elliptic_bitcoin_dataset/elliptic_txs_edgelist.csv')
features = pd.read_csv('elliptic_bitcoin_dataset/elliptic_txs_features.csv')

In [3]:
classes.head()

Unnamed: 0,txId,class
0,230425980,unknown
1,5530458,unknown
2,232022460,unknown
3,232438397,2
4,230460314,unknown


In [4]:
edges.head()

Unnamed: 0,txId1,txId2
0,230425980,5530458
1,232022460,232438397
2,230460314,230459870
3,230333930,230595899
4,232013274,232029206


In [5]:
features.head()

Unnamed: 0,230425980,1,-0.1714692896288031,-0.18466755143291433,-1.2013688016765636,-0.12196959975910057,-0.04387454791734898,-0.11300200928476244,-0.06158379407303222,-0.16209679981659642,...,-0.5621534802884299,-0.6009988905192808,1.4613303209554889,1.4613689382001922,0.01827940003744589,-0.0874901561101501,-0.13115530389558736,-0.09752359377152515,-0.12061340670311574,-0.11979245961251665
0,5530458,1,-0.171484,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.162112,...,0.947382,0.673103,-0.979074,-0.978556,0.018279,-0.08749,-0.131155,-0.097524,-0.120613,-0.119792
1,232022460,1,-0.172107,-0.184668,-1.201369,-0.12197,-0.043875,-0.113002,-0.061584,-0.162749,...,0.670883,0.439728,-0.979074,-0.978556,-0.098889,-0.106715,-0.131155,-0.183671,-0.120613,-0.119792
2,232438397,1,0.163054,1.96379,-0.646376,12.409294,-0.063725,9.782742,12.414558,-0.163645,...,-0.577099,-0.613614,0.241128,0.241406,1.072793,0.08553,-0.131155,0.677799,-0.120613,-0.119792
3,230460314,1,1.011523,-0.081127,-1.201369,1.153668,0.333276,1.312656,-0.061584,-0.163523,...,-0.511871,-0.400422,0.517257,0.579382,0.018279,0.277775,0.326394,1.29375,0.178136,0.179117
4,230459870,1,0.96104,-0.081127,-1.201369,1.303743,0.333276,1.480381,-0.061584,-0.163577,...,-0.504702,-0.422589,-0.22679,-0.117629,0.018279,0.277775,0.413931,1.149556,-0.696053,-0.69554


In [6]:
graph = nx.Graph()

nodes_list = list(classes['txId'])
edges_list = [(edges['txId1'][i], edges['txId2'][i]) for i in range(len(edges))]

In [7]:
# Create the graph
graph.add_nodes_from(nodes_list)
graph.add_edges_from(edges_list)

In [21]:
graph.nodes()

NodeView((230425980, 5530458, 232022460, 232438397, 230460314, 230459870, 230333930, 230595899, 232013274, 232029206, 232344069, 27553029, 36411953, 230405052, 34194980, 5529846, 3881097, 232457116, 230409257, 32877982, 230351738, 195218118, 88008478, 232012569, 232412408, 232412405, 232038018, 232470342, 2925426, 230550393, 232051089, 232470704, 233591710, 234439913, 232945017, 94251207, 6595740, 79840192, 212704383, 212366182, 69254632, 230595355, 166890641, 82304496, 232033533, 230470022, 230598493, 43560505, 14878322, 230415750, 232073372, 75604335, 230473487, 7089694, 231179595, 3307463, 230658679, 232074274, 5234549, 231177927, 232431877, 2758467, 81273879, 232437171, 3878856, 3878886, 231182296, 14660781, 13334987, 5530452, 230519287, 194810163, 230423321, 232399276, 230659456, 230658142, 230347386, 37233982, 230427468, 43358239, 230528714, 75605182, 75604052, 230585122, 231208959, 230455950, 2881274, 232658952, 232658962, 230530291, 2881273, 75604616, 37232637, 230531551, 46193

In [13]:
# Draw the graph
pos = nx.spring_layout(graph, iterations=1000)
# nx.draw(graph, pos=pos, with_labels=True)

# plt.title('Graph')
# plt.show()

KeyboardInterrupt: 

In [18]:
def detect_communities(g, method):
    if isinstance(g, nx.DiGraph):
      g = nx.Graph(g)


    if method == 'girvan-newman':
      communities_generator = girvan_newman(g)
      print(communities_generator)
      communities = []
      for community in communities_generator:
        print(community)
        communities.append([list(c) for c in community])

      total_modularity = 0
      for i in range(len(communities)):
        modularity = nx.algorithms.community.modularity(g, communities[i])
        total_modularity += modularity

      total_modularity /= len(communities)
      return communities, total_modularity

    elif method == 'louvain':
      louvain = louvain.best_partition(g)
      communities = [[] for _ in range(max(louvain.values()) + 1)]
      for node, community_id in louvain.items():
          communities[community_id].append(node)

      modularity = nx.algorithms.community.modularity(g, communities)
      return communities, modularity

    else:
      print('Entered an invalid method')
      return None

In [19]:
communities_girvan, modularities = detect_communities(graph, method='girvan-newman')

<generator object girvan_newman at 0x000001E8D95255D0>


KeyboardInterrupt: 