In [1]:
import pandas as pd
import numpy as np

import networkx as nx
from networkx.algorithms.community import k_clique_communities
import random as rd
rd.seed(42)

import warnings
warnings.filterwarnings('ignore')

## Load Data & Obtain Graphs

In [2]:
# Load DataFrame (self-loops removed)
trans_3w = pd.read_csv(r'C:\Users\sarah\Documents\UNI\Masters\Study\Term_3\Master Project\trans_3w_cl.csv')
trans_3w.head()

Unnamed: 0,txn_hash,input_address,output_address,ammount,fees,block_index,block_time,input_flag,output_flag
0,bd36f2ca16e2a2c73c807b7d1569657b30de8453450cd2...,13Uf71d8y94xEk2LX7GCtaBJmPiahhA7TR,16FPyvvz5Ug3cx97qH67KfgC6PY1S9fskQ,24200000.0,320000.0,453318,2017-02-16 12:05:04,0,1
1,8c852e187a0541cd8ea8c93a6c728843b5f8b9c579b6fc...,166zajP74bcRVo7BmdeDME3mRX3Mi9e3xn,1ASaHGPN8qRuqZkpnR7d2tcndU9uHL6aGj,2503.648,3.314845,453318,2017-02-16 12:05:04,0,1
2,8c852e187a0541cd8ea8c93a6c728843b5f8b9c579b6fc...,1LU3DtRE3XK32WxFqrnaT9k99nRgwHtLHd,1ASaHGPN8qRuqZkpnR7d2tcndU9uHL6aGj,635940.2,841.988605,453318,2017-02-16 12:05:04,0,1
3,8c852e187a0541cd8ea8c93a6c728843b5f8b9c579b6fc...,1HVQNFf7vDpJVZk7tEzbFxnmALSezA2qPD,1ASaHGPN8qRuqZkpnR7d2tcndU9uHL6aGj,590236.9,781.47725,453318,2017-02-16 12:05:04,0,1
4,8c852e187a0541cd8ea8c93a6c728843b5f8b9c579b6fc...,1LU3DtRE3XK32WxFqrnaT9k99nRgwHtLHd,1ASaHGPN8qRuqZkpnR7d2tcndU9uHL6aGj,101303.2,134.126076,453318,2017-02-16 12:05:04,0,1


Slef-loops, i.e. rows where the input and output addresses are the same, are excluded because these reflect the remaining balance in an address's crypto walet or the transfer of change. If the transfer of change goes to the same account this is taken as an indication that a user has nothing to hide, but if a user transfers any change received from a transaction to another address this is considered as being an attempt to disperse funds (reduce transparency).

Keeping self-loops also has implications for sampling methods. In the case of node sampling methods, self-loops can result in a sample meeting the edge condition (seen in samplers below) whilst still having several isolated nodes. In the case of random walks, self-loops can cause the walk to circle back and avoid nodes that would otherwise be sampled.

In [3]:
# Obtain full network NetworkX Graph
G_full = nx.from_pandas_edgelist(trans_3w, 'input_address', 'output_address', edge_attr=['txn_hash', 'ammount', 'fees', 
                                'block_index', 'block_time'], create_using=nx.DiGraph())

# Confirm that Graph is Directed
nx.is_directed(G_full)

True

In [4]:
# Generator for giant component
giant = max(nx.connected_components(G_full.to_undirected()))

# Get sub-graph of Giant Component
G_giant = G_full.subgraph(giant)
G_giant = G_giant.to_directed()

print(nx.info(G_giant))

DiGraph with 3852506 nodes and 9804954 edges


In [5]:
# Remove unnecessary objects for memory consideration
del trans_3w
del G_full

## Random Node Sampling

When running the RandomNodeSampler it was noted that the method to an extremely long time to find a representative sample due to 2 limitations: 
1. a large sample, and 
2. a weakly connected component that makes it difficult to satify the edge condition. Hence, the sample of nodes is adjusted to exclude nodes that would slow down this process. 

This sub-sampling is aimed at ensuring that all fraudulent/high-risk nodes are included, and that low centrality nodes are excluded such that they don't slow down the process. 

### Random Node Sampler

In [6]:
# List all nodes in giant component
n_giant = G_giant.nodes

In [7]:
# Get node properties from data exploration exercise
node_properties = pd.read_csv(r'C:\Users\sarah\Documents\UNI\Masters\Study\Term_3\Master Project\node_properties_cl.csv')
node_properties[['degree', 'in_degree', 'out_degree', 'eigen_centrality']].describe()

Unnamed: 0,degree,in_degree,out_degree,eigen_centrality
count,4666873.0,4666873.0,4666873.0,4666873.0
mean,4.494298,2.247149,2.247149,3.970074e-05
std,39.35922,19.98412,28.68249,0.0004611943
min,1.0,0.0,0.0,1.1933219999999999e-21
25%,1.0,0.0,0.0,1.1933219999999999e-21
50%,2.0,1.0,1.0,4.8926199999999995e-20
75%,3.0,1.0,2.0,1.517475e-14
max,27917.0,12116.0,25335.0,0.1642653


In [13]:
# Subset by fraudulent/high-risk nodes

node_prop = node_properties[node_properties['address'].isin(n_giant)]
fr_nodes = node_prop[node_prop['fraud_flag'] == 1]
fr_nodes = fr_nodes['address'].tolist()

In [14]:
# Remove nodes with low centrality

licit_nodes = node_prop[node_prop['fraud_flag'] == 0] # sample from low-risk nodes
licit_nodes = licit_nodes[licit_nodes['eigen_centrality'] > 1.818528e-6] # remove nodes in bottom 84% of centrality
licit_nodes = licit_nodes['address'].tolist()
len(licit_nodes)

363041

*Adjusted methods to account for pre-randomisation sub-sampling approach*

In [15]:
class RandomNodeSampler:
    def __init__(self, graph, fraud_sample, licit_sample, number_of_nodes):
        self.number_of_nodes = number_of_nodes
        self.fraud_sample = fraud_sample
        self.licit_sample = licit_sample 
        
    def sampler(self, graph, fraud_sample, licit_sample, number_of_nodes):
        s_size = self.number_of_nodes - len(fraud_sample) # Number of licit nodes to sample
        rd_nodes = rd.sample(self.licit_sample, s_size) # Randomly select licit of nodes
        rd_nodes = rd_nodes + self.fraud_sample  # Get full list of nodes
        g_sampler = graph.subgraph(rd_nodes) # Obtain sub-graph from randomly selected nodes
        return g_sampler
        
    def sample(self, graph, fraud_sample, licit_sample, number_of_nodes):
        new_graph = self.sampler(graph, self.fraud_sample, self.licit_sample, self.number_of_nodes) # Get randomly sampled graph
        # Edge condition: Number of Edges >= Number of Nodes (Ensures graph is not too sparse)
        while new_graph.number_of_edges() < self.number_of_nodes: # If condition not met
            new_graph = self.sampler(graph, self.fraud_sample, self.licit_sample, self.number_of_nodes) # Get new sub-graph
            if new_graph.number_of_edges() >= self.number_of_nodes:
                        break
        return new_graph

In [16]:
# Instantiate Sampler
rns_generator = RandomNodeSampler(G_giant, fr_nodes, licit_nodes, 50000)

# Obtain sub-sample
G_rns = rns_generator.sample(G_giant, fr_nodes, licit_nodes, 50000)

# Check if directed structure is maintained
nx.is_directed(G_rns)

True

In [17]:
print(nx.info(G_rns))

DiGraph with 50000 nodes and 54227 edges


##### Check Properties of Random Sample

*DataFrame*

In [18]:
# To DataFrame
rns_df = nx.to_pandas_edgelist(G_rns)

# Save sample as .csv and .gml
rns_df.to_csv("G_rns_df.csv", index = False)
nx.write_gml(G_rns, "G_rns.gml")

rns_df.head()

Unnamed: 0,source,target,fees,ammount,block_time,txn_hash,block_index
0,1FtZSDBLEoV3qZPJDdw61sA7vhU11Mr8DP,1JWroayLMZoCVTi4goz79DDqztXjHwremJ,18795.184401,51953840.0,2017-02-25 07:08:47,6071fb7e80a607e5fb9b97b8695ed38ca32c6d8600f842...,454607
1,1JCewT1SCWhvco1AYnFfFQkJmprVdaXgiy,1TipsnxGEhPwNxhAwKouhHgTUnmmuYg9P,6.844207,20000.0,2017-02-27 04:06:19,42edcf3b0bfca8899742cf4e1a6d8260505c268cda2aca...,454925
2,1JCewT1SCWhvco1AYnFfFQkJmprVdaXgiy,19ugaktHY7asfYUBWSefqY6D1JZrnvDbiS,27097.155793,79182750.0,2017-02-27 04:06:19,42edcf3b0bfca8899742cf4e1a6d8260505c268cda2aca...,454925
3,32A7xwi6FUXBQJTM8aqDa6x4MsbS7c1XQg,1JDwWa9SYpGw4bubxtmhK8iD53q6m2NpYb,0.645779,639.2221,2017-02-17 02:32:32,bd813a403a5223fcf530f9c6ee04c5a6e557d0169686fe...,453398
4,32A7xwi6FUXBQJTM8aqDa6x4MsbS7c1XQg,1H6XgajaCidytjBYbMsmEZVM8d7R48TREQ,4.137533,4095.524,2017-02-17 02:32:32,bd813a403a5223fcf530f9c6ee04c5a6e557d0169686fe...,453398


In [19]:
# Number of unique transactions
rns_df['txn_hash'].nunique()

25695

In [20]:
# Number of unique input addresses
rns_df['source'].nunique()

9914

In [21]:
# Number of unique output addresses
rns_df['target'].nunique()

25500

There is an adequate variety in the number of transactions and addresses. Unlike the full sample the number of input and output addresses are not approximately balanced.

In [22]:
# View transaction properties
rns_df.describe()

Unnamed: 0,fees,ammount,block_index
count,54227.0,54227.0,54227.0
mean,6161.057,9025750.0,454433.135375
std,34000.71,142147400.0,950.640021
min,0.0,-2442296000.0,453318.0
25%,20.91602,5430.0,453520.0
50%,199.3147,82947.0,454287.0
75%,2476.953,980535.3,455195.0
max,4080994.0,27425160000.0,456437.0


A distribution skewed towards large amounts is maintained. We also note that the presence of negative values is also kept.

In [23]:
# Number of unique timestamps
rns_df['block_time'].nunique() # Near all timestamps are represented

1491

*Nodes*

In [24]:
# Get Properties

address = [node for (node, val) in G_rns.degree()]
degree = [val for (node, val) in G_rns.degree()]
in_degree = [val for (node, val) in G_rns.in_degree()]
out_degree = [val for (node, val) in G_rns.out_degree()]
eigen_centrality = nx.eigenvector_centrality(G_rns)
eigen_centrality = [eigen_centrality[node] for node in eigen_centrality]

In [25]:
# Add Properties to DataFrame

nodes = pd.DataFrame()
nodes['address'] = address
nodes['degree'] = degree
nodes['in_degree'] = in_degree
nodes['out_degree'] = out_degree
nodes['eigen_centrality'] = eigen_centrality

In [26]:
# Add fraud flag

nodes = nodes.assign(**dict.fromkeys(['fraud_flag'], 0))

for i in fr_nodes:
    nodes.loc[nodes.address == i, 'fraud_flag'] = 1
    
nodes.head()

Unnamed: 0,address,degree,in_degree,out_degree,eigen_centrality,fraud_flag
0,1BKSQsREyrcCop6iuCQdFC67iHKQMM7wo6,0,0,0,2.157823e-29,0
1,3QxFFTEjLiHuMQWqRJijvmiqRKNkGJf1Fx,1,1,0,4.3156460000000005e-28,0
2,15MEbvHRr81dBYueyw8pLyoHVFDjZUQ3qx,0,0,0,2.157823e-29,0
3,34dwbuLrRSFKv92wd77z9NxMwZWEW4LEX3,0,0,0,2.157823e-29,1
4,1FtZSDBLEoV3qZPJDdw61sA7vhU11Mr8DP,4,3,1,0.01605794,0


In [27]:
# Describe sample properties overall
nodes[['degree', 'in_degree', 'out_degree', 'eigen_centrality']].describe()

Unnamed: 0,degree,in_degree,out_degree,eigen_centrality
count,50000.0,50000.0,50000.0,50000.0
mean,2.16908,1.08454,1.08454,0.001234437
std,28.483303,6.594357,23.070714,0.004298434
min,0.0,0.0,0.0,2.157823e-29
25%,0.0,0.0,0.0,2.157823e-29
50%,1.0,1.0,0.0,4.3156460000000005e-28
75%,2.0,1.0,0.0,3.032326e-05
max,3217.0,746.0,2484.0,0.1856914


In [28]:
# Describe Properties of Fraudulent Nodes

nodes_fraud = nodes[nodes['fraud_flag'] == 1]
nodes_fraud[['degree', 'in_degree', 'out_degree', 'eigen_centrality']].describe()

Unnamed: 0,degree,in_degree,out_degree,eigen_centrality
count,4400.0,4400.0,4400.0,4400.0
mean,8.733864,2.405682,6.328182,0.00138786
std,94.830001,21.138997,76.870891,0.006627883
min,0.0,0.0,0.0,2.157823e-29
25%,0.0,0.0,0.0,2.157823e-29
50%,1.0,0.0,0.0,2.157823e-29
75%,2.0,1.0,1.0,2.872062e-26
max,3217.0,746.0,2484.0,0.1856914


In [29]:
# Describe Properties of Non-Fraudulent Nodes

nodes_licit = nodes[nodes['fraud_flag'] == 0]
nodes_licit[['degree', 'in_degree', 'out_degree', 'eigen_centrality']].describe()

Unnamed: 0,degree,in_degree,out_degree,eigen_centrality
count,45600.0,45600.0,45600.0,45600.0
mean,1.535636,0.957061,0.578575,0.001219633
std,4.181196,2.094753,3.262745,0.004002378
min,0.0,0.0,0.0,2.157823e-29
25%,0.0,0.0,0.0,2.157823e-29
50%,1.0,1.0,0.0,4.3156460000000005e-28
75%,2.0,1.0,0.0,3.71545e-05
max,206.0,186.0,189.0,0.03785327


The overall statistics indicate that the sample is not significantly representative of the full network with low degrees and a considerable number of isolated nodes.

*Graph*

In [30]:
# Density
nx.density(G_rns) # Connectivity remains very low

2.1691233824676492e-05

In [31]:
# Communities - k-Cliques

com_generator_rns = k_clique_communities(G_rns.to_undirected(), k=9) # Doesn't work for k>9
com_rns = next(com_generator_rns)
print('The number of communities in randomly sampled graph is : ' + str(len(com_rns))) 

The number of communities in randomly sampled graph is : 14


This indicates that some variety in community structures was maintained. (Remember that the giant component had 107 k-cliques).

### Degree Biased Random Sampler

In [32]:
# Get Licit nodes

licit_nodes = node_prop[node_prop['fraud_flag'] == 0] # sample from low-risk nodes
licit_nodes = licit_nodes['address'].tolist()
len(licit_nodes)

3848106

In [33]:
# Obtain sub-graph with short-listed nodes

G_licit = G_giant.subgraph(licit_nodes)

In [34]:
# Obtain probabilites

dp = node_properties[node_properties['address'].isin(licit_nodes)]
dp_sum = dp['degree'].sum()
dp['p'] = dp['degree'] / dp_sum
p_degree = dp['p'].values

In [35]:
class DegreeBasedSampler:
    def __init__(self, graph, g_licit, fraud_sample, licit_sample, number_of_nodes, p_distribution):
        self.number_of_nodes = number_of_nodes
        self.fraud_sample = fraud_sample
        self.licit_sample = licit_sample
        self.p_distribution = p_distribution

    def sampler(self, graph, g_licit, fraud_sample, licit_sample, number_of_nodes, p_distribution):
        s_size = self.number_of_nodes - len(fraud_sample) # Number of licit nodes to sample
        
        # Sample nodes and create sub-graph
        sampled_nodes = np.random.choice(self.licit_sample, s_size, replace=False, p=self.p_distribution)
        sampled_nodes = list(sampled_nodes) + list(self.fraud_sample)
        g_sampler = graph.subgraph(sampled_nodes)
        return g_sampler
    
    def sample(self, graph, g_licit, fraud_sample, licit_sample, number_of_nodes, p_distribution):
        new_graph = self.sampler(graph, g_licit, self.fraud_sample, self.licit_sample, self.number_of_nodes,
                                 self.p_distribution) # Get randomly sampled graph
        # Edge condition: Number of Edges >= Number of Nodes (Ensures graph is not too sparse)
        while new_graph.number_of_edges() < self.number_of_nodes: # If condition not met
            new_graph = self.sampler(graph, g_licit, self.fraud_sample, self.licit_sample, self.number_of_nodes, 
                                     self.p_distribution) # Get new sub-graph
            if new_graph.number_of_edges() >= self.number_of_nodes:
                        break
        return new_graph

In [36]:
# Instantiate Sampler
dbs_generator = DegreeBasedSampler(G_giant, G_licit, fr_nodes, licit_nodes, 50000, p_degree) 

# Obtain sub-sample
G_dbs = dbs_generator.sample(G_giant, G_licit, fr_nodes, licit_nodes, 50000, p_degree) 

# Check if directed structure is maintained
nx.is_directed(G_dbs) 

True

In [37]:
print(nx.info(G_dbs))

DiGraph with 50000 nodes and 190450 edges


##### Check Properties of Degree Biased Random Sample

*DataFrame*

In [38]:
# To DataFrame
dbs_df = nx.to_pandas_edgelist(G_dbs)

# Save sample as .csv and .gml
dbs_df.to_csv("G_dbs_df.csv", index = False)
nx.write_gml(G_dbs, "G_dbs.gml")

dbs_df.head()

Unnamed: 0,source,target,fees,ammount,block_time,txn_hash,block_index
0,1MD5fBWY55ayQmtwAdB1P8wi9GP1arW8vC,1AdRv7DR7de9KFHgRJN4eYQxaMPgtrRL88,2.975436,96.739224,2017-03-06 08:17:16,4a7b8c0b2eb30207c48a857d79ed40bb532d93a3e62e17...,455985
1,1MD5fBWY55ayQmtwAdB1P8wi9GP1arW8vC,13V2Rgp3m12ySJ4e7RkgJ8q3G7hzCYPTt9,7.252652,235.802765,2017-03-06 08:17:16,4a7b8c0b2eb30207c48a857d79ed40bb532d93a3e62e17...,455985
2,1MD5fBWY55ayQmtwAdB1P8wi9GP1arW8vC,3Nnzx3vhQKXGraVQALjtE1XUYZZdeUhi1r,4.00667,130.267371,2017-03-06 08:17:16,4a7b8c0b2eb30207c48a857d79ed40bb532d93a3e62e17...,455985
3,1MD5fBWY55ayQmtwAdB1P8wi9GP1arW8vC,159YHzUpAeCCAc2X92kttj9BREKWsdcv2y,2.032727,66.089296,2017-03-06 08:17:16,4a7b8c0b2eb30207c48a857d79ed40bb532d93a3e62e17...,455985
4,1MD5fBWY55ayQmtwAdB1P8wi9GP1arW8vC,18UtybHabJ5E9bkGvUBwZQE5TrrREzMzwU,183.118487,5953.662616,2017-03-06 08:17:16,4a7b8c0b2eb30207c48a857d79ed40bb532d93a3e62e17...,455985


In [39]:
# Number of unique transactions
dbs_df['txn_hash'].nunique()

29498

In [40]:
# Number of unique input addresses
dbs_df['source'].nunique()

19453

In [41]:
# Number of unique output addresses
dbs_df['target'].nunique()

19269

The number of unique transactions and addresses is considered to be adequat variety. Near balance between input and output addresses as in full dataset, but the full dataset had slightly moreoutput addresses.

In [42]:
# View transaction properties
dbs_df.describe()

Unnamed: 0,fees,ammount,block_index
count,190450.0,190450.0,190450.0
mean,3756.381,7618166.0,454804.007362
std,47063.23,685423200.0,1027.33261
min,0.0,-2312935000.0,453318.0
25%,7.484877,1267.98,453703.0
50%,63.9418,12881.49,454684.0
75%,468.2257,151866.3,455954.0
max,7203339.0,268336600000.0,456437.0


The distribution of transaction ammounts appears to be adequately representative.

In [43]:
# Number of unique timestamps
dbs_df['block_time'].nunique() # Nearly all timestamps are preserved

1528

*Nodes*

In [44]:
# Get Properties

address = [node for (node, val) in G_dbs.degree()]
degree = [val for (node, val) in G_dbs.degree()]
in_degree = [val for (node, val) in G_dbs.in_degree()]
out_degree = [val for (node, val) in G_dbs.out_degree()]
eigen_centrality = nx.eigenvector_centrality(G_dbs)
eigen_centrality = [eigen_centrality[node] for node in eigen_centrality]

In [45]:
# Add Properties to DataFrame

nodes = pd.DataFrame()
nodes['address'] = address
nodes['degree'] = degree
nodes['in_degree'] = in_degree
nodes['out_degree'] = out_degree
nodes['eigen_centrality'] = eigen_centrality

In [46]:
# Add fraud flag

nodes = nodes.assign(**dict.fromkeys(['fraud_flag'], 0))

for i in fr_nodes:
    nodes.loc[nodes.address == i, 'fraud_flag'] = 1
    
nodes.head()

Unnamed: 0,address,degree,in_degree,out_degree,eigen_centrality,fraud_flag
0,1MD5fBWY55ayQmtwAdB1P8wi9GP1arW8vC,143,0,143,2.7055070000000004e-23,0
1,1AZb7hJLfy7tBWMNaxW3iqJ1nzPsnmnyWe,1,1,0,0.0001269213,0
2,1FtZSDBLEoV3qZPJDdw61sA7vhU11Mr8DP,7,7,0,0.01253217,0
3,1M29XzFhgEvgLn2cFQDwQT32yEEvQAkjUC,0,0,0,2.7055070000000004e-23,0
4,16W56cS8Fo5swUqUXxJUMKgZL4NYCDtRXb,0,0,0,2.7055070000000004e-23,0


In [47]:
# Describe sample properties overall
nodes[['degree', 'in_degree', 'out_degree', 'eigen_centrality']].describe()

Unnamed: 0,degree,in_degree,out_degree,eigen_centrality
count,50000.0,50000.0,50000.0,50000.0
mean,7.618,3.809,3.809,0.001099103
std,33.263619,15.40368,23.790729,0.004335014
min,0.0,0.0,0.0,2.7055070000000004e-23
25%,0.0,0.0,0.0,2.7055070000000004e-23
50%,1.0,0.0,0.0,2.7055070000000004e-23
75%,6.0,2.0,1.0,7.526556e-08
max,3405.0,1019.0,2386.0,0.2272576


In [48]:
# Describe Properties of Fraudulent Nodes

nodes_fraud = nodes[nodes['fraud_flag'] == 1]
nodes_fraud[['degree', 'in_degree', 'out_degree', 'eigen_centrality']].describe()

Unnamed: 0,degree,in_degree,out_degree,eigen_centrality
count,4400.0,4400.0,4400.0,4400.0
mean,8.600909,3.684773,4.916136,0.001337366
std,89.854988,28.793855,62.981243,0.00749485
min,0.0,0.0,0.0,2.7055070000000004e-23
25%,0.0,0.0,0.0,2.7055070000000004e-23
50%,1.0,0.0,0.0,2.7055070000000004e-23
75%,3.0,1.0,1.0,3.909896e-07
max,3405.0,1019.0,2386.0,0.2272576


In [49]:
# Describe Properties of Non-Fraudulent Nodes

nodes_licit = nodes[nodes['fraud_flag'] == 0]
nodes_licit[['degree', 'in_degree', 'out_degree', 'eigen_centrality']].describe()

Unnamed: 0,degree,in_degree,out_degree,eigen_centrality
count,45600.0,45600.0,45600.0,45600.0
mean,7.523158,3.820987,3.702171,0.001076113
std,20.838201,13.42326,15.421321,0.00389623
min,0.0,0.0,0.0,2.7055070000000004e-23
25%,0.0,0.0,0.0,2.7055070000000004e-23
50%,1.0,0.0,0.0,2.7055070000000004e-23
75%,6.0,2.0,1.0,5.688494e-08
max,503.0,308.0,404.0,0.07899885


Degree and centrality measures are significantly skewed towards higher amounts. This bias  is more severe than in the previous sampling method.

*Graph*

In [50]:
# Density
nx.density(G_dbs) # connectivity remains low

7.618152363047261e-05

In [51]:
# Communities - k-Cliques

com_generator_dbs = k_clique_communities(G_dbs.to_undirected(), k=10)
com_dbs = next(com_generator_dbs)
print('The number of communities in randomly sampled graph is : ' + str(len(com_dbs)))

The number of communities in randomly sampled graph is : 11


This indicates the preservation of community structures. (Note: more communities than random sampler)