# Analysis Network

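This notebook computes a set of network metrics (degree, betweenness, closeness, PageRank, clustering coefficient and average path length) for the Elliptic and IBM-AML datasets. The results are intended for inclusion in: Deprez, B., Vanderschueren, T., Baesens, B., Verdonck, T., & Verbeke, W. (2024). *Network Analytics for Anti-Money Laundering -- A Systematic Literature Review and Experimental Evaluation*. arXiv preprint arXiv:2405.19383.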

In [None]:
# Setup cell: change the working directory, then pull in the project's dataset helpers.
import os

# Move the working directory one level up (presumably to the repository root so
# that the `src` package below resolves -- TODO confirm against the repo layout).
# NOTE(review): a relative chdir is not idempotent; re-running this cell moves up
# one more level each time.
os.chdir("../")

# NOTE(review): wildcard import. load_elliptic() and load_ibm(), used further
# down, are presumably provided by this module -- confirm and import explicitly.
from src.data.DatasetConstruction import *

In [None]:
# Import of required libraries and packages
# NOTE(review): easygui, scipy, statistics and pandas are not referenced by any
# cell visible in this notebook; only ig, plt and np are used below.
import igraph as ig
import easygui
from matplotlib import pyplot as plt
import numpy as np
import scipy
import statistics
import pandas as pd
# Print the installed igraph version so it is recorded with the notebook output.
print(ig.__version__)

## Elliptic

We start with the analysis of the Elliptic dataset.

In [None]:
# Build the Elliptic network object with the project loader and take its
# NetworkX view (presumably a networkx graph, since it is later passed to
# ig.Graph.from_networkx -- verify against the loader).
ntw_elliptic = load_elliptic()
ntw_elliptic_nx = ntw_elliptic.get_network_nx()

In [None]:
# Display the number of nodes of the Elliptic network as the cell output.
ntw_elliptic_nx.number_of_nodes()

In [None]:
# Convert the Elliptic NetworkX graph to igraph and compute each metric in turn.
# A short progress message is printed after every step because some of these
# computations can take a while on a large graph.
graph_el = ig.Graph.from_networkx(ntw_elliptic_nx)

degree_el = graph_el.degree()
print('degree done')

betweenness_el = graph_el.betweenness()
print('betweenness done')

avg_path_length_el = graph_el.average_path_length()
print('avg_path_length done')

closeness_el = graph_el.closeness()
print('closeness done')

pagerank_el = graph_el.pagerank()
print('pagerank done')

clustering_el = graph_el.transitivity_undirected()
print('clustering done')

In [None]:
# Print the scalar summary statistics of the Elliptic graph, one per line.
print('Elliptic')
for metric_label, metric_value in (
    ('Avg Degree: ', np.mean(degree_el)),
    ('Avg Path Length: ', avg_path_length_el),
    ('Clustering: ', clustering_el),
):
    print(metric_label, metric_value)

In [None]:
# Histogram of the betweenness values of the Elliptic graph on log-log axes.
# The series label names the plotted quantity (betweenness), and the axes and
# title are set so the figure can be read on its own.
plt.hist(betweenness_el, bins = 100, label='betweenness')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Betweenness (log)')
plt.ylabel('Frequency (log)')
plt.title('Elliptic: betweenness distribution');

In [None]:
# Histogram of the closeness values of the Elliptic graph on linear axes.
# The series label names the plotted quantity (closeness).
plt.hist(closeness_el, bins = 50, label='closeness')
plt.xlabel('Closeness Centrality')
plt.ylabel('Frequency')
plt.title('Elliptic: closeness distribution');

In [None]:
# Empirical degree distribution of the Elliptic graph: for every distinct
# degree value, how many vertices have it.
# np.unique returns the sorted distinct values together with their counts in a
# single pass, instead of calling list.count once per distinct degree.
uq_pos_degree, out_hist = np.unique(degree_el, return_counts=True)

# Float arrays; x_el / y_el are reused by the comparison figure further down.
x_el = uq_pos_degree.astype(float)
y_el = out_hist.astype(float)

# Log-log scatter of frequency against degree. A degree of 0, if present,
# cannot be shown on a logarithmic axis.
plt.scatter(x_el, y_el, label='degree')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Degree (log)')
plt.ylabel('Frequency (log)')
plt.title('Elliptic: degree distribution');

In [None]:
# Scatter of betweenness against degree for every vertex of the Elliptic graph,
# on log-log axes. Axis labels and a title are set so the figure stands alone.
plt.scatter(degree_el, betweenness_el, label='betweenness')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Degree (log)')
plt.ylabel('Betweenness (log)')
plt.title('Elliptic: betweenness vs. degree');

## IBM-AML

In [None]:
# Build the IBM-AML network object with the project loader and take its
# NetworkX view (presumably a networkx graph, since it is later passed to
# ig.Graph.from_networkx -- verify against the loader).
ntw_ibm = load_ibm()
ntw_ibm_nx = ntw_ibm.get_network_nx()

In [None]:
# Display the number of edges of the IBM-AML network as the cell output.
ntw_ibm_nx.number_of_edges()

In [None]:
# Convert the IBM-AML NetworkX graph to igraph and compute each metric in turn.
# A short progress message is printed after every step because some of these
# computations can take a while on a large graph.
graph_ibm = ig.Graph.from_networkx(ntw_ibm_nx)

degree_ibm = graph_ibm.degree()
print('degree done')

betweenness_ibm = graph_ibm.betweenness()
print('betweenness done')

avg_path_length_ibm = graph_ibm.average_path_length()
print('avg_path_length done')

closeness_ibm = graph_ibm.closeness()
print('closeness done')

pagerank_ibm = graph_ibm.pagerank()
print('pagerank done')

clustering_ibm = graph_ibm.transitivity_undirected()
print('clustering done')

In [None]:
# Print the scalar summary statistics of the IBM-AML graph, one per line.
print('IBM')
for metric_label, metric_value in (
    ('Avg Degree: ', np.mean(degree_ibm)),
    ('Avg Path Length: ', avg_path_length_ibm),
    ('Clustering: ', clustering_ibm),
):
    print(metric_label, metric_value)

In [None]:
# Histogram of the IBM-AML vertex degrees, drawn on the current axes with both
# axes switched to a logarithmic scale.
degree_hist_axes = plt.gca()
degree_hist_axes.hist(degree_ibm, bins=200, label='degree')
degree_hist_axes.set_xscale('log')
degree_hist_axes.set_yscale('log')

In [None]:
# Histogram of the IBM-AML closeness values (50 bins, linear axes), drawn on
# the current axes.
plt.gca().hist(closeness_ibm, bins=50, label='closeness')

In [None]:
# Empirical degree distribution of the IBM-AML graph: for every distinct
# degree value, how many vertices have it.
# np.unique returns the sorted distinct values together with their counts in a
# single pass, instead of calling list.count once per distinct degree.
uq_pos_degree, out_hist = np.unique(degree_ibm, return_counts=True)

# Float arrays; x_ibm / y_ibm are reused by the comparison figure further down.
x_ibm = uq_pos_degree.astype(float)
y_ibm = out_hist.astype(float)

# Log-log scatter of frequency against degree. A degree of 0, if present,
# cannot be shown on a logarithmic axis.
plt.scatter(x_ibm, y_ibm, label='degree')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Degree (log)')
plt.ylabel('Frequency (log)')
plt.title('IBM-AML: degree distribution');

In [None]:
# Scatter of betweenness against degree for every vertex of the IBM-AML graph,
# on log-log axes. Axis labels and a title are set so the figure stands alone.
plt.scatter(degree_ibm, betweenness_ibm, label='betweenness')
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Degree (log)')
plt.ylabel('Betweenness (log)')
plt.title('IBM-AML: betweenness vs. degree');

## Comparison

In [None]:
# Overlay the degree distributions of both datasets in one log-log figure and
# write it to a PDF in the current working directory.
for degree_values, degree_counts, dataset_name in (
    (x_el, y_el, 'Elliptic'),
    (x_ibm, y_ibm, 'IBM-AML'),
):
    plt.scatter(degree_values, degree_counts, label=dataset_name, alpha=0.6)

plt.xscale('log')
plt.yscale('log')
plt.xlabel('Degree (log)')
plt.ylabel('Frequency (log)')
plt.title('Degree distribution')
plt.legend()

plt.tight_layout()
plt.savefig('degree_distribution_elliptic_ibm.pdf')

In [None]:
# Overlay the closeness histograms of both datasets on a shared set of bin
# edges (100 evenly spaced edges on [0, 1]) and write the figure to a PDF in
# the current working directory.
bins = np.linspace(0, 1, num=100)

for closeness_values, dataset_name in (
    (closeness_el, 'Elliptic'),
    (closeness_ibm, 'IBM-AML'),
):
    plt.hist(closeness_values, bins=bins, label=dataset_name, alpha=0.7)

plt.xlabel('Closeness Centrality')
plt.ylabel('Frequency')
plt.title('Closeness distribution')
plt.legend()

plt.tight_layout()
plt.savefig('closeness_distribution_elliptic_ibm.pdf')

In [None]:
# NOTE(review): unexplained scratch calculation. The meaning of the two
# constants is not stated anywhere in this notebook -- document them (or name
# them as constants) or remove this cell.
2557904/500000