Amanda Rodgers
July 18, 2024
Anti-money laundering Project

In [None]:
# Dataset: https://www.kaggle.com/datasets/anshankul/ibm-amlsim-example-dataset?select=transactions.csv
# Tutorial: https://medium.com/@jasonclwu/network-analysis-for-anti-money-laundering-with-python-ad981792a947
# Github: https://github.com/jasonclwu/202209_AML_Network_Analysis/blob/main/AML_Network_Analysis.ipynb

In [None]:
# Import libraries
import pandas as pd
import networkx as nx
from pyvis.network import Network as net
# from pyvis import Network as net
import matplotlib.pyplot as plt
%matplotlib inline
pd.options.display.float_format = '{:,.0f}'.format

In [None]:
# Create df with dataset
txn_small = pd.read_csv('transactions.csv', usecols=['SENDER_ACCOUNT_ID','RECEIVER_ACCOUNT_ID','TX_AMOUNT'], dtype='object')
txn_small['TX_AMOUNT'] = txn_small['TX_AMOUNT'].astype(float)
txn_small

<h2>Sum transaction amounts for each node pair</h2>

In [None]:
# Form nodes and edges by grouping by account numbers with a count of transactions and sum of amounts
# Money came from source account and target is the account it went to
# Create new columns agg_txn_count(number of transactions) and agg_txn_amt (sum of all transactions)
edge_small = txn_small.groupby(['SENDER_ACCOUNT_ID', 'RECEIVER_ACCOUNT_ID'],as_index=False).agg({'TX_AMOUNT':['count','sum']})
edge_small.columns = ['source','target','agg_txn_count','agg_txn_amt']
edge_small

In [33]:
# Make df smaller for faster processing
first_5000_rows_df = edge_small.head(5000)
first_5000_rows_df

Unnamed: 0,source,target,agg_txn_count,agg_txn_amt
0,1,884,24,4219
1,10,8317,21,2003
2,10,8520,21,2003
3,100,3472,18,978
4,100,9954,18,978
...,...,...,...,...
4995,2121,8634,22,467
4996,2121,9388,22,467
4997,2121,9448,22,467
4998,2121,9968,22,467


<h2>Construct Network with NetworkX</h2>

In [None]:
# Create graph network with networkX
G = nx.from_pandas_edgelist(first_5000_rows_df, source='source', target='target')

In [None]:
# Access node and edge information from G, variable that stores graph
# Show first five nodes of graph, pattern is source, target, source, target, source, target
list(G.nodes)[0:5]

In [None]:
# Show the first 5 edge of G
# Shows ('target', 'source')
list(G.edges)[0:5]

In [None]:
# Show the first 5 edge of G with all attributes with data=True, there are no attributes in this case
# [('source', 'target' {attributes})]
list(G.edges(data=True))[0:5]

In [None]:
# Show the first 5 nodes of G with all attributes with data=True, there are no attributes in this case
list(G.nodes(data=True))[0:5]

<h2>Various network layouts</h2>

In [None]:
# Networkx Default Layout
# Create a network graph using networkx
plt.figure(figsize=(8,6))
nx.draw(G)
plt.show()

In [None]:
# Debug: Print the graph information
# print(f"Nodes: {G.nodes()}")
# print(f"Edges: {G.edges()}")

In [None]:
# Create a Pyvis network and visualize the NetworkX graph
# Pyvis Default Layout
# nt = net.Network(width='100%', height='100%', directed=True)
# nt.from_nx(G)
# nt.show('01_pyvis_default.html')

In [None]:
# Debug: Check the Pyvis network
# print(f"Pyvis Nodes: {nt.nodes}")
# print(f"Pyvis Edges: {nt.edges}")

<h3>? can't get plot to render below?</h3>

In [None]:
# Is not working, check to see if file was created and if browser and cloud are compatible
# see if versions of pyvis and networkX are compatible
# ERROR AttributeError: module 'pyvis.network' has no attribute 'network'(pyvis object net.network is not
# being created)

nt = net.Network(width='100%', height='100%', directed=True)
nt.from_nx(G)
nt.show('01_pyvis_default.html')

In [None]:
# Debug
# Ensure that the Pyvis network object nt is correctly initialized 
# and that the from_nx method is successfully converting the NetworkX graph.
from pyvis.network import Network

nt = Network(width='100%', height='100%', directed=True)
nt.from_nx(G)

# Check the Pyvis network
print(nt.nodes)
print(nt.edges)

In [None]:
pip install --upgrade networkx

In [None]:
pip install --upgrade pyvis

In [None]:
# Debug
# Create a Pyvis network and visualize the NetworkX graph
nt = Network(width='100%', height='100%', directed=True)
nt.from_nx(G)

# Debug: Check the Pyvis network
print(f"Pyvis Nodes: {nt.nodes}")
print(f"Pyvis Edges: {nt.edges}")

# Show the network
nt.show('1_pyvis_default.html')

<h3>Create function for drawing network graph</h3>

In [None]:
def show_graph(graph, filename):
    nt = net.Network(width='100%', height='100%', directed=True)
    nt.from_nx(graph)
    nt.show(filename)

<h3>Set Node Attributes</h3>

<h3>Set the node size according to the degree (number of connections) of node</h3>

In [23]:
# List source, 
# ?? follow on graph what these mean('source1', has 1 connection), ('targetamt884', 'is connected with source1'), ('source10', 'has 2 connections')
list(G.degree)[0:5]

[('1', 1), ('884', 1), ('10', 2), ('8317', 3), ('8520', 2)]

In [25]:
# Creates a dictionary of key = node id and value = node degree (how many connections that node has)
# Nodes with a bigger degree will appear bigger in the graph
degree_dict_G = dict(G.degree)
nx.set_node_attributes(G, degree_dict_G, 'value')

In [26]:
# List graph dictionary of nodes with attributes size and value being number of connections (same as size but pyvis 
# recognizes it different)
# size = number of connections to that node
# Below shows that the new attribute value has been added
list(G.nodes(data=True))[0:5]

[('1', {'size': 10, 'value': 1}),
 ('884', {'size': 10, 'value': 1}),
 ('10', {'size': 10, 'value': 2}),
 ('8317', {'size': 10, 'value': 3}),
 ('8520', {'size': 10, 'value': 2})]

<h3>Graph won't render, pyvis net.network object never created</h3>

In [28]:
show_graph(G, '02_node_size.html')

NameError: name 'show_graph' is not defined

<h3> ?? graph won't render</h3>

In [29]:
# Show graph to that nodes with higher degrees are larger
# Besides using degree as the centrality measurement, 
# you can also use other measurements under graph theory (link) such as eigenvector, closeness, betweenness etc
# For any above, you have to obtain the measurement for each node in a dictionary form, then pass the dictionary 
# to the node attribute with attribute name of value.
nt.show_graph()

AttributeError: 'Network' object has no attribute 'show_graph'

<h3>Display Node Information on Hover</h3>

<h3> graph will not render???</h3>

In [30]:
# Use title attribute to show info of node when you hover over it
# if you want to add more info in addition to account number, pass more information to the title attribute
title_dict_G = {}
for node in list(G.nodes):
    title_dict_G[node] = f'id: {node}'
nx.set_node_attributes(G, title_dict_G, 'title')
show_graph(G, '03_node_title_1.html')

NameError: name 'show_graph' is not defined

<h3>Retrieve neighbors for each node</h3>

In [34]:
# List neighbors of node 2121
list(G.neighbors('2121'))[0:5]

['3674', '6042', '6735', '7028', '8634']

In [35]:
# Create a dictionary of neighbor list for each node
neighbor_dict_G = {}
for node in list(G.nodes):
    neighbor_dict_G[node] = ','.join(G.neighbors(node))

<h3> ? graph won't show, error: NameError: name 'show_graph' is not defined</h3>

In [36]:
# Incorporate the dictionary to the attribute title to display: 
# Display ID, Degree and Neighbors in Graph (Hover on Node)
title_dict_G = {}
for node in list(G.nodes):
    title_dict_G[node] = f'id: {node}\ndegree: {degree_dict_G[node]}\nneighbor: {neighbor_dict_G[node]}'
nx.set_node_attributes(G, title_dict_G, 'title')

show_graph(G, '04_node_title_2.html')

NameError: name 'show_graph' is not defined