Amanda Rodgers
July 18, 2024
Anti-Money Laundering Project

In [None]:
# Dataset: https://www.kaggle.com/datasets/anshankul/ibm-amlsim-example-dataset?select=transactions.csv
# Tutorial: https://medium.com/@jasonclwu/network-analysis-for-anti-money-laundering-with-python-ad981792a947
# Github: https://github.com/jasonclwu/202209_AML_Network_Analysis/blob/main/AML_Network_Analysis.ipynb

In [None]:
# Import libraries
import pandas as pd
import networkx as nx
from pyvis import network as net
import matplotlib.pyplot as plt
%matplotlib inline
pd.options.display.float_format = '{:,.0f}'.format

In [None]:
# Load the transaction list, keeping only sender, receiver and amount.
# Everything is read as text first so the account IDs stay strings;
# the amount column is then converted to float for aggregation.
txn_small = (
    pd.read_csv(
        'transactions.csv',
        usecols=['SENDER_ACCOUNT_ID', 'RECEIVER_ACCOUNT_ID', 'TX_AMOUNT'],
        dtype='object',
    )
    .assign(TX_AMOUNT=lambda frame: frame['TX_AMOUNT'].astype(float))
)
txn_small

<h2>Sum transaction amounts for each node pair</h2>

In [18]:
# Collapse the transaction list into one row per (sender, receiver) pair.
# Each row becomes an edge of the network:
#   source        - account the money came from
#   target        - account the money went to
#   agg_txn_count - number of transactions between the pair
#   agg_txn_amt   - total amount transferred between the pair
edge_small = (
    txn_small
    .groupby(['SENDER_ACCOUNT_ID', 'RECEIVER_ACCOUNT_ID'], as_index=False)
    .agg(
        agg_txn_count=('TX_AMOUNT', 'count'),
        agg_txn_amt=('TX_AMOUNT', 'sum'),
    )
    .rename(columns={'SENDER_ACCOUNT_ID': 'source', 'RECEIVER_ACCOUNT_ID': 'target'})
)
edge_small

Unnamed: 0,source,target,agg_txn_count,agg_txn_amt
0,1,884,24,4219
1,10,8317,21,2003
2,10,8520,21,2003
3,100,3472,18,978
4,100,9954,18,978
...,...,...,...,...
68942,9999,9866,19,6035
68943,9999,9981,19,6035
68944,9999,9982,19,6035
68945,9999,9985,19,6035


In [None]:
# Work on the first 5,000 edges only so graph construction and drawing stay fast.
# NOTE(review): edge_small is ordered by the string account IDs ('1', '10', '100', ...),
# so this slice is the lexicographically-first accounts, not a random sample - confirm
# that is acceptable for the analysis.
first_5000_rows_df = edge_small.iloc[:5000]
first_5000_rows_df

<h2>Construct Network with NetworkX</h2>

In [None]:
# Build the transaction network: one node per account, one edge per (source, target) row.
# NOTE(review): without create_using, from_pandas_edgelist returns an undirected nx.Graph,
# so the sender -> receiver direction is not stored in G - confirm this is intended.
G = nx.from_pandas_edgelist(
    first_5000_rows_df,
    source='source',
    target='target',
)

In [None]:
# Peek at the first five nodes stored in G (the graph built from the edge list).
# Nodes are listed in the order they were first added: each edge row contributes its
# source and then its target, unless that account is already in the graph.
list(G)[:5]

In [None]:
# Show the first 5 edges of G.
# Each edge is a pair of account IDs. NOTE(review): if G is undirected (the default for
# from_pandas_edgelist), the order inside a pair does not tell you which account sent the money.
list(G.edges())[:5]

In [None]:
# Show the first 5 edges of G together with their attribute dictionaries.
# No edge attributes were attached when G was built, so each dictionary is empty.
list(G.edges.data())[:5]

In [None]:
# Show the first 5 nodes of G together with their attribute dictionaries.
# No node attributes have been set yet, so each dictionary is empty.
list(G.nodes.data())[:5]

<h2>Various network layouts</h2>

In [None]:
# NetworkX default layout.
# nx.draw places nodes with a spring layout that starts from random positions, so the
# picture changes on every run. Compute the same layout explicitly with a fixed seed
# so the figure is reproducible after Restart & Run All.
plt.figure(figsize=(8, 6))
nx.draw(G, pos=nx.spring_layout(G, seed=42))
plt.show()

In [None]:
# Debug: Print the graph information
# print(f"Nodes: {G.nodes()}")
# print(f"Edges: {G.edges()}")

In [None]:
# Create a Pyvis network and visualize the NetworkX graph (Pyvis default layout).
# - notebook=True makes Pyvis prepare the HTML template that show() uses when rendering
#   inside Jupyter; without it show() has no template to render.
# - cdn_resources='in_line' embeds the JavaScript in the HTML file so the plot does not
#   depend on locally served assets (requires a pyvis release that supports cdn_resources).
# - A pixel height is used because a percentage height collapses inside the output iframe.
nt = net.Network(width='100%', height='600px', directed=True,
                 notebook=True, cdn_resources='in_line')
nt.from_nx(G)
# show() writes the HTML file and returns an IFrame; it must be the last expression
# in the cell for Jupyter to display it.
nt.show('01_pyvis_default.html')

In [None]:
# Debug: Check the Pyvis network
# print(f"Pyvis Nodes: {nt.nodes}")
# print(f"Pyvis Edges: {nt.edges}")

<h3>Troubleshooting: the Pyvis plot does not render below</h3>

In [None]:
# Second attempt at rendering the Pyvis default layout.
# The plot did not render because the Network was created without notebook=True, so
# show() had no notebook template to render. cdn_resources='in_line' embeds the JavaScript
# in the HTML file (helps on hosted/cloud notebooks), and a pixel height avoids the
# zero-height canvas that height='100%' produces inside the output iframe.
# If it still fails, check that the HTML file was written and that pyvis is up to date.
nt = net.Network(width='100%', height='600px', directed=True,
                 notebook=True, cdn_resources='in_line')
nt.from_nx(G)
# Keep show() as the last expression so the returned IFrame is displayed.
nt.show('01_pyvis_default.html')

In [None]:
# Debug
# Ensure that the Pyvis network object nt is correctly initialized
# and that the from_nx method is successfully converting the NetworkX graph.
# NOTE(review): this import belongs in the imports cell at the top of the notebook;
# it is kept here because another cell uses the bare name `Network`.
from pyvis.network import Network

nt = Network(width='100%', height='100%', directed=True)
nt.from_nx(G)

# Check the Pyvis network: print sizes and a small sample instead of dumping
# every node and edge into the cell output.
print(f"{len(nt.nodes)} nodes, first 5: {nt.nodes[:5]}")
print(f"{len(nt.edges)} edges, first 5: {nt.edges[:5]}")

In [None]:
# Debug
# update libaries
pip install --upgrade networkx pyvis

In [None]:
pip install --upgrade networkx

In [None]:
pip install --upgrade pyvis

In [None]:
# Debug
# Create a Pyvis network and visualize the NetworkX graph.
# Uses net.Network from the top-of-notebook import so this cell does not depend on an
# earlier debug cell having been run. notebook=True / cdn_resources='in_line' / a pixel
# height are what let the plot render inside the notebook output.
nt = net.Network(width='100%', height='600px', directed=True,
                 notebook=True, cdn_resources='in_line')
nt.from_nx(G)

# Debug: check the Pyvis network (sizes and a small sample, not the full lists)
print(f"Pyvis Nodes: {len(nt.nodes)} total, first 5: {nt.nodes[:5]}")
print(f"Pyvis Edges: {len(nt.edges)} total, first 5: {nt.edges[:5]}")

# Show the network; show() returns an IFrame, so it must stay the last expression
nt.show('1_pyvis_default.html')

<h3>Create function for drawing network graph</h3>

In [15]:
def show_graph(graph, filename, height='600px'):
    """Render a NetworkX graph as an interactive Pyvis network in the notebook.

    Parameters
    ----------
    graph : networkx graph
        Graph to convert with ``Network.from_nx``.
    filename : str
        Path of the HTML file that Pyvis writes.
    height : str, optional
        CSS height of the canvas. A pixel value is the default because a
        percentage height collapses inside the notebook's output iframe.

    Returns
    -------
    The value returned by ``Network.show`` (an IFrame in notebook mode).
    Call ``show_graph(...)`` as the last expression of a cell, or wrap it in
    ``display(...)``, so Jupyter renders it.
    """
    # notebook=True makes Pyvis prepare the template show() renders in Jupyter;
    # cdn_resources='in_line' embeds the JavaScript in the written HTML file
    # (requires a pyvis release that supports cdn_resources).
    nt = net.Network(width='100%', height=height, directed=True,
                     notebook=True, cdn_resources='in_line')
    nt.from_nx(graph)
    # Return the IFrame instead of discarding it, otherwise nothing is displayed.
    return nt.show(filename)

<h3>Set Node Attributes</h3>

<h3>Set the node size according to the degree (number of connections) of node</h3>

In [17]:
# List the first five (account ID, degree) pairs.
# The degree is the number of connections the account has in G,
# e.g. ('1', 1) means account '1' is connected to one other account.
list(G.degree())[:5]

[('1', 1), ('884', 1), ('10', 2), ('8317', 3), ('8520', 2)]