Amanda Rodgers
July 18, 2024
Anti-money laundering Project

In [None]:
# Dataset: https://www.kaggle.com/datasets/anshankul/ibm-amlsim-example-dataset?select=transactions.csv
# Tutorial: https://medium.com/@jasonclwu/network-analysis-for-anti-money-laundering-with-python-ad981792a947
# Github: https://github.com/jasonclwu/202209_AML_Network_Analysis/blob/main/AML_Network_Analysis.ipynb

In [27]:
# Import libraries
import pandas as pd
import networkx as nx
from pyvis.network import Network as net
# from pyvis import Network as net
import matplotlib.pyplot as plt
%matplotlib inline
pd.options.display.float_format = '{:,.0f}'.format

In [None]:
# Create df with dataset
txn_small = pd.read_csv('transactions.csv', usecols=['SENDER_ACCOUNT_ID','RECEIVER_ACCOUNT_ID','TX_AMOUNT'], dtype='object')
txn_small['TX_AMOUNT'] = txn_small['TX_AMOUNT'].astype(float)
txn_small

<h2>Sum transaction amounts for each node pair</h2>

In [None]:
# Form nodes and edges by grouping by account numbers with a count of transactions and sum of amounts
# Money came from source account and target is the account it went to
# Create new columns agg_txn_count(number of transactions) and agg_txn_amt (sum of all transactions)
edge_small = txn_small.groupby(['SENDER_ACCOUNT_ID', 'RECEIVER_ACCOUNT_ID'],as_index=False).agg({'TX_AMOUNT':['count','sum']})
edge_small.columns = ['source','target','agg_txn_count','agg_txn_amt']
edge_small

In [None]:
# Make df smaller for faster processing
first_5000_rows_df = edge_small.head(5000)
first_5000_rows_df

<h2>Construct Network with NetworkX</h2>

In [28]:
# Create graph network with networkX
G = nx.from_pandas_edgelist(first_5000_rows_df, source='source', target='target')

In [29]:
# Access node and edge information from G, variable that stores graph
# Show first five nodes of graph, pattern is source, target, source, target, source, target
list(G.nodes)[0:5]

['1', '884', '10', '8317', '8520']

In [None]:
# Show the first 5 edge of G
# Shows ('target', 'source')
list(G.edges)[0:5]

In [None]:
# Show the first 5 edge of G with all attributes with data=True, there are no attributes in this case
# [('source', 'target' {attributes})]
list(G.edges(data=True))[0:5]

In [None]:
# Show the first 5 nodes of G with all attributes with data=True, there are no attributes in this case
list(G.nodes(data=True))[0:5]

<h2>Various network layouts</h2>

In [None]:
# Networkx Default Layout
# Create a network graph using networkx
plt.figure(figsize=(8,6))
nx.draw(G)
plt.show()

In [None]:
# Debug: Print the graph information
# print(f"Nodes: {G.nodes()}")
# print(f"Edges: {G.edges()}")

In [None]:
# Create a Pyvis network and visualize the NetworkX graph
# Pyvis Default Layout
# nt = net.Network(width='100%', height='100%', directed=True)
# nt.from_nx(G)
# nt.show('01_pyvis_default.html')

In [None]:
# Debug: Check the Pyvis network
# print(f"Pyvis Nodes: {nt.nodes}")
# print(f"Pyvis Edges: {nt.edges}")

<h3>? can't get plot to render below?</h3>

In [30]:
# Is not working, check to see if file was created and if browser and cloud are compatible
# see if versions of pyvis and networkX are compatible
# ERROR AttributeError: module 'pyvis.network' has no attribute 'network'(pyvis object net.network is not
# being created)

nt = net.network(width='100%', height='100%', directed=True)
nt.from_nx(G)
nt.show('01_pyvis_default.html')

AttributeError: type object 'Network' has no attribute 'network'

In [None]:
# Debug
# Ensure that the Pyvis network object nt is correctly initialized 
# and that the from_nx method is successfully converting the NetworkX graph.
from pyvis.network import Network

nt = Network(width='100%', height='100%', directed=True)
nt.from_nx(G)

# Check the Pyvis network
print(nt.nodes)
print(nt.edges)

In [None]:
# Debug
# update libaries
pip install --upgrade networkx pyvis

In [None]:
pip install --upgrade networkx

In [None]:
pip install --upgrade pyvis

In [None]:
# Debug
# Create a Pyvis network and visualize the NetworkX graph
nt = Network(width='100%', height='100%', directed=True)
nt.from_nx(G)

# Debug: Check the Pyvis network
print(f"Pyvis Nodes: {nt.nodes}")
print(f"Pyvis Edges: {nt.edges}")

# Show the network
nt.show('1_pyvis_default.html')

<h3>Create function for drawing network graph</h3>

In [None]:
def show_graph(graph, filename):
    nt = net.Network(width='100%', height='100%', directed=True)
    nt.from_nx(graph)
    nt.show(filename)

<h3>Set Node Attributes</h3>

<h3>Set the node size according to the degree (number of connections) of node</h3>

In [None]:
# List source, 
# ?? follow on graph what these mean('source1', has 1 connection), ('targetamt884', 'is connected with source1'), ('source10', 'has 2 connections')
list(G.degree)[0:5]

In [None]:
degree_dict_G = dict(G.degree)
nx.set_node_attributes(G, degree_dict_G, 'value')

In [None]:
# List graph dictionary
# size = number of connections to that node
list(G.nodes(data=True))[0:5]

<h3>Graph won't render, pyvis net.network object never created</h3>

In [None]:
show_graph(G, '02_node_size.html')