Amanda Rodgers
AML Graph Project
July 17, 2024

In [None]:
pip install pyvis

In [None]:
import random

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from pyvis import network as net
%matplotlib inline
# Render floats with thousands separators and no decimal places (e.g. 1,234,568).
# This only affects how values are displayed; the stored numbers keep full precision.
pd.set_option('display.float_format', '{:,.0f}'.format)

In [None]:
# Build a small synthetic transaction table to prototype the graph workflow.
# Uses `random`, `np` and `pd` from the notebook's import cell.

# Seed both random number generators so every Restart & Run All yields the same data
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Number of synthetic transactions to generate
n_rows = 100


def _random_account_ids(count):
    """Return `count` account ids of the form 'ACC' followed by a random six-digit number."""
    return [f"ACC{random.randint(100000, 999999)}" for _ in range(count)]


# Senders are drawn before receivers so the random stream is consumed in a
# fixed order and the generated ids stay reproducible
sender_account_ids = _random_account_ids(n_rows)
receiver_account_ids = _random_account_ids(n_rows)

# Transaction amounts: uniform draws between 10 and 1000, rounded to two decimals
tx_amounts = np.round(np.random.uniform(10.0, 1000.0, n_rows), 2)

# One row per transaction: who sent it, who received it, and how much
txn_small = pd.DataFrame({
    'SENDER_ACCOUNT_ID': sender_account_ids,
    'RECEIVER_ACCOUNT_ID': receiver_account_ids,
    'TX_AMOUNT': tx_amounts
})

# Preview the first rows; a bare last expression renders as a rich table
txn_small.head()


<h2>Sum transaction amounts for each node pair</h2>

In [None]:
# Collapse the raw transactions into one row per (sender, receiver) pair,
# recording how many transfers occurred and their total amount.
# 'source' is the account the money came from; 'target' is the account it went to.
edge_small = (
    txn_small
    .groupby(['SENDER_ACCOUNT_ID', 'RECEIVER_ACCOUNT_ID'], as_index=False)
    .agg(agg_txn_count=('TX_AMOUNT', 'count'), agg_txn_amt=('TX_AMOUNT', 'sum'))
    .rename(columns={'SENDER_ACCOUNT_ID': 'source', 'RECEIVER_ACCOUNT_ID': 'target'})
)
edge_small

In [None]:
# Rank the sender/receiver pairs by transaction count, highest first, to check
# whether any pair transacted more than once.
# Author's observation for this data set: every row is unique, with only one
# transaction per source/target pair, so there are no anomalies.
edge_small_sorted_desc = edge_small.sort_values(by='agg_txn_count', ascending=False)

# The descending sort puts any repeated pair at the top, so the first rows are
# enough for the check; a bare last expression renders as a rich table.
edge_small_sorted_desc.head(10)

<h2>Construct Network with NetworkX</h2>

In [None]:
# Build the NetworkX graph from the aggregated edge list: the 'source' and
# 'target' columns supply the two endpoints of each edge.
# No create_using or edge_attr is passed, so this is NetworkX's default
# undirected Graph and the aggregated count/amount columns are not attached.
G = nx.from_pandas_edgelist(edge_small, 'source', 'target')

In [None]:
# G is the variable holding the graph; its nodes and edges can be read from it.
# Peek at the first five nodes. Per the author's note they alternate
# source, target, source, target, ...
list(G.nodes())[:5]

In [None]:
# Peek at the first five edges of G; each edge is a tuple of two account ids.
# NOTE(review): the original note said the tuples read ('target', 'source') - confirm
# against the cell output; G is built without create_using, i.e. as an undirected
# graph, so the order inside a tuple should not be read as a direction.
list(G.edges())[:5]

In [None]:
# Peek at the first five edges together with their attribute dicts.
# The graph was built without edge attributes, so each dict is empty here.
list(G.edges.data())[:5]

In [None]:
# Peek at the first five nodes together with their attribute dicts.
# No node attributes were set, so each dict is empty here.
list(G.nodes.data())[:5]

<h2>Various network layouts</h2>

In [None]:
# NetworkX default layout: no `pos` is passed, so nx.draw positions the nodes itself.
# An explicit figure/axes pair is used so the plot can carry a title and
# does not depend on pyplot's implicit "current figure" state.
fig, ax = plt.subplots(figsize=(8, 6))
nx.draw(G, ax=ax)
ax.set_title("NetworkX default layout")
plt.show()