In [1]:
import pandas as pd
import numpy as np
import holoviews as hv
from holoviews.streams import Stream, param
import panel as pn
from holoviews.operation.datashader import dynspread, datashade
import time
import multiprocessing as mp

hv.extension('bokeh')
pn.extension(comms='ipywidgets')

In [2]:
N = 8
node_indices = np.arange(N)
source = np.zeros(N)
target = node_indices

graph = hv.Graph(((source, target),))
graph

In [3]:
hv.help(hv.Graph)

Graph

Online example: http://holoviews.org/reference/elements/bokeh/Graph.html

[1;35m-------------
Style Options
-------------[0m

	cmap, edge_alpha, edge_cmap, edge_color, edge_fill_alpha, edge_fill_color, edge_hover_alpha, edge_hover_color, edge_hover_fill_alpha, edge_hover_fill_color, edge_hover_line_alpha, edge_hover_line_cap, edge_hover_line_color, edge_hover_line_dash, edge_hover_line_join, edge_hover_line_width, edge_line_alpha, edge_line_cap, edge_line_color, edge_line_dash, edge_line_join, edge_line_width, edge_muted, edge_muted_alpha, edge_muted_color, edge_muted_fill_alpha, edge_muted_fill_color, edge_muted_line_alpha, edge_muted_line_cap, edge_muted_line_color, edge_muted_line_dash, edge_muted_line_join, edge_muted_line_width, edge_nonselection_alpha, edge_nonselection_color, edge_nonselection_fill_alpha, edge_nonselection_fill_color, edge_nonselection_line_alpha, edge_nonselection_line_cap, edge_nonselection_line_color, edge_nonselection_line_dash, edge_nonselection_li

In [4]:
graph.select(index=([0,1,2,3]), selection_mode='nodes')

In [5]:
def get_graph(node_indices):
    return graph.select(index=node_indices, selection_mode='nodes')

def get_balance(index):
    print(index)
    return graph.select(index=(index), selection_mode='nodes')

node_indices = Stream.define('node_indices', node_indices=[0, 1, 2])
g = hv.DynamicMap(get_graph, streams=[node_indices()])

s = hv.streams.Selection1D(source=graph.nodes)
b = hv.DynamicMap(get_balance, streams=[s])

graph.nodes.opts(color='grey', alpha=0.2, size=16) * g + b

[]


BokehModel(combine_events=True, render_bundle={'docs_json': {'dce7c057-4fbc-4e50-ae09-0029f8ede9ed': {'version…

In [6]:
DATASET = '10K_accts'

df = pd.read_csv(f'../AMLsim/outputs/{DATASET}/tx_log.csv')

df = df.loc[df['nameOrig']!=-2]
df = df.loc[df['nameDest']!=-1]
df.reset_index(inplace=True, drop=True)

df

Unnamed: 0,step,type,amount,nameOrig,bankOrig,daysInBankOrig,phoneChangesOrig,oldbalanceOrig,newbalanceOrig,nameDest,bankDest,daysInBankDest,phoneChangesDest,oldbalanceDest,newbalanceDest,isSAR,alertID,modelType
0,1,TRANSFER,99.09,9385,svea,2,0,22178.28,22079.19,190,swedbank,1,0,33487.28,33586.37,0,-1,11
1,1,TRANSFER,87.78,8002,ica,2,0,21946.93,21859.14,266,swedbank,2,0,17218.82,17306.61,0,-1,11
2,1,TRANSFER,100.42,8341,ica,2,0,12644.45,12544.02,117,swedbank,1,0,27368.89,27469.31,0,-1,11
3,1,TRANSFER,105.11,9020,marginalen,2,0,19862.33,19757.21,3818,handelsbanken,1,0,18991.87,19096.98,0,-1,10
4,1,TRANSFER,94.00,8278,ica,2,0,26669.23,26575.22,316,swedbank,1,0,38575.03,38669.03,0,-1,11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157188,366,TRANSFER,100.00,1,swedbank,367,0,117296.14,117196.13,8006,ica,367,0,363416.15,363516.15,0,-1,10
157189,366,TRANSFER,99.15,639,swedbank,366,0,73508.19,73409.03,9853,skandia,367,0,274016.18,274115.34,0,-1,10
157190,366,TRANSFER,94.45,1604,swedbank,366,0,309211.62,309117.15,5989,nordea,367,0,436544.46,436638.93,0,-1,10
157191,366,TRANSFER,105.65,602,swedbank,367,0,510913.37,510807.71,7862,ica,367,0,349077.87,349183.53,0,-1,10


In [7]:
graph_df = df[['nameOrig', 'nameDest', 'step', 'amount', 'modelType', 'isSAR']].rename(columns={'nameOrig': 'source', 'nameDest': 'target', 'modelType': 'model_type', 'isSAR': 'is_sar'})

display(graph_df)

df1 = df[['nameOrig', 'bankOrig']].rename(columns={'nameOrig': 'name', 'bankOrig': 'bank'})
df2 = df[['nameDest', 'bankDest']].rename(columns={'nameDest': 'name', 'bankDest': 'bank'})
node_df = pd.concat([df1, df2]).drop_duplicates().reset_index(drop=True)
# add random x and y coordinates
node_df['x'] = np.random.random(size=node_df.shape[0])
node_df['y'] = np.random.random(size=node_df.shape[0])
# spread coordinates in different directions based on bank
node_df['x'] = node_df['x'] + node_df['bank'].astype('category').cat.codes * 0.3
node_df['y'] = node_df['y'] + node_df['bank'].astype('category').cat.codes * 0.3
#node_df.drop(columns=['bank'], inplace=True)
#node_df.rename(columns={'name': 'index'}, inplace=True)
display(node_df)

Unnamed: 0,source,target,step,amount,model_type,is_sar
0,9385,190,1,99.09,11,0
1,8002,266,1,87.78,11,0
2,8341,117,1,100.42,11,0
3,9020,3818,1,105.11,10,0
4,8278,316,1,94.00,11,0
...,...,...,...,...,...,...
157188,1,8006,366,100.00,10,0
157189,639,9853,366,99.15,10,0
157190,1604,5989,366,94.45,10,0
157191,602,7862,366,105.65,10,0


Unnamed: 0,name,bank,x,y
0,9385,svea,3.095194,3.446581
1,8002,ica,1.194418,1.157698
2,8341,ica,0.904417,1.499281
3,9020,marginalen,1.791491,2.309107
4,8278,ica,1.228694,1.423808
...,...,...,...,...
9995,7271,danske,0.580376,0.499050
9996,7100,nordea,2.575404,2.141340
9997,7114,nordea,1.850162,2.464056
9998,7131,nordea,2.784793,2.520360


In [8]:
nodes = hv.Nodes((node_df['x'], node_df['y'], node_df['name']))
graph = hv.Graph(((graph_df['source'], graph_df['target']),nodes))

In [9]:
def get_graph(node_indices):
    return graph.select(index=node_indices, selection_mode='nodes') #.opts(directed=True, node_size=5, arrowhead_length=0.0005)

def get_graph2(node_indices):
    t0 = time.time()
    n = node_df.query('name in @node_indices')
    e = graph_df.query('source in @node_indices and target in @node_indices')
    t1 = time.time() 
    print(t1 - t0)
    nodes = hv.Nodes((n['x'], n['y'], n['name'])) #.opts(size=5)
    graph = hv.Graph(((e['source'], e['target']),nodes)) #.opts(directed=True, arrowhead_length=0.0005)
    t2 = time.time()
    print(t2 - t1)
    return graph

idxs = list(range(0,10000))
node_indices = Stream.define('node_indices', node_indices=idxs)
g = hv.DynamicMap(get_graph2, streams=[node_indices()])

#dynspread(datashade(graph.select(index=idxs, selection_mode='nodes')))
g * graph.nodes.opts(color='grey', alpha=0.1, size=5)

0.010344266891479492
0.0033729076385498047


BokehModel(combine_events=True, render_bundle={'docs_json': {'2a2d4203-2e30-47d0-94bd-e8d46a94408e': {'version…

In [10]:
g.event(node_indices=list(range(0,10000)))

In [11]:
edge_df = graph_df
# from node_df add x0, y0, x1, y1 to edge_df
edge_df['x0'] = edge_df['source'].map(node_df.set_index('name')['x'])
edge_df['y0'] = edge_df['source'].map(node_df.set_index('name')['y'])
edge_df['x1'] = edge_df['target'].map(node_df.set_index('name')['x'])
edge_df['y1'] = edge_df['target'].map(node_df.set_index('name')['y'])

display(edge_df)
display(node_df)

Unnamed: 0,source,target,step,amount,model_type,is_sar,x0,y0,x1,y1
0,9385,190,1,99.09,11,0,3.095194,3.446581,4.275973,3.373273
1,8002,266,1,87.78,11,0,1.194418,1.157698,3.354959,3.494315
2,8341,117,1,100.42,11,0,0.904417,1.499281,3.604714,3.541100
3,9020,3818,1,105.11,10,0,1.791491,2.309107,1.478000,1.345597
4,8278,316,1,94.00,11,0,1.228694,1.423808,4.001429,3.693524
...,...,...,...,...,...,...,...,...,...,...
157188,1,8006,366,100.00,10,0,3.306762,4.238752,1.770830,1.669766
157189,639,9853,366,99.15,10,0,3.468204,3.588083,3.014601,3.027060
157190,1604,5989,366,94.45,10,0,3.396211,3.306810,2.520556,2.177201
157191,602,7862,366,105.65,10,0,4.114671,4.056422,1.238378,1.580411


Unnamed: 0,name,bank,x,y
0,9385,svea,3.095194,3.446581
1,8002,ica,1.194418,1.157698
2,8341,ica,0.904417,1.499281
3,9020,marginalen,1.791491,2.309107
4,8278,ica,1.228694,1.423808
...,...,...,...,...
9995,7271,danske,0.580376,0.499050
9996,7100,nordea,2.575404,2.141340
9997,7114,nordea,1.850162,2.464056
9998,7131,nordea,2.784793,2.520360


In [12]:
curves = hv.Curve(([0, 1], [0, 1])) # [x0, x1], [y0, y1]
points = hv.Points(([0, 1], [0, 1])) # [x0, x1], [y0, y1]
curves * points

In [13]:
def get_edge(row):
    return hv.Curve(([row['x0'], row['x1']], [row['y0'], row['y1']])).opts(color='grey')

def get_head(row, length=0.05, thickness=3):
    k = (row['y1']-row['y0'])/(row['x1']-row['x0'])
    head = hv.Curve(data=(
        [row['x1']-np.sign(row['x1']-row['x0']) * length/np.sqrt(1+k**2), row['x1']], [row['y1']-np.sign(row['x1']-row['x0'])* k * length/np.sqrt(1+k**2), row['y1']]
    )).opts(color='black', line_width=thickness)
    return head

nodes = hv.Points(node_df, ['x', 'y'], ['name']).opts(size=5, tools=['tap'])
nodes * get_edge(edge_df.iloc[0]) * get_head(edge_df.iloc[0]) * get_edge(edge_df.iloc[1]) * get_head(edge_df.iloc[1]) * get_edge(edge_df.iloc[2]) * get_head(edge_df.iloc[2])

In [14]:
n_workers = 10
edge_dfs = np.array_split(edge_df, n_workers)

def get_edges(df:pd.DataFrame):
    edges = []
    heads = []
    for row in df.iterrows():
        edges.append(get_edge(row[1]))
        heads.append(get_head(row[1]))
    return (edges, heads)

with mp.Pool(processes=n_workers) as pool:
    res = pool.map(get_edges, edge_dfs)
    
print(res)


  return bound(*args, **kwds)


[([:Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y), :Curve   [x]   (y

In [15]:
len(res[1][1])

15720

In [None]:
def get_edge(row:pd.Series) -> hv.Curve:
    edge = hv.Curve(([row['x0'], row['x1']], [row['y0'], row['y1']]))
    return edge

def get_head(row:pd.Series, length:float=0.05) -> hv.Curve:
    k = (row['y1']-row['y0'])/(row['x1']-row['x0'])
    head = hv.Curve(data=(
        [row['x1']-np.sign(row['x1']-row['x0']) * length/np.sqrt(1+k**2), row['x1']], [row['y1']-np.sign(row['x1']-row['x0'])* k * length/np.sqrt(1+k**2), row['y1']]
    ))
    return head


def get_edges_mp(df:pd.DataFrame, n_workers:int=10) -> (hv.NdOverlay, hv.NdOverlay):
    
    def get_edges(df:pd.DataFrame) -> (dict, dict):
        edges = {}
        heads = {}
        for row in df.iterrows():
            edges[row[0]] = get_edge(row[1])
            heads[row[0]] = get_head(row[1])
        return (edges, heads)
    
    edge_dfs = np.array_split(df, n_workers)
    with mp.Pool(processes=n_workers) as pool:
        res = pool.map(get_edges, edge_dfs)
    edges = {}
    heads = {}
    for i in range(len(res)):
        edges.update(res[i][0])
        heads.update(res[i][1])
    return (hv.NdOverlay(edges, kdims=['index']), hv.NdOverlay(heads, kdims=['index']))

# edges, heads = get_edges_mp(df_edges)
# nodes = get_nodes(df_nodes)

In [None]:
nodes = hv.Nodes((df_nodes['x'], df_nodes['y'], df_nodes['name']))
graph = hv.Graph(((df_edges['source'], df_edges['target']),nodes)).opts(node_size=5, directed=True, arrowhead_length=0.001)

def get_graph(idxs):
    t = time.time()
    g = graph.select(index=idxs, selection_mode='nodes')
    print(time.time()-t)
    return g

stream = Stream.define('idxs', idxs=list(range(8000,10000)))
dmap = hv.DynamicMap(get_graph, streams=[stream()])

dynspread(rasterize(dmap)) * graph.nodes.opts(color='red', size=5, tools=['tap']) # rasterize(dmap, aggregator=ds.any()) # , selector=ds.first('x'), line_width=0.01