In [19]:
from pyvis.network import Network
import pandas as pd

In [20]:
# Event log under investigation; the path is resolved against the current working directory.
data = pd.read_csv('output.csv')

# Known-bad identifiers that seed the graph.
fraudulent_account = '0007f265568f1abc1da791e852877df2047b3af9'
fraudulent_device = '91b12379-8098-457f-a2ad-a94d767797c2'

In [21]:
def prepare_nodes_parameters(df:pd.DataFrame, color: str = 'white') -> dict:
    """
    Turn every row of ``df`` into the parallel lists used to draw one graph node per row.

    :param df: pandas.DataFrame, one node is produced per row; the row's index
        label becomes the node id
    :param color: str, colour assigned to every node built from ``df``
    :return: dict of equally long lists keyed by
        'n_id'  (index labels),
        'value' (always 100),
        'title' (multi-line hover text),
        'label' ('id <index label>'),
        'color' (the ``color`` argument repeated),
        'data'  (each row as a plain dict)
    """
    node_ids, titles, labels, payloads = [], [], [], []
    for node_id, record in df.iterrows():
        node_ids.append(node_id)
        titles.append(
            f"""device_id: {record['device_id']}
            account: {record['identity']}
            device_fingerprint: {record['device_fingerprint']}
            bank: {record['bank']}
            ip: {record['ips']}
            lat: {record['lat']}
            lon: {record['lon']}
            country: {record['country']}
            city: {record['city']}
            os: {record['os']}
            browser: {record['browser']}
            screen: {record['screen']}
            mobile: {record['mobile']}
            proxy: {record['proxy']}
            """)
        labels.append('id ' + str(node_id))
        payloads.append(record.to_dict())

    # 'value' and 'color' are constant per call, so they are expanded once here.
    node_count = len(node_ids)
    return {
        'n_id': node_ids,
        'value': [100] * node_count,
        'title': titles,
        'label': labels,
        'color': [color] * node_count,
        'data': payloads,
    }

In [22]:
def calc_edge_width_and_title(row1: dict, row2: dict, fields: list = None) -> tuple:
    """
    Count how many identifying fields two nodes share and describe the match.

    Only ``device_id``, ``identity`` and ``device_fingerprint`` count as
    connections. Each of them that holds the same value in both rows adds 1 to
    the edge width and contributes its name to the edge title.

    :param row1: dict, column name -> value for the first node
    :param row2: dict, column name -> value for the second node
    :param fields: optional list of column names to inspect, in the order they
        should appear in the title; defaults to the keys of ``row1``. The
        default is resolved at call time, so the function no longer depends on
        a global DataFrame existing when it is defined.
    :return: (width, title) -- width is the integer number of matching
        identifying fields; title is a newline-separated string that starts
        with 'These fields are equal:' followed by the matching field names
        (shown on hover over the edge)
    """
    key_fields = ('device_id', 'identity', 'device_fingerprint')
    if fields is None:
        fields = list(row1)

    # The identifying-field filter runs first so that unrelated or missing
    # columns never trigger a KeyError lookup.
    # NOTE(review): placeholder values compare equal here too (another cell
    # filters 'No data' out of the fingerprints) -- confirm that is intended.
    matched = [
        field
        for field in fields
        if field in key_fields
        and field in row1
        and field in row2
        and row1[field] == row2[field]
    ]
    return len(matched), '\n'.join(['These fields are equal:'] + matched)

In [23]:
def one_node_to_many_list(node1: dict, nodes_dict: dict) -> list:
    """
    Creates the list of edges that connect node1 with the nodes in nodes_dict.

    :param node1: chosen node as a single-entry dict {node_id: node_data};
        only the first entry is used
    :param nodes_dict: nodes to check for connection with node1; must provide
        the parallel lists 'n_id' and 'data'
    :return: list of edges, each edge is tuple: (node1_id, node2_id, width, title).
        Pairs with width 0 are omitted, and node1 is never linked to itself.
    """
    if not node1:
        # Nothing to connect from.
        return []

    # Extract the id and payload once instead of rebuilding the key list per iteration.
    node1_id, node1_data = next(iter(node1.items()))

    result = []
    for node2_id, node2_data in zip(nodes_dict['n_id'], nodes_dict['data']):
        if node2_id == node1_id:
            # A node always matches itself on every field; skipping it avoids
            # a meaningless self-loop when node1 is also present in nodes_dict.
            continue
        width, title = calc_edge_width_and_title(node1_data, node2_data)
        if width != 0:
            result.append((node1_id, node2_id, width, title))
    return result

In [24]:
# Rows that involve the known fraudulent device or account become the red nodes.
compromised_nodes = prepare_nodes_parameters(
    df=data.loc[
        data['device_id'].eq(fraudulent_device)
        | data['identity'].eq(fraudulent_account)
    ],
    color='red',
)

In [25]:
# Distinct device fingerprints seen on the fraudulent device/account rows,
# leaving out the 'No data' placeholder.
associated_fingerprints = list(set(
    data.loc[
        (
            data['device_id'].eq(fraudulent_device)
            | data['identity'].eq(fraudulent_account)
        )
        & data['device_fingerprint'].ne('No data'),
        'device_fingerprint',
    ].tolist()
))

In [26]:
# Distinct os / browser / screen values observed on rows whose fingerprint is
# associated with the fraud. Each column is deduplicated in one vectorised
# step (first-seen order) instead of looping over rows, and no loop variables
# are left behind in the notebook namespace.
parameters_of_user_agent = {
    column: values.unique().tolist()
    for column, values in data.loc[
        data['device_fingerprint'].isin(associated_fingerprints),
        ['os', 'browser', 'screen'],
    ].items()
}


In [47]:
# Rows from Russia whose os, browser and screen each match a value collected
# in parameters_of_user_agent become the yellow nodes.
associated_nodes = prepare_nodes_parameters(
    df=data.loc[
        data['os'].isin(parameters_of_user_agent['os'])
        & data['browser'].isin(parameters_of_user_agent['browser'])
        & data['screen'].isin(parameters_of_user_agent['screen'])
        & data['country'].eq('Russia')
    ],
    color='yellow',
)

In [28]:
# Collect, for every compromised node, its edges to the associated nodes.
edge_list = [
    edge
    for node_id, node_data in zip(compromised_nodes['n_id'], compromised_nodes['data'])
    for edge in one_node_to_many_list({node_id: node_data}, associated_nodes)
]

In [41]:
# Graph canvas; every option is passed by keyword, one per line.
net = Network(
    height="750px",
    width="100%",
    bgcolor="#222230",
    font_color="white",
    notebook=True,
    cdn_resources='in_line',
)

In [42]:
# Register the compromised nodes together with their display attributes.
net.add_nodes(
    compromised_nodes['n_id'],
    **{
        attribute: compromised_nodes[attribute]
        for attribute in ('value', 'title', 'label', 'color')
    },
)

In [43]:
# Register the associated nodes together with their display attributes.
net.add_nodes(
    associated_nodes['n_id'],
    **{
        attribute: associated_nodes[attribute]
        for attribute in ('value', 'title', 'label', 'color')
    },
)

In [44]:
# Each edge tuple is (source id, target id, width, hover title).
for source_id, target_id, edge_width, edge_title in edge_list:
    net.add_edge(source_id, target_id, width=edge_width, title=edge_title, color='gray')

In [46]:
# Apply pyvis's barnes_hut() layout configuration with its default arguments,
# then render the graph to 'nx.html' (the file name echoed as this cell's output).
net.barnes_hut()
net.show('nx.html')

nx.html
