In [2]:
import os

os.environ['DGLBACKEND'] = "pytorch"
import dgl
import torch
import pandas as pd
from dgl.data import DGLDataset
import networkx as nx

In [None]:
def parse_edgelist(edges, id_to_node, header=False, source_type='user', sink_type='user'):
    """
    Parse an edgelist file and return its edges as a list of (source, sink) index tuples.

    :param edges: path to a comma separated file with one "source,sink" pair per line; when
        ``header`` is True the first line holds the node type names of the two columns
    :param id_to_node: dictionary keyed by node type whose values map node names (ids) to
        dgl node indices
    :param header: boolean whether or not the file has a header row
    :param source_type: type of the source node in the edge. defaults to 'user' if no header
    :param sink_type: type of the sink node in the edge. defaults to 'user' if no header.
    :return: (list, dict, str, str) the edges of a single relationship type as tuples of node
        indices, the updated id_to_node dict, and the source and sink node types that were used.
    :raises ValueError: if a line does not contain exactly two comma separated fields
    """
    edge_list = []
    source_pointer, sink_pointer = 0, 0
    with open(edges, "r") as fh:
        for i, line in enumerate(fh):
            source, sink = line.strip().split(",")
            if i == 0:
                if header:
                    # The header row names the node types of the two columns.
                    source_type, sink_type = source, sink
                # Continue numbering after the largest index already assigned to each type;
                # default=-1 lets an existing-but-empty mapping start at index 0.
                if source_type in id_to_node:
                    source_pointer = max(id_to_node[source_type].values(), default=-1) + 1
                if sink_type in id_to_node:
                    sink_pointer = max(id_to_node[sink_type].values(), default=-1) + 1
                if header:
                    # Only a real header row is skipped. Without a header the first line
                    # is an edge and must be parsed like every other line.
                    continue

            # NOTE(review): _get_node_idx is defined outside this block; presumably it looks
            # up or assigns the index of the node and returns the (possibly advanced)
            # pointer -- verify against its definition.
            source_node, id_to_node, source_pointer = _get_node_idx(id_to_node, source_type, source, source_pointer)
            if source_type == sink_type:
                # Both columns hold the same node type, so they share one pointer.
                sink_node, id_to_node, source_pointer = _get_node_idx(id_to_node, sink_type, sink, source_pointer)
            else:
                sink_node, id_to_node, sink_pointer = _get_node_idx(id_to_node, sink_type, sink, sink_pointer)

            edge_list.append((source_node, sink_node))

    return edge_list, id_to_node, source_type, sink_type

In [None]:
def get_features(node_feature_file_path):
    """
    Read node features from one or more comma separated files.

    The first line of every file is treated as a header and skipped; blank lines are ignored.

    :param node_feature_file_path: path to a node feature file, or an iterable of such paths
    :return: list of rows in file order (files in the order given, lines in the order read),
        each row being the list of comma separated string fields of one line
    """
    # A single path would otherwise be iterated character by character.
    if isinstance(node_feature_file_path, (str, bytes, os.PathLike)):
        node_feature_file_path = [node_feature_file_path]

    feature_rows = []
    for node_file in node_feature_file_path:
        with open(node_file, "r") as fh:
            # Skip the header row (does nothing on an empty file).
            next(fh, None)
            for line in fh:
                stripped = line.strip()
                if not stripped:
                    continue
                # NOTE(review): fields are kept as strings because the column schema
                # (which column is the node id, which are numeric) is not visible here;
                # convert to a numeric matrix at the call site once that is confirmed.
                feature_rows.append(stripped.split(","))
    return feature_rows

In [None]:
def build_graph_bipartite():
    """
    Build the bipartite transaction graph as a NetworkX graph.

    NOTE(review): only the signature of this function existed, which made the cell a
    syntax error. The notes accompanying this cell describe the intended steps:
      1. map every merchant and customer to a node_id;
      2. aggregate multiple transactions between the same two nodes;
      3. build the graph with ``nx.from_edgelist``;
      4. attach ``weight`` (number of transactions) and ``label`` (genuine or
         fraudulent) to each edge.
    The input data source is not visible here, so the body is left unimplemented.

    :raises NotImplementedError: always, until the construction above is implemented
    """
    raise NotImplementedError("build_graph_bipartite() has not been implemented yet")

1. We build a map that assigns a `node_id` to each merchant and each customer.
2. Multiple transactions between the same two nodes are aggregated into a single edge.
3. The networkx function `nx.from_edgelist` is used to build the networkx graph.
4. Two attributes, `weight` and `label`, are assigned to each edge. The former represents the total number of transactions between the two nodes, whereas the latter indicates whether the transaction is genuine or fraudulent.

In [None]:
# Create a graph in NetworkX
# build_graph_bipartite is this notebook's own function, not a member of the networkx
# module, so it is called directly (nx.build_graph_bipartite raises AttributeError).
G_nx = build_graph_bipartite()

# Convert the NetworkX graph into a DGL graph
# NOTE(review): from_networkx copies only the graph structure unless node_attrs /
# edge_attrs are passed; add edge_attrs if the edge weight/label must be kept -- confirm.
G_dgl = dgl.from_networkx(G_nx)