In [17]:
import snap
import os.path
import itertools
import numpy as np

import util

# Global Variables

In [6]:
# Key under which Session.compute_groupings stores the grouping made of the
# nodes that have neither incoming nor outgoing edges.
__no_label__ = "NO_LABEL"

In [7]:
from enum import Enum
# Constants used in the paper:
# NODE_WEIGHT_EVALUATION("NUMBER_OF_NODES_INSIDE_OF_SN"),
# NODE_WEIGHT_FREQUENCY("NUMBER_OF_SN_INSIDE_OF_HN"),
# NODE_WEIGHT_EVALUATION_AVG("AVG_WEIGHT_ON_SN"),
# PERCENTAGE("PERCENTAGE"),
# NUMBER_OF_INNER_EDGES("NUMBER_OF_INNER_EDGES"),
# LABEL("LABEL"),
# EDGE_RATIO("EDGE_RATIO"),
# GROUPING("GROUPING"),
# REACHABILITY_COUNT("REACH_NUMBER_OF_INNER_PATHS"),
# PATH_OUT("REACH_PATH_OUT_BY_LABEL"),                      # Not used in og repo
# PATH_IN("REACH_PATH_IN_BY_LABEL"),                        # Not used in og repo
# EDGE_WEIGHT("EDGE_WEIGHT"),
# PARTICIPATION_LABEL("PARTICIPATION_LABEL"),
# TRAVERSAL_FRONTIERS("TRAVERSAL_FRONTIERS");

class GraphAttributeConstants(str, Enum):
    """Names of the integer node attributes registered on the original network."""
    # Id of the super node a node of the original network was assigned to.
    SUPER_NODE_ID = "SUPER_NODE_ID"


class NodeAttributeConstants(str, Enum):
    """Names of the float attributes stored on super nodes of the evaluation graph."""
    # Compression
    # Number of original nodes contained in the super node.
    NODE_WEIGHT = "NUMBER_OF_INNER_NODES"
    # Number of edges whose two endpoints belong to the same super node.
    EDGE_WEIGHT = "NUMBER_OF_INNER_EDGES"

    # Candidate attributes from the paper's constant list; currently disabled.
    # NODE_WEIGHT_AVG = "AVG_WEIGHT_ON_SN"

    # NODE_WEIGHT_FREQUENCY = "NUMBER_OF_SN_INSIDE_OF_HN"

    # LABEL = "LABEL"
    # GROUPING = "GROUPING"

    # EDGE_RATIO = "EDGE_RATIO"

    # PARTICIPATION_LABEL = "PARTICIPATION_LABEL"
    # TRAVERSAL_FRONTIERS = "TRAVERSAL_FRONTIERS"

class EdgeAttributeConstants(str, Enum):
    """Names of the float attributes stored on super edges of the evaluation graph."""
    # Weight attached to a super edge by Evaluation.build_evaluation_graph.
    EDGE_WEIGHT = "NUMBER_OF_EDGES"

# Each of the following attributes will exist for each l-label, i.e., we would
# have PERCENTAGE_KNOWS, PERCENTAGE_LIKES, for a graph with l-labels = {KNOWS,
# LIKES}.
class LabelAttributeConstants(str, Enum):
    """Attribute-name prefixes; the edge label is appended to form the full name."""
    # Inner-Connectivity
    PERCENTAGE = "LABEL_PERCENTAGE_" # percent of l-labeled inner edges
    REACH = "LABEL_REACH_" # number of pairs connected with an l-labeled inner edge

# Load datasets

In [8]:
# Generic function to read an edge file into a network.
# TODO:
# - allow any graph type, not only snap.TNEANet
# - check whether the binary file already exists and, if so, load it instead
#   (rename the function to make_graph/make_net accordingly)
def edge_file_to_network(filename, edge_attr, src_node_attr, dst_node_attr, tab_separated=False, dump=False):
    """Load an edge-list file into a snap.TNEANet and save the table as binary.

    The file is expected to start each row with the source and destination
    node ids (both read as integers), followed by the columns described by
    ``edge_attr``, ``src_node_attr`` and ``dst_node_attr``, in that order.

    Args:
        filename: path of the edge-list file to read.
        edge_attr: mapping of edge attribute name -> snap attribute type.
        src_node_attr: mapping of source-node attribute name -> snap type.
        dst_node_attr: mapping of destination-node attribute name -> snap type.
        tab_separated: columns are separated by a tab when True, otherwise by
            a single space.
        dump: when True, call ``Dump()`` on the resulting network.

    Returns:
        The network built from the loaded table.

    Side effects:
        Saves the loaded table to ``filename + ".bin"``.
    """
    table_context = snap.TTableContext()

    table_schema = snap.Schema()
    for endpoint_column in ("srcID", "dstID"):
        table_schema.Add(snap.TStrTAttrPr(endpoint_column, snap.atInt))

    def register_columns(attr_types):
        # Append one schema column per attribute and return the attribute
        # names as the string vector ToNetwork expects.
        names = snap.TStrV()
        for name, attr_type in attr_types.items():
            names.Add(name)
            table_schema.Add(snap.TStrTAttrPr(name, attr_type))
        return names

    # Registration order fixes the column order of the schema.
    edge_attr_names = register_columns(edge_attr)
    src_attr_names = register_columns(src_node_attr)
    dst_attr_names = register_columns(dst_node_attr)

    delimiter = '\t' if tab_separated else ' '
    table = snap.TTable.LoadSS(
        table_schema, filename, table_context, delimiter, snap.TBool(False))

    # The result is an object of type snap.TNEANet.
    net = table.ToNetwork(
        snap.TNEANet, "srcID", "dstID",
        src_attr_names, dst_attr_names, edge_attr_names, snap.aaFirst)

    if dump:
        net.Dump()

    # Save the loaded table next to the input file.
    binary_out = snap.TFOut(filename + ".bin")
    table.Save(binary_out)
    binary_out.Flush()

    return net

def make_residence_hall_network():
    """Load the residence-hall edge file; each edge has an integer "relationship" attribute."""
    return edge_file_to_network(
        "data/moreno_oz/out.moreno_oz_oz",
        {"relationship" : snap.atInt},  # edge attributes
        {},                             # source-node attributes
        {},                             # destination-node attributes
    )

def make_pg_paper_network():
    """Load the property-graph paper example; each edge has a string "edgelabel" attribute."""
    return edge_file_to_network(
        "data/example/pg_paper.txt",
        {"edgelabel" : snap.atStr},  # edge attributes
        {},                          # source-node attributes
        {},                          # destination-node attributes
    )

# Grouping

In [10]:
class Session:
    """Wraps a labelled network and partitions its nodes into per-label groupings.

    The first attribute value of every edge is treated as that edge's label.
    """

    def __init__(self, network):
        """Count how often each edge label occurs and register node attributes.

        Args:
            network: the graph to analyse; the notebook passes a snap.TNEANet
                whose edges carry the label as their first attribute.
        """
        self.network = network
        # label -> number of edges carrying that label
        self.labels_freq = {}
        for EI in network.Edges():
            label = self.__get_edge_label(EI.GetId())
            # Explicit read-modify-write: `setdefault(label, 0) + 1` computes
            # the sum but never stores it, leaving every count at zero.
            self.labels_freq[label] = self.labels_freq.get(label, 0) + 1
        # label -> sub-network holding the nodes assigned to that label;
        # filled by compute_groupings()
        self.groupings = {}
        for attr in GraphAttributeConstants:
            self.network.AddIntAttrN(attr.value)

    # The 1st attribute of an edge is the edge label
    def __get_edge_label(self, edge_id):
        """Return the first attribute value of the edge with id ``edge_id``."""
        attr_values = snap.TStrV()
        self.network.AttrValueEI(edge_id, attr_values)
        return attr_values[0]

    def __get_edge_ids_per_label(self):
        """Return a dict mapping each edge label to a snap.TIntV of edge ids."""
        labels = {}
        for EI in self.network.Edges():
            edge_id = EI.GetId()
            label = self.__get_edge_label(edge_id)
            labels.setdefault(label, snap.TIntV()).append(edge_id)
        return labels

    def compute_groupings(self):
        """Assign every node to exactly one grouping and store it in ``self.groupings``.

        Labels are processed from the one with the most edges to the one with
        the fewest. Each label's grouping is the sub-network spanned by its
        edges, minus the nodes already claimed by a previously processed
        label. Nodes without any incoming or outgoing edge are collected in a
        separate grouping stored under ``__no_label__``.
        """
        labels_edge_ids = self.__get_edge_ids_per_label()
        node_ids_in_groupings = snap.TIntV()
        for label, edge_ids in sorted(labels_edge_ids.items(),
                                        key=lambda item: len(item[1]),
                                        reverse=True):
            grouping = self.network.ConvertESubGraph(snap.TNEANet, edge_ids)
            # Deleting a node that is not part of the grouping raises, so
            # check membership first instead of swallowing the exception
            # (which would also hide unrelated errors).
            for node_id in node_ids_in_groupings:
                if grouping.IsNode(node_id):
                    grouping.DelNode(node_id)
            for NI in grouping.Nodes():
                node_ids_in_groupings.append(NI.GetId())
            if not grouping.Empty():
                self.groupings[label] = grouping

        # We must place nodes with degree zero in their own grouping:
        # start from the nodes without incoming edges and drop those that
        # still have outgoing edges.
        in_deg_v = self.network.GetNodeInDegV()
        out_deg_v = self.network.GetNodeOutDegV()
        zero_deg_nodes = snap.TIntV()
        for node_id_in_deg in in_deg_v:
            if node_id_in_deg.GetVal2() == 0:
                zero_deg_nodes.Add(node_id_in_deg.GetVal1())
        for node_id_out_deg in out_deg_v:
            if node_id_out_deg.GetVal2() != 0:
                zero_deg_nodes.DelIfIn(node_id_out_deg.GetVal1())
        if not zero_deg_nodes.Empty():
            grouping = self.network.ConvertSubGraph(snap.TNEANet, zero_deg_nodes)
            self.groupings[__no_label__] = grouping

# Evaluation

In [37]:
class SuperNode:
    """A group of original nodes that is collapsed into one evaluation-graph node."""

    def __init__(self, grouping_label, inner_node_ids):
        """Remember the label of the originating grouping and the member node ids."""
        self.inner_node_ids = inner_node_ids
        self.grouping_label = grouping_label


class Evaluation:
    """Builds the evaluation graph (super nodes and super edges) for a session."""

    def __init__(self, session):
        """Create an empty evaluation graph and declare its attributes.

        Args:
            session: object providing ``network``, ``labels_freq`` and
                ``groupings`` (the notebook passes a ``Session``).
        """
        self.session = session
        self.super_edge_id_counter = itertools.count()
        self.super_node_id_counter = itertools.count()
        self.evaluation_graph = snap.TNEANet.New()
        for attr in NodeAttributeConstants:
            self.evaluation_graph.AddFltAttrN(attr.value)
        # One attribute per (label attribute prefix, edge label) pair.
        for attr in LabelAttributeConstants:
            for label in self.session.labels_freq.keys():
                self.evaluation_graph.AddFltAttrN(attr.value + label)
        for attr in EdgeAttributeConstants:
            self.evaluation_graph.AddFltAttrE(attr.value)
        # super node id -> SuperNode
        self.super_nodes = {}


    def __add_super_node(self, super_node):
        """Register ``super_node`` under the next free id and return that id."""
        super_node_id = next(self.super_node_id_counter)
        self.super_nodes[super_node_id] = super_node
        return super_node_id


    def evaluate(self):
        """Turn every weakly connected component of every grouping into a super node.

        Each node of the original network is tagged with the id of the super
        node it ends up in (attribute ``SUPER_NODE_ID``).
        """
        for label, grouping in self.session.groupings.items():
            print("Size of %s grouping: %d" % (label, grouping.GetNodes()))
            for wcc in grouping.GetWccs():
                print("-- Size of subgrouping: %d" % wcc.Len())
                # Copy the component into a TIntV: ConvertSubGraph only
                # accepts a TIntV, while the component is a TCnCom.
                inner_node_ids = snap.TIntV()
                for node_id in wcc:
                    inner_node_ids.Add(node_id)

                super_node_id = self.__add_super_node(
                    SuperNode(label, inner_node_ids))

                # Tag the original nodes with the super node they are part of.
                for node_id in inner_node_ids:
                    self.session.network.AddIntAttrDatN(
                        node_id, super_node_id,
                        GraphAttributeConstants.SUPER_NODE_ID.value)


    def build_evaluation_graph(self):
        """Populate the evaluation graph from the super nodes found by ``evaluate``.

        For every super node this stores the number of inner nodes, the number
        of inner edges and, per edge label, the share of inner edges with that
        label and a reach count (sum of BFS-tree sizes over the label's inner
        sub-graph). For every (destination super node, label) pair reached by
        an outgoing edge, one super edge is added whose weight is the number
        of such edges.
        """
        labels = self.session.labels_freq.keys()
        network = self.session.network
        super_node_attr = GraphAttributeConstants.SUPER_NODE_ID.value

        # Add all super nodes to evaluation graph
        for super_node_id in self.super_nodes.keys():
            self.evaluation_graph.AddNode(super_node_id)

        # Compute super node attributes and its super edges
        for super_node_id, super_node in self.super_nodes.items():
            inner_node_ids = super_node.inner_node_ids
            # Collected for the outer-connectivity step (3), which is not
            # implemented yet; nothing reads these two containers so far.
            outer_node_ids = snap.TIntV()
            labels_to_outer_edge_ids = {label: snap.TIntV() for label in labels}
            labels_to_inner_edge_ids = {label: snap.TIntV() for label in labels}
            # (destination super node id, label) -> number of outgoing edges
            dst_super_node_id_label_to_total_edges = {}

            # This sub-graph is meant to include all the edges from inner
            # nodes to other nodes in the original graph.
            # NOTE(review): SNAP documents the sub-graph helpers as returning
            # the *induced* sub-graph; if that holds for ConvertSubGraph, no
            # edge leaving the super node shows up here and no super edge is
            # ever created -- confirm against the SNAP reference.
            partial_sub_graph = network.ConvertSubGraph(
                snap.TNEANet, inner_node_ids)

            for EI in partial_sub_graph.Edges():
                src_node_id = EI.GetSrcNId()
                dst_node_id = EI.GetDstNId()
                edge_id = EI.GetId()
                attr_values = snap.TStrV()
                network.AttrValueEI(edge_id, attr_values)
                label = attr_values[0]

                src_super_node_id = network.GetIntAttrDatN(
                    src_node_id, super_node_attr)
                dst_super_node_id = network.GetIntAttrDatN(
                    dst_node_id, super_node_attr)
                if src_super_node_id == dst_super_node_id:
                    labels_to_inner_edge_ids[label].append(edge_id)
                    continue

                labels_to_outer_edge_ids[label].append(edge_id)
                if src_super_node_id != super_node_id:
                    # Incoming edge: remember where it comes from.
                    outer_node_ids.append(src_node_id)
                else:
                    # Outgoing edge: count it towards the matching super edge.
                    outer_node_ids.append(dst_node_id)
                    key = (dst_super_node_id, label)
                    # `setdefault(key, 0) + 1` would drop the sum; store it.
                    dst_super_node_id_label_to_total_edges[key] = (
                        dst_super_node_id_label_to_total_edges.get(key, 0) + 1)

            # Add all the stats on the super node in the form of attributes
            # (1) Compression
            # Number of nodes inside super node
            node_weight = inner_node_ids.Len()
            self.evaluation_graph.AddFltAttrDatN(
                super_node_id, node_weight,
                NodeAttributeConstants.NODE_WEIGHT.value)

            # Number of edges inside super node
            edge_weight = sum(edge_ids.Len()
                for edge_ids in labels_to_inner_edge_ids.values())
            self.evaluation_graph.AddFltAttrDatN(
                super_node_id, edge_weight,
                NodeAttributeConstants.EDGE_WEIGHT.value)

            # (2) Inner-Connectivity
            # Store label frequency percentage and calculate reachability
            for label, edge_ids in labels_to_inner_edge_ids.items():
                # A super node without inner edges gets 0 for every label.
                percentage = edge_ids.Len() / edge_weight if edge_weight else 0
                self.evaluation_graph.AddFltAttrDatN(
                    super_node_id, percentage,
                    LabelAttributeConstants.PERCENTAGE.value + label)

                reach = 0
                if edge_ids.Len() > 0:
                    label_sub_graph_sn = network.GetESubGraph(edge_ids)
                    for NI in label_sub_graph_sn.Nodes():
                        # BFS following out-edges only.
                        bfs_tree = label_sub_graph_sn.GetBfsTree(
                            NI.GetId(), True, False)
                        reach += bfs_tree.GetEdges()
                self.evaluation_graph.AddFltAttrDatN(
                    super_node_id, reach,
                    LabelAttributeConstants.REACH.value + label)

            # (3) Outer-Connectivity
            # Still needs implementation (computeConcatenationProperties in
            # the original repository).

            # Add super edges and their attributes
            for (dst_super_node_id, label), total_edges in \
                    dst_super_node_id_label_to_total_edges.items():
                super_edge_id = self.evaluation_graph.AddEdge(
                    super_node_id, dst_super_node_id)

                # Number of original edges represented by the super edge
                # (previously the inner-node count was stored by mistake).
                self.evaluation_graph.AddFltAttrDatE(
                    super_edge_id, total_edges,
                    EdgeAttributeConstants.EDGE_WEIGHT.value)

In [38]:
# End-to-end run on the property-graph paper example:
# load -> group by edge label -> find super nodes -> build evaluation graph.
print("Loading dataset...")
pg_paper = make_pg_paper_network()
print("Dataset loaded.")
print("--> Creating session using Property Graph paper graph example...")
pg_paper_session = Session(pg_paper)
print("--> Computing groupings...")
pg_paper_session.compute_groupings()

print("--> Preparing for evaluation...")
pg_paper_evaluation = Evaluation(pg_paper_session)
print("--> Evaluating...")
pg_paper_evaluation.evaluate()
print("--> Building evaluation graph...")
pg_paper_evaluation.build_evaluation_graph()
print("--> Evaluation completed.")
# Summary of the resulting evaluation graph.
print("--> Total super nodes: %d" % pg_paper_evaluation.evaluation_graph.GetNodes())
print("--> Total super edges: %d" % pg_paper_evaluation.evaluation_graph.GetEdges())

Loading dataset...
Dataset loaded.
--> Creating session using Property Graph paper graph example...
--> Computing groupings...
--> Preparing for evaluation...
--> Evaluating...
Size of l0 grouping: 10
-- Size of subgrouping: 10
14
Size of l5 grouping: 13
-- Size of subgrouping: 3
2
-- Size of subgrouping: 2
1
-- Size of subgrouping: 2
1
-- Size of subgrouping: 2
1
-- Size of subgrouping: 2
1
-- Size of subgrouping: 2
1
Size of l3 grouping: 2
-- Size of subgrouping: 1
0
-- Size of subgrouping: 1
0
--> Building evaluation graph...
--> Evaluation completed.
--> Total super nodes: 9
--> Total super edges: 0


In [54]:
# Node and edge counts of the evaluation graph built in the pipeline cell.
print(pg_paper_evaluation.evaluation_graph.GetNodes())
print(pg_paper_evaluation.evaluation_graph.GetEdges())

10


In [57]:
# List every edge of the original network as: edge id, source id, destination id.
for EI in pg_paper_evaluation.session.network.Edges():
    print(EI.GetId(), EI.GetSrcNId(), EI.GetDstNId())

0 1 4
1 2 4
2 2 5
3 3 4
4 5 4
5 5 7
6 6 3
7 6 4
8 6 5
9 6 7
10 7 9
11 8 5
12 8 9
13 9 15
14 10 9
15 11 21
16 11 3
17 12 22
18 12 3
19 13 23
20 13 2
21 14 23
22 14 1
23 15 24
24 15 6
25 16 25
26 16 7
27 17 26
28 17 6
29 31 4
30 31 21
31 31 22
32 31 23
33 32 9
34 32 24
35 32 25
36 32 26


In [35]:
# Scratch check of the reach computation on the sub-graph of 'l0' edges.
# Group the edge ids of pg_paper by label (first edge attribute value).
labels_edge_ids = {}
for EI in pg_paper.Edges():
    edge_id = EI.GetId()
    attr_values = snap.TStrV()
    pg_paper.AttrValueEI(edge_id, attr_values)
    label = attr_values[0]
    labels_edge_ids.setdefault(label, snap.TIntV()).append(edge_id)

sub_graph = pg_paper.GetESubGraph(labels_edge_ids['l0'])

# shortestPath, NIdToDistH = sub_graph.GetShortPathAll(2, IsDir=True)
# for item in NIdToDistH:
#     print(item, NIdToDistH[item])
# print(shortestPath)

# Accumulate the BFS-tree edge counts (following out-edges only) over all
# nodes; the running total is printed next to each node id.
reach = 0
for NI in sub_graph.Nodes():
    node_id = NI.GetId()
    bfs_tree = sub_graph.GetBfsTree(node_id, True, False)
    reach = reach + bfs_tree.GetEdges()
    print(node_id, reach)

1 1
4 1
2 5
5 8
3 9
7 10
6 13
9 13
8 14
10 15


In [None]:
# Print the first attribute value (used as the edge label throughout this
# notebook) of the edge looked up with GetEI(13, 23).
EI = pg_paper.GetEI(13,23)
edge_id = EI.GetId()
attr_values = snap.TStrV()
pg_paper.AttrValueEI(edge_id, attr_values)
print(attr_values[0])

In [None]:
# Build the residence-hall network in this cell so it runs on a fresh kernel:
# `residence_hall` was never assigned anywhere in the notebook, and
# compute_groupings is a Session method rather than a free function.
residence_hall = make_residence_hall_network()
print(residence_hall.GetNodes())
residence_hall_session = Session(residence_hall)
residence_hall_session.compute_groupings()
# label -> sub-network, as filled in by Session.compute_groupings
residence_hall_groupings = residence_hall_session.groupings