In [9]:
import snap
import os.path
import itertools

import util

# Global Variables

In [10]:
__no_label__ = "NO_LABEL"

In [14]:
from enum import Enum

# Constants used in the paper:
# NODE_WEIGHT_EVALUATION("NUMBER_OF_NODES_INSIDE_OF_SN"),
# NODE_WEIGHT_FREQUENCY("NUMBER_OF_SN_INSIDE_OF_HN"),
# NODE_WEIGHT_EVALUATION_AVG("AVG_WEIGHT_ON_SN"),
# PERCENTAGE("PERCENTAGE"),
# NUMBER_OF_INNER_EDGES("NUMBER_OF_INNER_EDGES"),
# LABEL("LABEL"),
# EDGE_RATIO("EDGE_RATIO"),
# GROUPING("GROUPING"),
# REACHABILITY_COUNT("REACH_NUMBER_OF_INNER_PATHS"),
# PATH_OUT("REACH_PATH_OUT_BY_LABEL"),
# PATH_IN("REACH_PATH_IN_BY_LABEL"),
# EDGE_WEIGHT("EDGE_WEIGHT"),
# PARTICIPATION_LABEL("PARTICIPATION_LABEL"),
# TRAVERSAL_FRONTIERS("TRAVERSAL_FRONTIERS");

class AttributeConstants(Enum):
    # Compression
    NODE_WEIGHT = "NUMBER_OF_INNER_NODES"
    EDGE_WEIGHT = "NUMBER_OF_INNER_EDGES"

    NODE_WEIGHT_AVG = "AVG_WEIGHT_ON_SN"
    
    NODE_WEIGHT_FREQUENCY = "NUMBER_OF_SN_INSIDE_OF_HN"
    
    PERCENTAGE = "PERCENTAGE"
    LABEL = "LABEL"
    GROUPING = "GROUPING"

    EDGE_RATIO = "EDGE_RATIO"
    
    REACHABILITY_COUNT = "REACH_NUMBER_OF_INNER_PATHS"
    PATH_OUT = "REACH_PATH_OUT_BY_LABEL"
    PATH_IN = "REACH_PATH_IN_BY_LABEL"
    PARTICIPATION_LABEL = "PARTICIPATION_LABEL"
    TRAVERSAL_FRONTIERS = "TRAVERSAL_FRONTIERS"

# Each of the following attributes will exist for each l-label, i.e., we would
# have PERCENTAGE_KNOWS, PERCENTAGE_LIKES, for a graph with l-labels = {KNOWS,
# LIKES}.
class LabelAttributeConstants(Enum):
    # Inner-Connectivity
    PERCENTAGE = "LABEL_PERCENTAGE_" # percent of l-labeled inner edges
    REACH = "LABEL_REACH_" # number of pairs connected with an l-labeled inner edge

# Load datasets

In [15]:
# Generic funtion to read edge file into a network
# TODO: 
# allow any graph type not only snap.TNEANet
# check if binary exists, if so, load that (rename function to make graph/net)
def edge_file_to_network(filename, edge_attr, src_node_attr, dst_node_attr, tab_separated=False, dump=False):
    context = snap.TTableContext()

    schema = snap.Schema()
    schema.Add(snap.TStrTAttrPr("srcID", snap.atInt))
    schema.Add(snap.TStrTAttrPr("dstID", snap.atInt))
    
    edgeattrv = snap.TStrV()
    for attr, val in edge_attr.items():
        edgeattrv.Add(attr)
        schema.Add(snap.TStrTAttrPr(attr, val))
    
    srcnodeattrv = snap.TStrV()
    for attr, val in src_node_attr.items():
        srcnodeattrv.Add(attr)
        schema.Add(snap.TStrTAttrPr(attr, val))

    dstnodeattrv = snap.TStrV()
    for attr, val in dst_node_attr.items():
        dstnodeattrv.Add(attr)
        schema.Add(snap.TStrTAttrPr(attr, val))

    if tab_separated:
        separator = '\t'
    else:
        separator = ' '

    table = snap.TTable.LoadSS(schema, filename, context, separator, snap.TBool(False))

    # net will be an object of type snap.TNEANet
    net = table.ToNetwork(snap.TNEANet, "srcID", "dstID", srcnodeattrv, dstnodeattrv, edgeattrv, snap.aaFirst)

    if dump:
        net.Dump()

    # Save to binary
    outfile = filename + ".bin"
    FOut = snap.TFOut(outfile)
    table.Save(FOut)
    FOut.Flush()

    return net

def make_residence_hall_network():
    filename = "data/moreno_oz/out.moreno_oz_oz"

    edge_attr = {"relationship" : snap.atInt}
    src_node_attr = {}
    dst_node_attr = {}

    return edge_file_to_network(filename, edge_attr, src_node_attr, dst_node_attr)

def make_pg_paper_network():
    filename = "data/example/pg_paper.txt"

    edge_attr = {"edgelabel" : snap.atStr}
    src_node_attr = {}
    dst_node_attr = {}

    return edge_file_to_network(filename, edge_attr, src_node_attr, dst_node_attr)

In [16]:
residence_hall = make_residence_hall_network()
pg_paper = make_pg_paper_network()

# Grouping

In [17]:
class Session:
    def __init__(self, network):
        self.network = network
        self.labels_freq = {}
        for EI in network.Edges():
            edge_id = EI.GetId()
            attr_values = snap.TStrV()
            network.AttrValueEI(edge_id, attr_values)
            label = attr_values[0]
            self.labels_freq.setdefault(label, 0) + 1
        self.groupings = {}

    # The 1st attribute of an edge is the edge label
    def __get_edge_ids_per_label(self):
        labels = {}
        for EI in self.network.Edges():
            edge_id = EI.GetId()
            attr_values = snap.TStrV()
            self.network.AttrValueEI(edge_id, attr_values)
            label = attr_values[0]
            labels.setdefault(label, snap.TIntV()).append(edge_id)
        return labels

    def compute_groupings(self):
        labels_edge_ids = self.get_edge_ids_per_label()
        node_ids_in_groupings = snap.TIntV()
        for label, edge_ids in sorted(labels_edge_ids.items(), 
                                        key=lambda item: len(item[1]), 
                                        reverse=True):
            sub_graph = self.network.ConvertESubGraph(snap.TNEANet, edge_ids)
            # We cannot use DelNodes; an exception is thrown if we try to remove a 
            # node that is not there. There is not way to continue after the 
            # exception is thrown, hence not all nodes get removed.
            for node_id in node_ids_in_groupings:
                try:
                    sub_graph.DelNode(node_id)
                except Exception:
                    pass
            for NI in sub_graph.Nodes():
                node_ids_in_groupings.append(NI.GetId())
            if not sub_graph.Empty():
                self.groupings[label] = sub_graph

        # We must place nodes with degree zero in their own grouping
        in_deg_v = self.network.GetNodeInDegV()
        out_deg_v = self.network.GetNodeOutDegV()
        zero_deg_nodes = snap.TIntV()
        for node_id_in_deg in in_deg_v:
            if node_id_in_deg.GetVal2() == 0:
                zero_deg_nodes.Add(node_id_in_deg.GetVal1())
        for node_id_in_deg in out_deg_v:
            if node_id_in_deg.GetVal2() != 0:
                zero_deg_nodes.DelIfIn(node_id_in_deg.GetVal1())
        if not zero_deg_nodes.Empty():
            sub_graph = self.network.ConvertSubGraph(snap.TNEANet, zero_deg_nodes)
            self.groupings[__no_label__] = sub_graph

# Evaluation

In [18]:
class SuperNode:
    def __init__(self, grouping_label, sub_graph):
        self.grouping_label = grouping_label
        self.sub_graph = sub_graph
    
    def compute_stats(self, network):
        total_edges = self.sub_graph.GetEdges()
        l_reach = {}
        # maybe repurpose get_edge_ids_per_label
        for EI in self.sub_graph.Edges():
            src_node_id = EI.GetSrcNId()
            dst_node_id = EI.GetDstNId()
            edge_id = network.GetEI(src_node_id, dst_node_id).GetId()
            attr_values = snap.TStrV()
            network.AttrValueEI(edge_id, attr_values)
            label = attr_values[0]
            l_reach[label] = l_reach.get(label, 0) + 1
            

class Evaluation:
    def __init__(self, session):
        self.session = session
        self.super_edge_id_counter = itertools.count()
        self.super_node_id_counter = itertools.count()
        self.evaluation_graph = snap.TNEANet.New()
        for attr in AttributeConstants:
            self.evaluation_graph.AddFltAttrN(attr)
        for attr in LabelAttributeConstants:
            for label in self.session.labels_freq.keys():
                self.evaluation_graph.AddFltAttrN(attr+label)
        self.super_nodes = {}


    def add_super_node(self, super_node):
        super_node_id = next(self.super_edge_id_counter)
        super_node = SuperNode(super_node_id, super_node)


    def __build_super_nodes(self):
        network = self.session.network
        labels = self.session.labels_freq.keys()
        labels_edge_ids_sn = {}
        for label in labels:
            labels_edge_ids_sn[label] = snap.TIntV()

        for super_node_id, super_node in self.super_nodes.items():
            # Add super node to evaluation graph
            self.evaluation_graph.AddNode(super_node_id)

            # Add all the stats on the super node in the form of attributes
            # (1) Compression
            # Number of nodes inside super node
            node_weight = super_node.sub_graph.GetNodes()
            self.evaluation_graph.AddFltAttrDatN(super_node_id, node_weight, 
                AttributeConstants.NODE_WEIGHT)

            # Number of edges inside super node
            edge_weight = super_node.sub_graph.GetEdges()
            self.evaluation_graph.AddFltAttrDatN(super_node_id, edge_weight, 
                AttributeConstants.EDGE_WEIGHT)

            # (2) Inner-Connectivity
            # Get all edge ids per label inside super node
            # REFACTOR INTO UTIL FUNCTION
            for EI in self.sub_graph.Edges():
                src_node_id = EI.GetSrcNId()
                dst_node_id = EI.GetDstNId()
                edge_id = network.GetEI(src_node_id, dst_node_id).GetId()
                attr_values = snap.TStrV()
                network.AttrValueEI(edge_id, attr_values)
                label = attr_values[0]
                labels_edge_ids_sn[label].append(edge_id)

            # Store label frequency percentage and calculate reachability
            for label, edge_ids in labels_edge_ids_sn.items():
                percentage = edge_ids.Len() / edge_weight
                self.evaluation_graph.AddFltAttrDatN(super_node_id, percentage, 
                    LabelAttributeConstants.PERCENTAGE+label)
                
                reach = 0
                if edge_ids.Len() > 0:
                    label_sub_graph_sn = super_node.GetESubGraph(edge_ids)
                    for NI in label_sub_graph_sn.Nodes():
                        node_id = NI.GetId()
                        bfs_tree = label_sub_graph_sn.GetBfsTree(
                            node_id, True, False)
                        reach = reach + bfs_tree.GetEdges()
                self.evaluation_graph.AddFltAttrDatN(super_node_id, reach, 
                    LabelAttributeConstants.REACH+label)

    def __build_super_edges(self):
        # We need to calculate the (3) Outer-Connectivity stats
        return

    def build(self):
        # First we must add all the super nodes and their respective stats
        self.__build_super_nodes()

        # Now that we can compute the super edges
        self.__build_super_edges()


    def evaluation(network, groupings):
        evaluation = Evaluation(network)
        for label, sub_graph in groupings.items():
            print("Size of %s grouping: %d" % (label, sub_graph.GetNodes()))
            components = sub_graph.GetWccs()
            for component in components:
                print("-- Size of subgrouping: %d" % component.Len())
                # Need to turn the component into a list because ConvertSubGraph 
                # only takes TIntV and component is of type TCnCom. According to 
                # the docs it should be of type TIntV (it essentially is), but I 
                # don't know how to get python to see that.
                component_as_list = list(component)
                wcc_sub_graph = network.ConvertSubGraph(snap.TNEANet, component_as_list)
                super_node = SuperNode(label, wcc_sub_graph)
                evaluation.add_super_node(super_node)

                
        # for EI in super_nodes[1].sub_graph.Edges():
        #     src_node_id = EI.GetSrcNId()
        #     dst_node_id = EI.GetDstNId()
        #     edge_id = network.GetEI(src_node_id, dst_node_id).GetId()
        #     attr_values = snap.TStrV()
        #     network.AttrValueEI(edge_id, attr_values)
        #     print(attr_values[0])
        return evaluation

IndentationError: expected an indented block (<ipython-input-18-a1c98a9d6560>, line 84)

In [35]:
labels_edge_ids = {}
for EI in pg_paper.Edges():
    edge_id = EI.GetId()
    attr_values = snap.TStrV()
    pg_paper.AttrValueEI(edge_id, attr_values)
    label = attr_values[0]
    labels_edge_ids.setdefault(label, snap.TIntV()).append(edge_id)

sub_graph = pg_paper.GetESubGraph(labels_edge_ids['l0'])

# shortestPath, NIdToDistH = sub_graph.GetShortPathAll(2, IsDir=True)
# for item in NIdToDistH:
#     print(item, NIdToDistH[item])
# print(shortestPath)

reach = 0
for NI in sub_graph.Nodes():
    node_id = NI.GetId()
    bfs_tree = sub_graph.GetBfsTree(node_id, True, False)
    reach = reach + bfs_tree.GetEdges()
    print(node_id, reach)

1 1
4 1
2 5
5 8
3 9
7 10
6 13
9 13
8 14
10 15


In [19]:
print(pg_paper.GetNodes())
pg_paper_groupings = compute_groupings(pg_paper)
evaluation = evaluation(pg_paper, pg_paper_groupings)

25


NameError: name 'compute_groupings' is not defined

In [None]:
EI = pg_paper.GetEI(13,23)
edge_id = EI.GetId()
attr_values = snap.TStrV()
pg_paper.AttrValueEI(edge_id, attr_values)
print(attr_values[0])

In [None]:
print(residence_hall.GetNodes())
residence_hall_groupings = compute_groupings(residence_hall)