## Edge Entropy

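An uncertain-graph approach based on important nodes: a set of important nodes is selected, edge probabilities are assigned around them, and a graph is sampled from those probabilities.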


In [7]:
import numpy as np
import networkx as nx

import itertools

## used to generate non-overlapping triangles
from random import shuffle, uniform
from itertools import combinations

In [2]:
## we use the DD199 graph for testing
## NOTE(review): pandas is imported here but is not used in any of the visible cells
import pandas as pd
## read the DD199 edge list from disk; nodetype=int converts the node labels to ints
DD199 = nx.read_edgelist("Data/DD199/DD199.edges", nodetype=int)

## BAG: small Barabasi-Albert graph (20 nodes, 2 edges attached per new node);
## no seed is passed, so it presumably differs from run to run
BAG = nx.barabasi_albert_graph(20,2)

In [3]:
def select_nodes(G:nx.Graph):
    '''
    Return the list of candidate ("important") nodes Vs.

    Nodes are ranked by betweenness centrality, highest first. The top
    ceil(|V| / 10) of them are taken, and of those only the nodes whose
    degree is at least 4 are kept. Plain degree is used rather than degree
    centrality, because of how it is defined in the paper.

    Parameters
    ----------
    G : nx.Graph
        Graph to pick the candidate nodes from.

    Returns
    -------
    list
        Selected nodes, ordered by decreasing betweenness centrality.
        May be empty (e.g. empty graph, or no top-ranked node has degree >= 4).
    '''
    BC = nx.betweenness_centrality(G)
    ## rank vertices by BC, highest first; sorted() is stable, so ties keep the order of BC
    sortedV = sorted(BC, key=BC.get, reverse=True)
    endIdx = int(np.ceil(G.number_of_nodes()/10))
    ## build a new list instead of calling .remove() on the list being iterated:
    ## removing during iteration skips the element that follows each removed one
    return [vi for vi in sortedV[:endIdx] if G.degree(vi) >= 4]

In [22]:
def getDisconnectedTriangles(G:nx.Graph):
    """Return the edges of a random set of pairwise edge-disjoint triangles of G.

    All triangles (3-cliques) of G are collected and visited in random order.
    A triangle is accepted only if none of its three edges already belongs to
    an accepted triangle. Accepted triangles may still share nodes.

    Parameters
    ----------
    G : nx.Graph
        Graph whose triangles are packed.

    Returns
    -------
    list
        Flat list of 2-tuples, three consecutive entries per accepted
        triangle. Empty if G has no triangle. The result depends on the
        state of the ``random`` module because of the shuffle.
    """
    ## enumerate_all_cliques yields cliques in order of nondecreasing size, so stop
    ## at the first clique larger than a triangle instead of enumerating every
    ## larger clique (exponentially many on a complete graph)
    smallCliques = itertools.takewhile(lambda c: len(c) <= 3, nx.enumerate_all_cliques(G))
    allTriangles = [c for c in smallCliques if len(c) == 3]
    shuffle(allTriangles)

    disconnTriangles = []
    usedEdges = set()   ## frozenset keys: O(1) lookup, independent of (u,v) vs (v,u) orientation
    for t in allTriangles:  ## go through the triangles in a random order
        tEdges = list(combinations(t, 2))
        tKeys = [frozenset(edge) for edge in tEdges]
        if any(key in usedEdges for key in tKeys):
            continue        ## shares an edge with an already accepted triangle
        usedEdges.update(tKeys)
        disconnTriangles += tEdges

    return disconnTriangles

In [5]:
def generate_uncertainty_graph(G:nx.Graph, Vn:list):
    '''
    Build the uncertain graph around the important nodes Vn.

    For every node vi in Vn:
      1. take the subgraph of G induced by vi and its neighbours,
      2. add an edge between every pair of neighbours (triadic closure), which
         makes the subgraph complete since every neighbour is already attached to vi,
      3. pick a set of triangles with no common edges (getDisconnectedTriangles),
      4. give every edge of the subgraph the same probability
         prob = (number of edges in the picked triangles) / (number of edges in the subgraph).
         This is one interpretation of the edge probability; an alternative would
         be deg(i)*deg(j) / total number of edges in the graph.
    Merging all the subgraphs gives the complete uncertain graph.

    Parameters
    ----------
    G : nx.Graph
        Original graph.
    Vn : list
        Important nodes of G used as subgraph centres.

    Returns
    -------
    nx.Graph
        Composition of all per-node subgraphs with the edge attribute "prob".
        When an edge lies in several subgraphs, nx.compose_all keeps the
        attributes of the later subgraph. An empty graph of G's class is
        returned when Vn is empty.
    '''
    graphsList = []
    for vi in Vn:
        nbrs = list(G.neighbors(vi))
        ## .copy() turns the subgraph view into its own graph so edges can be added
        SGvi = G.subgraph([vi, *nbrs]).copy()
        ## only neighbour pairs are needed, all of them are already attached to vi
        SGvi.add_edges_from(itertools.combinations(nbrs, 2))
        T1 = getDisconnectedTriangles(SGvi)
        numEdges = SGvi.number_of_edges()
        ## an isolated vi gives a subgraph without edges; avoid dividing by zero
        prob = len(T1) / numEdges if numEdges else 0.0
        probDict = {edge: prob for edge in SGvi.edges()}
        nx.set_edge_attributes(SGvi, probDict, "prob")
        graphsList.append(SGvi)

    if not graphsList:
        ## nx.compose_all raises ValueError on an empty list
        return G.__class__()
    ## merge the SGvi to get the anonymised graph
    return nx.compose_all(graphsList)


In [34]:
def fast_gen_probDict(G:nx.Graph, Vn:list):
    '''Compute the edge probabilities of the uncertain graph without building subgraphs.

    For every node vi in Vn, the complete graph on vi and its neighbours is
    considered. Its triangles are visited in random order and greedily packed
    so that no two accepted triangles have a common edge. Every edge of that
    complete graph then gets
    prob = (number of edges in the accepted triangles) / (number of edges of the complete graph).

    Parameters
    ----------
    G : nx.Graph
        Original graph.
    Vn : list
        Important nodes of G.

    Returns
    -------
    dict
        Maps an edge, given as a 2-tuple of nodes, to its probability.
        Each undirected edge appears under a single key; if it lies in the
        neighbourhood of several nodes of Vn, the value of the later node wins.
        Nodes of Vn without neighbours contribute nothing.
    '''
    probDict = {}
    for vi in Vn:
        deg = G.degree(vi)
        if deg == 0:
            ## no neighbours: no edges to assign, and the denominator below would be 0
            continue
        SGvi_nodes = list(G.neighbors(vi)) + [vi]
        ## SGvi is the complete subgraph, so every 3-combination is a triangle
        SGvi_triangles = list(combinations(SGvi_nodes, 3))
        shuffle(SGvi_triangles)
        usedEdges = set()       ## set instead of list: O(1) membership test
        numT1Edges = 0
        for t in SGvi_triangles:
            tEdges = list(combinations(t, 2))
            if any(edge in usedEdges for edge in tEdges):
                continue
            usedEdges.update(tEdges)
            numT1Edges += 3
        prob = numT1Edges / ((deg + 1) * deg / 2)
        ## can use combinations of length 2, because SGvi is complete
        for (u, v) in combinations(SGvi_nodes, 2):
            ## reuse the key of an edge stored in the opposite orientation by an
            ## earlier vi, so one undirected edge never gets two entries
            key = (v, u) if (v, u) in probDict else (u, v)
            probDict[key] = prob
    return probDict
        

In [35]:
def sample_uncertain_graph(G:nx.Graph):
    """Draw one graph from the uncertain graph derived from G.

    The important nodes of G are selected with select_nodes and the edge
    probabilities are computed with fast_gen_probDict. Every edge with a
    probability is then kept if a uniform draw from [0, 1] does not exceed
    that probability.

    Returns a new nx.Graph holding only the kept edges, so nodes without a
    kept edge are not part of it.
    """
    importantNodes = select_nodes(G)
    ## the probabilities could also be read from the "prob" edge attribute of
    ## generate_uncertainty_graph(G, importantNodes); the fast variant is used here
    probByEdge = fast_gen_probDict(G, importantNodes)

    sampled = nx.Graph()
    for (u, v), p in probByEdge.items():
        if p >= uniform(0, 1):
            sampled.add_edge(u, v)
    return sampled

### Testing