## ANNM
    - add noise nodes

NOTE: compare this method with the pseudoNode anonymization to see whether the two are the same;
if they are, node addition could be done in a similar way.
TODO: the maximum degree of the noise nodes still needs to be tracked somehow.

In [2]:
import numpy as np
import networkx as nx
import random

from functools import partial
from itertools import combinations, groupby

## we use the DD199 graph for testing
import pandas as pd
## DD199 test graph, parsed from an edge-list file; node labels are converted to int
DD199 = nx.read_edgelist("Data/DD199/DD199.edges", nodetype=int)

## small Barabasi-Albert graph (n=20 nodes, m=2 edges attached per new node); randomly generated, no seed is set
BAG = nx.barabasi_albert_graph(20,2)

In [5]:
def degreeGroupsCreation(G:nx.Graph):
    '''Build the degree sequence of G and split it into groups of equal degree.

    Returns a pair (degSeq, degSeqGrouped):
      - degSeq is a list of (node, degree) tuples ordered from highest to lowest degree;
        nodes of equal degree keep the order in which G.degree() reported them.
      - degSeqGrouped is a list of lists of (node, degree) tuples, one inner list per
        distinct degree, in the same highest-degree-first order.
    '''
    ranked = list(dict(G.degree()).items())
    ranked.sort(key = lambda pair: pair[1], reverse=True)   ## stable sort, highest degree first

    ## walk the sorted sequence and open a new group whenever the degree changes
    buckets = []
    for pair in ranked:
        if buckets and buckets[-1][0][1] == pair[1]:
            buckets[-1].append(pair)
        else:
            buckets.append([pair])
    return ranked, buckets

In [18]:
def degreePrioritization(G:nx.Graph, degSeqGrouped, k):
    '''Assign a priority to every degree group that has fewer than k members.

    G is not used by this function.
    degSeqGrouped is a list of groups (lists of (node, degree) tuples); k is the anonymity level.

    Returns {groupID: priority}, where groupID is the index of the group in degSeqGrouped.
    Only insecure groups (size <= k-1) appear in the result. With S the group size and
    D = k - S the number of missing members, the priority is 1 when D < S/2 and 0 otherwise.
    '''
    priorities = {}
    for groupID, members in enumerate(degSeqGrouped):
        size = len(members)
        if size > k - 1:
            continue    ## group is already large enough, nothing to prioritise
        missing = k - size
        priorities[groupID] = 1 if missing < size / 2 else 0
    return priorities

In [None]:
def targetGroupsCreation(G:nx.Graph, P, degSeqGrouped, k):
    '''Merge the insecure degree groups into target groups of at least k nodes.

    G is not used by this function (kept for interface compatibility).
    P is {groupID: priority} for the insecure groups, groupID being an index into degSeqGrouped.
    degSeqGrouped is the list of degree groups (lists of (node, degree)); it is NOT modified.
    k is the anonymity level, i.e. the size at which a group counts as complete.

    Returns (Groups_prime, gPO):
      - gPO lists the insecure groupIDs, priority-1 groups first, then priority-0 groups,
        each part in reversed P order (lowest degree first when P was filled in
        degSeqGrouped order).
      - Groups_prime is {newID: [(node, deg), ...]}; newID i corresponds to gPO[i].
        Groups that were fully absorbed by another group are left as empty lists.
        An extra key len(gPO) holds the collected leftovers when they form their own group.
      Every node of an insecure group appears in exactly one target group.

    Filling rules:
      - a priority-1 group is completed with nodes of priority-0 groups while any of those
        still has nodes; once they are all used up it draws from any later group;
      - a priority-0 group draws from any later group.
    Groups that are still smaller than k afterwards are pooled: the pool becomes its own
    group if it reaches k nodes (or if no complete group exists), otherwise it is merged
    into the last complete priority-1 group, falling back to the last complete group.
    '''
    ## groups with priority 1 first; using members of groups with priority 0 we complete them
    groupsPriority1 = [gid for gid, prio in P.items() if prio == 1][::-1]
    groupsPriority0 = [gid for gid, prio in P.items() if prio == 0][::-1]
    gPO = groupsPriority1 + groupsPriority0     ## list reordering the groups
    order = len(gPO)

    ## work on copies: the lists are consumed below and the caller's degSeqGrouped must stay intact
    remaining = {i: list(degSeqGrouped[gid]) for i, gid in enumerate(gPO)}  ## newID : [(node, deg)]
    priority = {i: P[gid] for i, gid in enumerate(gPO)}                     ## newID : priority
    Groups_prime = {}

    for i in range(order):
        target = remaining[i]
        remaining[i] = []       ## emptied so that no node can end up in two target groups
        Groups_prime[i] = target

        later = range(i + 1, order)     ## starting at i+1 guarantees j>i
        if priority[i] == 1 and any(priority[j] == 0 and remaining[j] for j in later):
            donors = [j for j in later if priority[j] == 0]
        else:
            donors = later

        for j in donors:
            if len(target) >= k:
                break
            ## a group is complete at exactly k members, so stop filling there
            while len(target) < k and remaining[j]:
                target.append(remaining[j].pop(0))

    ## pool the nodes of every group that could not be completed
    leftoverNodes = []
    for i in range(order):
        if 0 < len(Groups_prime[i]) < k:
            leftoverNodes.extend(Groups_prime[i])
            Groups_prime[i] = []

    if leftoverNodes:
        complete = [i for i in range(order) if Groups_prime[i]]
        completeP1 = [i for i in complete if priority[i] == 1]
        if len(leftoverNodes) > k-1 or not complete:
            ## large enough to stand alone, or there is simply no complete group to join
            Groups_prime[order] = leftoverNodes
        else:
            ## add to maximal complete group with priority 1 (any complete group as fallback)
            host = completeP1[-1] if completeP1 else complete[-1]
            Groups_prime[host] += leftoverNodes

    return Groups_prime, gPO

In [66]:
def addingNoiseAnon(G:nx.Graph, group_prime):
    '''Return a copy of G in which the nodes of each target group are padded with noise nodes.

    G is left untouched; all changes are made on a copy.
    group_prime is {groupID: [(node, deg), ...]}; empty groups are ignored. deg is expected
    to be the degree of node in G.

    For every group the target degree is the largest deg in the group. The members are
    visited from lowest to highest betweenness centrality (computed on G) and receive edges
    to noise nodes until they reach the target degree. Afterwards noise nodes whose degree
    is not one of the target degrees are linked to each other to reduce their deficiency.

    Noise nodes get integer labels that are not yet used in the graph, starting at
    number_of_nodes().

    NOTE: this does not actually guarantee k-anonymity for the noise nodes themselves.
    '''
    bc = nx.betweenness_centrality(G)
    ## same form as group_prime, without empty groups, each group sorted by bc (lowest first)
    groups = {groupID: sorted(members, key = lambda item: bc[item[0]])
              for (groupID, members) in group_prime.items() if members}

    Ganon = G.copy()
    next_id = Ganon.number_of_nodes()

    def _attachNoiseNode(target, bucket):
        '''Create a noise node with an unused label, link it to target and record it in bucket.'''
        nonlocal next_id
        ## number_of_nodes() is only a free label when nodes are labelled 0..n-1; skip used ones
        while next_id in Ganon:
            next_id += 1
        Ganon.add_node(next_id)
        Ganon.add_edge(next_id, target)     ## connect to the node
        bucket.append(next_id)
        next_id += 1

    ## noisetag switches between two modes, and carries over from one group to the next:
    ##   True  -> the node's whole deficit is covered with brand-new noise nodes
    ##   False -> existing noise nodes of the same target degree are reused first
    noisetag = True
    newNodes = {}       ## target degree : [noise nodes created for that target degree]

    for group in groups.values():
        targetDeg = max(deg for (_, deg) in group)
        bucket = newNodes.setdefault(targetDeg, [])
        for (node, deg) in group:
            if noisetag:
                for _ in range(targetDeg - deg):
                    _attachNoiseNode(node, bucket)
                    noisetag = False
            else:
                ## add to noise nodes with lowest deg first
                for new_node in sorted(bucket, key = lambda x: Ganon.degree(x)):
                    if Ganon.degree(node) >= targetDeg:     ## do not add edge if unnecessary
                        break
                    if Ganon.degree(new_node) < targetDeg:
                        ## maximum degree of noise nodes is target degree -> no massive degree difference
                        Ganon.add_edge(new_node, node)
                ## if existing noise nodes are insufficient, add new ones
                for _ in range(targetDeg - Ganon.degree(node)):
                    _attachNoiseNode(node, bucket)
                    noisetag = True

    ## noise nodes whose degree matches no target degree, with their degree deficiency
    unsecNoiseNodes = {}
    for deg, bucket in newNodes.items():
        for node in bucket:
            if Ganon.degree(node) not in newNodes:
                unsecNoiseNodes[node] = deg - Ganon.degree(node)    ## node: deg deficiency

    def _byDeficiency():
        '''List the (noise node, deficiency) pairs, largest deficiency first.'''
        return sorted(unsecNoiseNodes.items(), key = lambda item: item[1], reverse=True)

    unsecNodesSorted = _byDeficiency()
    for i in range(len(unsecNodesSorted)):
        nodei = unsecNodesSorted[i][0]
        for (nodej, _) in unsecNodesSorted[i:]:
            ## never create a self-loop, and do not count an edge that already exists
            if nodej == nodei or Ganon.has_edge(nodei, nodej):
                continue
            if unsecNoiseNodes[nodei] > 0 and unsecNoiseNodes[nodej] > 0:
                Ganon.add_edge(nodej, nodei)
                unsecNoiseNodes[nodej] -= 1
                unsecNoiseNodes[nodei] -= 1
        unsecNodesSorted = _byDeficiency()      ## resort -> still checks 0 entries

    return Ganon

In [62]:
def ANNManon(G:nx.Graph, k = 10):
    '''Run the whole noise-node anonymization pipeline on G and return the anonymized graph.

    k is the anonymity level (default 10). Passing k=None derives it from the graph size
    as number_of_nodes // 20.

    Steps: group the nodes by degree, prioritise the groups smaller than k, merge them into
    target groups, then add noise nodes. The result comes from addingNoiseAnon.
    '''
    if k is None:
        ## TODO: find a consistent level for this later
        k = G.number_of_nodes() // 20

    _, degSeqGrouped = degreeGroupsCreation(G)
    insecGroupP = degreePrioritization(G, degSeqGrouped, k)
    group_prime, _ = targetGroupsCreation(G, insecGroupP, degSeqGrouped, k)
    return addingNoiseAnon(G, group_prime)