In [None]:
%load_ext autoreload
%autoreload 2
%pylab inline

In [None]:
import collections

import matplotlib.pyplot as plt # for plotting
import networkx          as nx  # for plotting and pagerank
import numpy             as np  # for random walks and numeric helpers
import pandas            as pd  # for plotting adjacency matrices
import seaborn           as sb  # for plotting

# xkcd colour names for the plotting palette; the order matters because the
# network plot further down picks the first three entries by index
colours = ["windows blue", "amber", "pale red", "greyish", "faded green", "dusty purple", "orange", "turquoise", "magenta"]

# reset seaborn to its defaults, then switch to the "white" style and install
# the xkcd colours above as the active palette
sb.set()
sb.set_style("white")
sb.set_palette(sb.xkcd_palette(colours))

In [None]:
# import our simulator
from network_simulation import *

In [None]:
# generate a network
# The call unpacks four values that the cells below use as follows:
#   G        - graph whose nodes carry "x"/"y" attributes and whose edges carry a "weight"
#   attacks  - sliceable sequence of node pairs
#   hubs     - collection of hub nodes (tested with `in`)
#   attacker - a single node, compared against the graph's nodes
# NOTE(review): the exact meaning of each keyword argument is defined in
# network_simulation.generate_graph and is not visible here - confirm there.
G, attacks, hubs, attacker = generate_graph( num_nodes         =    40
                                           , num_hubs          =     3
                                           , ticks             = 10000
                                           , seed              =     5
                                           , attacker_at_hub   = False
                                           , attacker_activity =     0.002 # should we include attacks when we learn?
                                           , hub_fixation      =     0
                                           )

We split the attacks in half: one half is included in the training data, and the other half is held out so that we can try to detect it later.

In [None]:
# Whether the first half of the attacks is written into the training graph.
# The add_edge call in this cell used to be commented out, which left a loop
# that computed weights but never changed G. The flag makes that choice explicit;
# False reproduces the previous behaviour (G is left untouched).
INCLUDE_ATTACKS_IN_TRAINING = False

# split the attacks: first half for (optional) training, second half for detection
split_at         = len(attacks) // 2
training_attacks = attacks[:split_at]

if INCLUDE_ATTACKS_IN_TRAINING:
    # NOTE: this mutates G in place, so re-running the cell adds the weights again
    for (source, target) in training_attacks:
        # every attack contributes one unit of weight to its edge,
        # creating the edge when it does not exist yet
        w = 1
        if G.has_edge(source, target):
            w += G.edges[(source, target)]["weight"]
        G.add_edge(source, target, weight = w)

# keep the other half for detection later
kept_attacks = attacks[split_at:]

In [None]:
# out of curiosity, plot the adjacency matrix
# (densify the sparse adjacency matrix first, then hand it to matshow)
adjacency = nx.adjacency_matrix(G).todense()
plt.matshow(adjacency)

In [None]:
# report how many attacks were held out for detection, then display them
# as the cell's output value
print(f"Kept {len(kept_attacks)} attacks")
kept_attacks

In [None]:
# show which nodes are hubs and which node is the attacker, so they can be
# matched against the labels in the network plot
print("hubs    ", list(hubs))
print("attacker", attacker)

In [None]:
# plot the network and mark regular nodes, hubs, and the attacker
# (the colours are taken by index from the currently active seaborn palette)
palette = sb.color_palette()
colours = { "regular"  : palette[0] # windows blue
          , "hub"      : palette[1] # amber
          , "attacker" : palette[2] # pale red
          }

# node positions come from the "x"/"y" attributes stored on every node
pos = { node : (attrs["x"], attrs["y"]) for node, attrs in G.nodes(data = True) }

# hub colouring takes precedence over attacker colouring, as in the if/elif order
node_colours = [ colours["hub"]      if node in hubs
            else colours["attacker"] if node == attacker
            else colours["regular"]
                 for node in G.nodes ]

# edge width grows logarithmically with the edge weight; np.log is used
# explicitly instead of the bare `log` that %pylab injects into the namespace
edges   = G.edges
weights = [1 + np.log(G[u][v]["weight"]) for u, v in edges]

plt.figure(figsize=(6,6))
nx.draw_networkx(G, pos = pos, node_color = node_colours, width = weights)

In [None]:
# plot the degrees per node, with a horizontal line at the mean degree
xys         = dict(G.degree)
mean_degree = np.mean(list(xys.values()))  # explicit numpy call instead of pylab's bare `average`

# the dict views are materialised as lists so matplotlib receives plain sequences
plt.scatter(list(xys.keys()), list(xys.values()), marker = "x")
# NOTE(review): the x-range 0..len(xys) presumably assumes integer node labels
# starting at 0 - confirm against generate_graph
plt.hlines(mean_degree, xmin = 0, xmax = len(xys))
plt.xlim(0, len(xys))

In [None]:
# find the flow distribution with standard PageRank
# (alpha is networkx's damping parameter; the result is not read by any of
# the cells visible in this notebook)
pagerank = nx.pagerank(G, alpha = 0.85)

In [None]:
# calculate the transition probabilities
# For every node, each neighbour's probability is its edge weight divided by
# the summed weight of all edges to the node's neighbours.
transition_probabilities = dict()

for s in G.nodes:
    neighbour_weights = { t : G.edges[(s,t)]["weight"] for t in G.neighbors(s) }
    w_total           = sum(neighbour_weights.values())
    # defaultdict(float) so that a missing neighbour reads as probability 0.0
    transition_probabilities[s] = collections.defaultdict(
        float,
        { t : weight / w_total for t, weight in neighbour_weights.items() },
    )

In [None]:
def do_random_walk(transition_probabilities, source, target, max_steps = None):
    """Simulate one random walk from `source` and count the steps until `target`.

    Parameters
    ----------
    transition_probabilities : mapping
        Maps each node to a mapping ``neighbour -> probability`` of moving to
        that neighbour in a single step.
    source : hashable
        Node at which the walk starts.
    target : hashable
        Node at which the walk stops.
    max_steps : int or None, optional
        Upper bound on the number of steps. ``None`` (the default) keeps the
        walk unbounded, so it only terminates once `target` is reached or a
        dead end is hit.

    Returns
    -------
    int or float
        Number of steps taken to reach `target` (0 when `source == target`).
        ``float("inf")`` when the walk gets stuck on a node without outgoing
        transitions, or when `max_steps` steps were taken without reaching
        `target`.

    Raises
    ------
    KeyError
        If the walk visits a node that is missing from `transition_probabilities`.
    ValueError
        Raised by NumPy when a node's probabilities do not sum to 1.
    """
    steps = 0
    while source != target:
        neighbours = transition_probabilities[source]
        if not neighbours:
            # dead end: nothing to move to, so the target can never be reached
            return float("inf")
        if max_steps is not None and steps >= max_steps:
            return float("inf")

        nodes         = list(neighbours.keys())
        probabilities = list(neighbours.values())
        # Draw an index rather than a node: np.random.choice needs a 1-D array,
        # which fails for tuple-valued labels and converts labels to NumPy types.
        source = nodes[np.random.choice(len(nodes), p = probabilities)]
        steps += 1
    return steps

Calculate the median number of steps between all pairs of nodes

In [None]:
# Median random-walk length for every ordered pair of distinct nodes, stored
# separately for pairs that share an edge and pairs that do not.
# NOTE(review): the walks draw from NumPy's global random state; unless that is
# seeded elsewhere, the medians will differ between runs.
median_steps_connected    = dict()
median_steps_disconnected = dict()

for i, source in enumerate(G.nodes, start = 1):
    # crude progress indicator: one digit per processed source node
    print(f"{i % 10}", end = "")
    for target in G.nodes:
        if source == target:
            continue
        # 15 independent walks per pair; the median damps out unusually long walks
        walk_lengths = [do_random_walk(transition_probabilities, source, target) for _ in range(15)]
        pair         = (source, target)
        bucket       = median_steps_connected if pair in G.edges else median_steps_disconnected
        bucket[pair] = np.median(walk_lengths)

Let's see if we can detect attacks

In [None]:
# fraction of the sorted median walk lengths that must lie at or below the cut-off
DETECTION_QUANTILE = 0.95

def cutoff_at_fraction(values, fraction):
    """Return the largest of the smallest ``round(fraction * n)`` entries of `values`.

    `values` is any iterable of comparable numbers. The rank is clamped to
    ``1..n`` so that very small or very large fractions still select an
    element. Raises ValueError when `values` is empty.
    """
    ordered = sorted(values)
    if not ordered:
        raise ValueError("cannot compute a cut-off from an empty collection")
    rank = min(len(ordered), max(1, round(fraction * len(ordered))))
    return ordered[rank - 1]

# detection threshold: walk lengths above this are unusual for connected pairs
threshold = cutoff_at_fraction(median_steps_connected.values(), DETECTION_QUANTILE)
print(threshold)

# same cut-off for disconnected pairs, printed for comparison only
disconnected_cutoff = cutoff_at_fraction(median_steps_disconnected.values(), DETECTION_QUANTILE)
print(disconnected_cutoff)

In [None]:
# Check every held-out attack against the threshold learned from connected pairs.
median_steps_attack = []
detected = 0

# The loop variables are deliberately NOT called `attacker`: that name holds the
# attacker node returned by generate_graph and must survive this cell, otherwise
# re-running the network plot would colour the wrong node.
# NOTE(review): the pair is unpacked as (target, attacker) here, while the cell
# that splits the attacks unpacks the same tuples as (source, target) - confirm
# the tuple order against generate_graph.
for (attack_target, attack_source) in kept_attacks:
    pair = (attack_target, attack_source)

    # .get() instead of indexing: indexing the inner defaultdict would insert a
    # zero-probability entry into transition_probabilities as a side effect
    p = transition_probabilities[attack_target].get(attack_source, 0.0)
    print(f"Attack from {attack_source} on {attack_target}, transition probability is {p}")

    # the median walk length was stored in one of two dicts, depending on
    # whether the pair shares an edge in G
    if pair in G.edges:
        steps = median_steps_connected[pair]
    else:
        steps = median_steps_disconnected[pair]
    median_steps_attack.append(steps)

    print(f"Median number of steps is {steps}.", end = "")
    if steps > threshold:
        print(" THIS IS AN ATTACK!")
        detected += 1
    else:
        print()
    print()

print(f"Detected {detected}/{len(kept_attacks)} attacks.")

In [None]:
# Overlay the distributions of median walk lengths for connected pairs,
# disconnected pairs and the held-out attacks on one wide axis.
# NOTE(review): sb.distplot is deprecated in recent seaborn releases; switch to
# histplot/kdeplot once the installed seaborn version is confirmed.
fig, ax = plt.subplots(figsize = (16, 4))

for series in ( median_steps_connected.values()
              , median_steps_disconnected.values()
              , median_steps_attack
              ):
    sb.distplot(list(series), ax = ax)

# cut the x-axis at 150 steps
ax.set_xlim(0, 150)