# Sandbox
For testing and developing new Cyber Security Assessment tools in an interactive and persistent development environment.

In [None]:
import itertools
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import scipy.stats.distributions as distr
import seaborn as sns

from cyber import Defence, CommmonDefences, CyberComponent
from tree import TreeNode, Link
from comm_network import Aggregator, Device, CommNetwork
from comm_network import LevelOfRedundancy
from attackers import RandomAttacker

## Procedural Generation
### Abstract Tree
Consists of Devices and Aggregators. 
* Aggregators (internal nodes) require a **Hard** amount of effort to compromise and have a 50% chance of being compromised if the necessary effort is spent
* Devices (leaf nodes) require an **Easy** amount of effort to compromise and also have a 50% chance of being compromised if the necessary effort is spent
* Control Center (root node) is **Very Hard** to compromise

Controllable parameters include:
* Number of devices (leaf nodes)
* Number of Entrypoints (points where cyberattacks can originate)
* Level of Redundancy (number of children per parent node)
* Random deviation in Redundancy
* Sibling to Sibling communication (lateral edges between nodes on the same level)

In [None]:
# A random seed is drawn first and then overridden with a fixed value, so the
# run below is repeatable. Remove the override to explore other seeds.
seed = np.random.randint(low=0, high=52600)
seed = 27194
print(f"Seed: {seed}")
np.random.seed(seed)

# Procedurally generate the communication network used by the later cells.
pcn = CommNetwork(
    n_devices=15,
    n_entrypoints=1,
    redundancy=5,
    redundancy_deviation=1,
    enable_sibling_to_sibling_comm=True,
)

# Keep handles on the root node and the underlying graph for plotting.
root = pcn.root
print(CommNetwork.show_tree(root))
tree = pcn.graph
print(f"Number of Components: {pcn.n_components}")

## Monte Carlo
Build an approximate profile of the network's cyber security by launching many cyber attacks. The higher N_ATTACKS is, the more precise the resulting distribution becomes; however, this comes at the cost of increased computation time.
The more nodes are compromised, the more successful the attack.

### Active Graph Only
Only perform Monte Carlo simulation on the currently active network.

In [None]:
N_ATTACKS = 1000
BUDGET = 5200

# One slot per simulated attack.
compromised_array = np.zeros(shape=N_ATTACKS, dtype=np.int16)
effort_array = np.zeros(shape=N_ATTACKS, dtype=np.float32)

for attack_no in range(N_ATTACKS):
    # A fresh attacker with a full budget is used for every run.
    attacker = RandomAttacker(budget=BUDGET, verbose=False)
    nodes_compromised, total_effort_spent = attacker.attack_network(pcn)
    # print(f"Nodes Compromised: {[n.id for n in nodes_compromised]} ({len(nodes_compromised)})")
    # Only Device instances are tallied; other compromised node types are ignored.
    compromised_array[attack_no] = sum(isinstance(n, Device) for n in nodes_compromised)
    effort_array[attack_no] = total_effort_spent
    # Presumably restores the network to its pre-attack state - verify against CommNetwork.reset.
    pcn.reset()

In [None]:
# Histograms: compromised-device counts on top, effort spent underneath.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8,6))
fig.suptitle(f"Budget: {BUDGET}, No. of Components: {pcn.n_components}, No. of Entrypoints: {pcn.n_entrypoints}")

# Top panel: discrete histogram normalised to probabilities.
sns.histplot(compromised_array, discrete=True, stat="probability", ax=axes[0])
axes[0].set_xticks(np.arange(0, len(pcn.graph.nodes())))
axes[0].set_xlabel("No. of Devices Compromised")

# Bottom panel: effort spent per attack, in unit-wide bins.
sns.histplot(effort_array, binwidth=1, ax=axes[1])
axes[1].set_xlabel("Effort Spent")

plt.tight_layout()
plt.show()

### Varied Parameter
Perform Monte Carlo simulation while varying a particular parameter, such as the level of redundancy in the network.

In [None]:
N_ATTACKS = 1000
N_DEVICES = 30
BUDGET = 52
SEED = np.random.randint(low=0, high=52600)
N_ENTRYPOINTS = 1 # Total budget is multiplied by this!
MIN_REDUNDANCY = 2
MAX_REDUNDANCY = N_DEVICES
REDUNDANCY_STEP = 2
REDUNDANCY_DEVIATION = 1
# np.arange excludes the stop value, so MAX_REDUNDANCY itself is never simulated.
redundancies = np.arange(MIN_REDUNDANCY, MAX_REDUNDANCY, REDUNDANCY_STEP)

print(f"Seed: {SEED}")
np.random.seed(SEED)

# Rows index the attack number, columns index the redundancy setting.
compromised_array = np.zeros(shape=(N_ATTACKS, len(redundancies)), dtype=np.int16)
effort_array = np.zeros(shape=(N_ATTACKS, len(redundancies)), dtype=np.float32)
for i, redundancy in enumerate(redundancies):
    print("Redundancy:", redundancy)
    # A new network is generated for every redundancy setting.
    pcn = CommNetwork(n_devices=N_DEVICES,
                      n_entrypoints=N_ENTRYPOINTS,
                      redundancy=redundancy, # TODO: Rename to opposite of redundancy (higher value is less redundant!)
                      redundancy_deviation=REDUNDANCY_DEVIATION,
                      enable_sibling_to_sibling_comm=True)
    # budget = budget_per_device*no_of_devices
    for attack_no in range(N_ATTACKS):
        attacker = RandomAttacker(budget=BUDGET, verbose=False)
        nodes_compromised, total_effort_spent = attacker.attack_network(pcn)
        # Count only Device instances so the tally matches the
        # "No. of Devices Compromised" axis these results are plotted against
        # (the single-network Monte Carlo cell applies the same filter).
        compromised_array[attack_no, i] = sum(isinstance(n, Device) for n in nodes_compromised)
        effort_array[attack_no, i] = total_effort_spent
        pcn.reset()
# Frequency of each compromised-device count across all runs and settings.
print(dict(zip(*np.unique(compromised_array, return_counts=True))))

In [None]:
import pandas as pd

# Reshape to long form: one row per (redundancy setting, attack) pair, with the
# compromised count in the default "value" column.
df = pd.DataFrame(compromised_array, columns=redundancies).melt(var_name='Redundancy')

display(df)

# Overlaid discrete histograms, one hue per redundancy setting.
fig, ax = plt.subplots(figsize=(8,6))
sns.histplot(df, x="value", hue="Redundancy", discrete=True, ax=ax)
sns.move_legend(ax, "upper right", ncols=4, title="Redundancy")
ax.set(xlabel="No. of Devices Compromised", ylabel="Count")


## Visualization
Plot the structure of the communication network. 

In [None]:
def hierarchy_pos(G:nx.DiGraph, root:TreeNode, width:float=1., vert_gap:float=0.2, vert_loc:float=0, xcenter:float=0.5):
    """
    Compute plotting positions that lay a tree out as a top-down hierarchy.

    Credit: Joel (https://stackoverflow.com/a/29597209/2966723)
    Licensed under CC Attribution-Share Alike

    The traversal follows each node's own ``children`` attribute rather than
    the edges of ``G``; ``G`` is only consulted for its type and, when no
    root is supplied, to pick one.

    G (networkx.DiGraph): Graph (must be a tree)
    root (Node): Root node of current graph
    width (float): Horizontal space allocated for this branch - avoids overlap with other branches. Defaults to 1.0
    vert_gap (float): Gap between levels of hierarchy. Defaults to 0.2
    vert_loc (float): Vertical location of root. Defaults to 0.0
    xcenter (float): Horizontal location of root. Defaults to 0.5

    Returns a dict mapping every visited node to its ``(x, y)`` position.
    """
    # The tree check is intentionally disabled:
    # if not nx.is_tree(G):
    #     raise TypeError('cannot use hierarchy_pos on a graph that is not a tree')

    if root is None:
        if isinstance(G, nx.DiGraph):
            # First node in topological order (allows back compatibility with nx version 1.11)
            root = next(iter(nx.topological_sort(G)))
        else:
            root = np.random.choice(list(G.nodes))

    graph_is_undirected = not isinstance(G, nx.DiGraph)

    def _place(node, span, y, x, placed, parent):
        """Record the position of ``node`` and recurse into its children.

        placed: dict of positions assigned so far (updated in place)
        parent: node this branch hangs from - only matters for non-directed graphs
        """
        placed[node] = (x, y)
        offspring = node.children  # list(G.neighbors(node))
        if graph_is_undirected and parent is not None:
            # NOTE(review): remove_parents is a project method and appears to
            # modify the nodes themselves while plotting - confirm this is intended.
            for child in offspring:
                child.remove_parents(parent)
        if len(offspring) == 0:
            return placed
        # Split this branch's horizontal span evenly between the children,
        # visiting them in ascending id order.
        slot = span/len(offspring)
        cursor = x - span/2 - slot/2
        for child in sorted(offspring, key=lambda c: c.id):
            cursor += slot
            placed = _place(child, slot, y - vert_gap, cursor, placed, node)
        return placed

    return _place(root, width, vert_loc, xcenter, {}, None)

# Per-node / per-edge styling arrays, indexed in the iteration order of
# tree.nodes() and tree.edges() respectively.
node_color_mask = np.full(tree.number_of_nodes(), fill_value="#1f78b4", dtype=object)
node_edge_color_mask = np.full(tree.number_of_nodes(), fill_value="#000000", dtype=object)
edge_color_mask = np.full(tree.number_of_edges(), fill_value="#000000", dtype=object)
node_shape_mask = np.full(tree.number_of_nodes(), fill_value="s", dtype=object)
root_idx = None

for i, node in enumerate(tree.nodes()):
    # Leaf nodes (Devices) are green, internal nodes (Aggregators) are blue;
    # the darker shade of each pair marks an entry point.
    entry_colour, plain_colour = ("green", "lightgreen") if node.is_leaf else ("#1f78b4", "#1f98ff")
    node_color_mask[i] = entry_colour if node.is_accessible else plain_colour
    if len(node.parents) == 0:
        # A node without parents is the root of the tree (the control center).
        root_idx = i
        node_color_mask[i] = "coral"
    if node.is_compromised:
        # Compromised/hacked nodes get a red outline.
        node_edge_color_mask[i] = "#ff0000"

for j, (start_node, end_node) in enumerate(tree.edges()):
    # A communication channel is only marked compromised when both of its endpoints are.
    if not (start_node.is_compromised and end_node.is_compromised):
        continue
    edge_color_mask[j] = "#ff0000"

# >> Plotting <<
fig, axes = plt.subplots(nrows=1, ncols=2,  figsize=(24,6), width_ratios=[0.6, 0.4])
label_map = {node:node.id for node in tree.nodes()}


def _draw_network(ax, pos, node_size):
    """Draw nodes, id labels and edges of the network on ``ax`` using layout ``pos``."""
    nx.draw_networkx_nodes(tree, pos=pos, ax=ax,
                           node_size=node_size, node_shape="s", node_color=node_color_mask,
                           linewidths=1.0, edgecolors=node_edge_color_mask)
    nx.draw_networkx_labels(tree, pos=pos, labels=label_map, ax=ax, font_size=10)
    nx.draw_networkx_edges(tree, pos=pos, ax=ax, edge_color=edge_color_mask)


# Left panel: hierarchical / tree visualization of the communication network
tree_pos = hierarchy_pos(nx.to_undirected(tree), root)
_draw_network(axes[0], tree_pos, node_size=800)

# Right panel: spring (force-directed) visualization of the same network
spring_pos = nx.layout.spring_layout(tree)
_draw_network(axes[1], spring_pos, node_size=400)

plt.tight_layout()
plt.show()

## Static Analysis
Given an infinite budget, break down the probability of compromising components in the network. The resulting probabilities are exact (except for floating point precision issues) but do not scale well to larger communication networks (> 5 nodes). Useful as a static feature of a communication network.

In [None]:
import copy
from fractions import Fraction
# TODO: Account for probability of 0 devices being compromised

def iterate_over_paths(path, prob, reachable_nodes=None, visited_nodes=None, id_to_node=None):
    """Recursively enumerate every attack path that extends ``path``.

    Each compromise attempt on a node either fails (the path ends there) or
    succeeds, after which the attack moves on to one of the reachable,
    not-yet-visited nodes, each chosen with equal probability.

    Args:
        path: list of node ids attempted so far; the last entry is the node
            being attacked in this call.
        prob: probability of having reached the current node.
        reachable_nodes: mapping whose keys are the ids reachable before this
            step. Not modified; defaults to empty.
        visited_nodes: mapping whose keys are the ids already attempted.
            Not modified; defaults to empty.
        id_to_node: mapping from node id to node object. Nodes must provide
            ``id``, ``get_neighbours()`` and ``get_prob_to_compromise()``.

    Yields:
        ``(path, probability, ends_on_failure)`` tuples. ``ends_on_failure``
        is True when the attempt on the last node of ``path`` failed, and
        False when it succeeded with nothing left to attack.

    Raises:
        KeyError: if the last id in ``path`` is missing from ``id_to_node``.
    """
    # Work on private copies. The original used mutable default arguments and
    # updated them in place, so state leaked from one top-level call into the
    # next whenever the defaults were relied upon.
    reachable_nodes = dict(reachable_nodes) if reachable_nodes else {}
    visited_nodes = dict(visited_nodes) if visited_nodes else {}
    if id_to_node is None:
        id_to_node = {}

    current_id = path[-1]
    current_node = id_to_node[current_id]
    visited_previously = current_id in visited_nodes
    visited_nodes[current_id] = None

    # Extend the frontier with this node's neighbours (insertion order is kept
    # so paths are enumerated deterministically), then drop visited ids.
    for neighbour in current_node.get_neighbours():
        reachable_nodes[neighbour.id] = None
    reachable_nodes = {k: None for k in reachable_nodes if k not in visited_nodes}

    success_prob = current_node.get_prob_to_compromise()
    # If we fail, this path terminates
    yield path, prob*(1-success_prob), True
    if visited_previously:
        return

    n_reachable = len(reachable_nodes)
    if n_reachable == 0:
        # No more nodes reachable (entire network compromised)
        yield path, prob*success_prob, False
        return

    # On success, the next target is picked uniformly from the frontier.
    next_prob = prob*success_prob*(1/n_reachable)
    for reachable_node_id in reachable_nodes:
        yield from iterate_over_paths(path+[reachable_node_id], next_prob,
                                      reachable_nodes, visited_nodes,
                                      id_to_node=id_to_node)
def get_all_paths(graph):
    """Yield every attack path through ``graph``, trying each node as the starting point.

    Every node is equally likely to be the start, so each enumeration begins
    with probability ``1 / number_of_nodes``. Yields the
    ``(path, probability, ends_on_failure)`` tuples produced by
    ``iterate_over_paths``.
    """
    id_to_node = {}
    for node in graph.nodes():
        id_to_node[node.id] = node
    n_nodes = len(graph.nodes())
    # Snapshot the ids so the start order is fixed before enumeration begins.
    for start_node_id in list(id_to_node):
        yield from iterate_over_paths(
            [start_node_id],
            prob=1/n_nodes,
            reachable_nodes={},
            visited_nodes={},
            id_to_node=id_to_node,
        )
     
# Aggregate the enumerated paths into a distribution over the number of
# compromised nodes.
sum_probs = 0.0
n_probs = {}
n_paths = 0  # stays 0 (instead of raising NameError) when the graph has no paths
for path_no, (path, prob, ends_on_failure) in enumerate(get_all_paths(pcn.graph)):
    print(f"Path {path_no} :: Prob {str(Fraction(prob).limit_denominator()):<15} :: {'-'.join([str(node) for node in path])} :: {ends_on_failure}")
    # When a path ends on a failure its last node was NOT compromised, so the
    # compromised count is one less than the path length. A start node that
    # fails immediately therefore lands in the 0 bucket, giving P(0) exactly.
    # The previous code set P(0) to the sum of the other buckets, which is P(>=1).
    path_length = len(path) - 1 if ends_on_failure else len(path)
    n_probs[path_length] = n_probs.get(path_length, 0.0) + prob
    sum_probs += prob
    # path_no is a 0-based index, so the number of paths is one more.
    n_paths = path_no + 1
print(f"No. of Paths: {n_paths}. Sum of Probabilities: {sum_probs} ({Fraction(sum_probs).limit_denominator()})")
print("\n".join(f"{k} devices: {v}" for k,v in sorted(n_probs.items(),key=lambda item: item[0])))

In [None]:
# Adjacency matrix with rows/columns ordered by ascending node id.
ordered_nodes = sorted(pcn.graph.nodes(), key=lambda n: n.id)
A = nx.adjacency_matrix(pcn.graph, nodelist=ordered_nodes).todense()
print("A\n", A)

# The diagonal is zeroed before each product, so entries that start and end on
# the same node do not feed into the next power.
np.fill_diagonal(A, val=0)

A2 = A@A
np.fill_diagonal(A2, val=0)
print("A^2\n", A2)

A3 = A2@A
np.fill_diagonal(A3, val=0)
print("A^3\n", A3)

In [None]:
# Per-node compromise probability: the product of the success probabilities
# (`p`) of all of the node's defences.
time_required = 0.0  # sum of expected effort over every defence of every node
nodes = pcn.graph.nodes()
node_probs = {}
for node in nodes:
    probability_to_compromise = 1.0
    for defence in node.defences.values():
        expected_effort = defence.effort_distribution.expect()
        time_required += expected_effort
        probability_to_compromise *= defence.p
    node_probs[node] = probability_to_compromise

# Brute force over every subset of nodes, treating each node as compromised
# independently, to obtain P(at least n nodes compromised) for n = N .. 1.
prob_to_compromise_n_devices = {}
all_nodes = set(nodes)
cumulative = 0.0
for n_devices in range(pcn.n_components, 0, -1):
    # P(exactly n_devices compromised): sum over all subsets of that size.
    exactly_n = 0.0
    for combination in itertools.combinations(nodes, n_devices):
        probability_to_compromise = 1.0
        combination = set(combination)
        missing_nodes = all_nodes.difference(combination)
        for node in combination:
            probability_to_compromise *= node_probs[node]
        for node in missing_nodes:
            probability_to_compromise *= (1 - node_probs[node])
        exactly_n += probability_to_compromise
    # Add only the "exactly n" mass to the running total. The previous code
    # added the already-cumulative value back onto the accumulator, which
    # double counted the higher buckets from the third iteration onwards.
    cumulative += exactly_n
    prob_to_compromise_n_devices[n_devices] = cumulative
    print(f"{n_devices} Devices: {prob_to_compromise_n_devices[n_devices]}")
print(prob_to_compromise_n_devices)

In [None]:
# When every component is compromised with the same probability, the number of
# compromised components follows a Binomial(N, p) distribution, so the
# cumulative "at least k" probabilities can be computed in closed form.
# Takes: 12.6 µs
N = pcn.n_components
k = 2
p = 0.5
cumulative = 0.0
for k in reversed(range(1, pcn.n_components + 1)):
    # Binomial probability of exactly k successes out of N.
    prob = math.comb(N, k)*math.pow(p, k)*math.pow(1-p,N-k)
    cumulative += prob
    print(f"{k} Devices: {cumulative}")

In [None]:
import scipy.stats.distributions as distr

# Friendly name -> scipy.stats distribution object
distr_lookup = dict(
    TruncNorm=distr.truncnorm,  # Continuous, loc=mean (float), scale=standard deviation (float)
    Exponential=distr.expon,  # Continuous, scale = 1 / lambda (float)
    Gamma=distr.gamma,  # Continuous, a = shape parameter (integer)
    Bernoulli=distr.bernoulli,  # Discrete
)

n_attacks = 20
# Each attack succeeds or fails on an independent fair coin flip.
is_successful = distr.bernoulli(0.5).rvs(size=n_attacks).astype(bool)
# NOTE(review): scale=0.0 looks like a placeholder - it makes the exponential
# degenerate. Confirm the intended rate.
all_times = distr.expon(scale=0.0).rvs(size=n_attacks)
# Keep only the durations that belong to successful attacks.
time_taken = all_times[is_successful]
print(f"Successful Attacks {sum(is_successful)}/{n_attacks}\nTime Taken per Successful Attack: {time_taken}")

## Communication Network Specifications
Explores how we can supply structured information to our procedural network generation algorithm. Includes information such as the types of components and defences we expect to see in the communication network.

In [None]:
import json
import inspect
from scipy.stats import distributions as distr
from cyber import Defence
from pathlib import Path as p
class SpecEncoder(json.JSONEncoder):
    """JSON encoder that can also serialise objects exposing a ``dist`` attribute.

    Such objects (e.g. frozen scipy distributions) are written as
    ``{"distribution": {"name": <obj.dist.name>, "kwds": <obj.kwds>}}``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        Raises:
            TypeError: (via the base class) for any object without a ``dist`` attribute.
        """
        if not hasattr(obj, "dist"):
            # Let the base class default method raise the TypeError
            return super().default(obj)
        # Scipy Distributions
        payload = {"name": obj.dist.name, "kwds": obj.kwds}
        return {"distribution": payload}
    
def is_distribution_generator(member):
    """Return True when ``member`` is a class whose name ends in ``_gen``."""
    return inspect.isclass(member) and member.__name__.endswith("_gen")

class SpecDecoder(json.JSONDecoder):
    """JSON decoder that rebuilds distributions from ``{"distribution": {...}}`` objects."""

    def __init__(self, *args, **kwargs):
        """Install the object hook and index the distributions available in ``distr``."""
        super().__init__(*args, object_hook=self.object_hook, **kwargs)
        # Every "<name>_gen" class found in `distr` marks "<name>" as a valid
        # distribution name; the lookup maps each such name to the member of
        # `distr` that carries it.
        generator_classes = inspect.getmembers(distr, predicate=is_distribution_generator)
        valid_names = {class_name[:-4] for class_name, _ in generator_classes}
        self.distr_lookup = {
            name: member
            for name, member in inspect.getmembers(distr)
            if name in valid_names
        }

    def object_hook(self, dct):
        """Swap a recognised distribution description for the constructed distribution.

        Dicts without a 'distribution' key, or naming an unknown distribution,
        are returned unchanged.
        """
        if 'distribution' not in dct:
            return dct
        # Scipy Distributions
        dist_dict = dct["distribution"]
        dist_name = dist_dict['name']
        if dist_name not in self.distr_lookup:
            return dct
        return self.distr_lookup[dist_name](**dist_dict["kwds"])

# Example of encoding a single distribution:
# dumped = json.dumps(distr.gamma(a=2.0, scale=1.0), cls=SpecEncoder)

# Load the specification from the current working directory. The encoding is
# given explicitly so decoding does not depend on the platform's locale
# default; JSON interchange files are expected to be UTF-8.
with open(p.cwd() / "SmartMeterNetworkSpecifications.json", "r", encoding="utf-8") as f:
    specs = json.load(f, cls=SpecDecoder)
print(specs)
# Serialise the decoded specification back to JSON text with the matching encoder.
print(json.dumps(specs, cls=SpecEncoder))
