# Sandbox
For testing and developing new Cyber Security Assessment tools in an interactive and persistent development environment.

In [None]:
import itertools
import json
import copy
import random
import warnings
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import scipy.stats.distributions as distr
import seaborn as sns
import pandapower
from pathlib import Path as p

from cyber import Defence, Vulnerability, CommmonDefences, CyberComponent
from tree import TreeNode, Link
from comm_network import Aggregator, Device, CommNetwork
from attackers import RandomAttacker
from visualise import plot_communication_network

## Procedural Generation
### Abstract Tree
Consists of Devices and Aggregators. 
* Aggregators (internal nodes) require a **Hard** amount of effort to compromise and have a 50% chance of being compromised if the necessary effort is spent
* Devices (leaf nodes) require an **Easy** amount of effort to compromise and also have a 50% chance of being compromised if the necessary effort is spent
* Control Center (root node) is **Very Hard** to compromise

Controllable parameters include:
* Number of devices (leaf nodes)
* Number of Entrypoints (points where cyberattacks can originate)
* Number of children per parent node (inversely proportional to redundancy)
* Random deviation in number of children
* Sibling to Sibling communication (lateral edges between nodes on the same level)

In [None]:
# Draw a fresh seed and apply it to both NumPy's global RNG and the stdlib `random`
# module, so the run can be repeated from the printed value.
seed = np.random.randint(low=0, high=52600)
np.random.seed(seed); random.seed(seed)
# Prints the first word of NumPy's RNG state vector rather than `seed` itself.
# NOTE(review): presumably identical to `seed` immediately after seeding - confirm.
print(f"Seed: {np.random.get_state()[1][0]}")

# FutureWarnings are silenced only while the grid and network are being built.
with warnings.catch_warnings():
    warnings.filterwarnings(action="ignore", category=FutureWarning)
    grid=pandapower.networks.case14()
    print(grid)
    # NOTE(review): children_per_parent=0 together with child_no_deviation=5 - presumably
    # the branching is then driven by the deviation and/or `grid`; confirm against CommNetwork.
    pcn = CommNetwork(n_devices=30, n_entrypoints=1, children_per_parent=0, child_no_deviation=5, 
                    network_specs=p.cwd() / "specifications" / "WAMS_specifications.json", 
                    # "Default_specifications.json", "SmartMeter_specifications.json", "SCADA_specifications.json", "WAMS_specifications.json"
                    grid=grid,
                    enable_sibling_to_sibling_comm=True)
# `pcn` is reused by the later cells in this notebook.
print(CommNetwork.show_tree(pcn.root))
print(f"Number of Components: {pcn.n_components}")

## Visualization
Plot the structure of the communication network. 

In [None]:
# Plot the network built above; the palette name is passed straight to the plotting helper.
plot_communication_network(pcn, palette="tab10") # "tab10" (default), "Set2", "Paired", "flare"

## Monte Carlo
Build an approximate profile of the network's cyber security by launching many cyber attacks. The higher N_ATTACKS is, the more precise the resulting distribution, but this comes at the cost of increased computation time.
The more nodes are compromised, the more successful the attack.

### Active Graph Only
Only perform Monte Carlo simulation on the currently active network.

In [None]:
# Monte Carlo settings: how many attacks to simulate and the budget handed to each attacker.
N_ATTACKS = 1000
BUDGET = 52

# One entry per attack: number of compromised Device nodes, and the effort spent.
compromised_array = np.zeros(N_ATTACKS, dtype=np.int16)
effort_array = np.zeros(N_ATTACKS, dtype=np.float32)

for attack_no in range(N_ATTACKS):
    # A fresh attacker is created for every attack.
    attacker = RandomAttacker(budget=BUDGET, verbose=False)
    nodes_compromised, total_effort_spent = attacker.attack_network(pcn)
    # Only Device instances are counted; other compromised node types are ignored.
    compromised_array[attack_no] = sum(1 for node in nodes_compromised if isinstance(node, Device))
    effort_array[attack_no] = total_effort_spent
    # Reset the network before the next attack.
    pcn.reset()

In [None]:
# Histogram
# Two stacked panels: devices compromised per attack (top) and effort spent per attack (bottom).
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(8,6))
# NOTE(review): `attacker` is the instance left over from the simulation loop; `__name__` is
# normally only available on the class - confirm RandomAttacker exposes it on instances,
# otherwise `type(attacker).__name__` is needed here.
# The negative `y` puts the title text below the figure rather than above it.
fig.suptitle(f"Attacker: {attacker.__name__}, Budget: {BUDGET}\nNetwork Size: {pcn.n_components}, No. of Devices: {pcn.n_devices}, No. of Entrypoints: {pcn.n_entrypoints}", 
             y=-0.05, fontsize="medium", ma="center")
# stat="probability" normalises the bar heights so they sum to 1.
sns.histplot(compromised_array, discrete=True, stat="probability", ax=axes[0])
# One tick per integer from 0 up to (number of graph nodes - 1).
axes[0].set(xticks=np.arange(0, len(pcn.graph.nodes())), xlabel="No. of Devices Compromised")
sns.histplot(effort_array, binwidth=1, ax=axes[1])
axes[1].set(xlabel="Effort Spent")
plt.tight_layout()
plt.show()

### Varied Parameter
Perform Monte Carlo simulation while varying a particular parameter, such as the level of redundancy in the network. 

In [None]:
import os
import multiprocess as mp
# Configuration for the parameter sweep over "children per parent".
N_ATTACKS = 10000
N_DEVICES = 30
BUDGET = 52
# NOTE(review): other cells load specs from p.cwd() / "specifications" / "<name>_specifications.json";
# confirm this file name and location still exist.
SPEC = p.cwd() / "SmartMeterNetworkSpecifications.json"
SEED = np.random.randint(low=0, high=52600)
N_ENTRYPOINTS = 1 # Total budget is multiplied by this!
MIN_CHILDREN = 2
MAX_CHILDREN = N_DEVICES
CHILD_NO_STEP = 2
CHILD_NO_DEVIATION = 0
# np.arange excludes the stop value, so MAX_CHILDREN itself is never simulated.
no_of_children = np.arange(MIN_CHILDREN, MAX_CHILDREN, CHILD_NO_STEP)
# Keyword arguments shared by every generated network; children_per_parent is added per run.
network_specs = dict(n_devices=N_DEVICES,
                     n_entrypoints=N_ENTRYPOINTS,
                     network_specs=SPEC,
                     child_no_deviation=CHILD_NO_DEVIATION,
                     enable_sibling_to_sibling_comm=True)

print(f"Seed: {SEED}")
np.random.seed(SEED)

# Result matrices: one row per attack, one column per children-per-parent setting.
compromised_array = np.zeros(shape=(N_ATTACKS, len(no_of_children)), dtype=np.int16)
effort_array = np.zeros(shape=(N_ATTACKS, len(no_of_children)), dtype=np.float32)
# Informational only: the printed value is cpu_count() minus 2 and is not used to size anything here.
print(f"CPU Thread Count: {mp.cpu_count()-2}")

def monte_carlo(process_idx, seed, n_attacks, budget, **network_kwargs):
    """
    Run `n_attacks` random attacks against one procedurally generated network.

    Args:
        process_idx: Opaque identifier, returned unchanged so the caller can match results to runs.
        seed: Seed applied to NumPy's global RNG before the network is generated.
        n_attacks: Number of attacks to simulate.
        budget: Budget given to each RandomAttacker.
        **network_kwargs: Forwarded verbatim to CommNetwork.

    Returns:
        tuple: (process_idx, devices compromised per attack as int16 array,
                effort spent per attack as float32 array).
    """
    # Imports live inside the function - presumably so it is self-contained when
    # executed in a worker process.
    import os
    import numpy as np
    from comm_network import CommNetwork, Device
    from attackers import RandomAttacker

    # Seed first, then build the network from the supplied parameters.
    np.random.seed(seed)
    network = CommNetwork(**network_kwargs)

    device_counts = np.zeros(n_attacks, dtype=np.int16)
    efforts = np.zeros(n_attacks, dtype=np.float32)

    for idx in range(n_attacks):
        intruder = RandomAttacker(budget=budget, verbose=False)
        compromised, spent = intruder.attack_network(network)
        # Count only the Device instances among the compromised nodes.
        device_counts[idx] = sum(1 for node in compromised if isinstance(node, Device))
        efforts[idx] = spent
        # Same network is reused; it is reset between attacks
        # (i.e. same network, different entrypoint - per the original author's note).
        network.reset()

    return process_idx, device_counts, efforts

# Run one Monte Carlo job per children-per-parent setting, each in its own worker process.
with mp.Pool(processes=len(no_of_children)) as pool:
    results = []
    for i, children_per_parent in enumerate(no_of_children):
        print("Children per parent:", children_per_parent)
        # Shared network settings plus the value being varied in this run.
        kwds = dict(network_specs, children_per_parent=children_per_parent)
        # Every job receives the same SEED; `i` is echoed back to identify the result column.
        pending = pool.apply_async(monte_carlo, args=[i, SEED, N_ATTACKS, BUDGET], kwds=kwds)
        results.append(pending)

    # Block on each job in submission order and store its results in the matching column.
    for result in results:
        process_idx, compromises, efforts = result.get()
        compromised_array[:, process_idx] = compromises
        effort_array[:, process_idx] = efforts

In [None]:
import pandas as pd
# Reshape to long form: one row per (children-per-parent setting, attack result).
df = pd.DataFrame(compromised_array, columns=no_of_children).melt(var_name="Children")

display(df)
fig, ax = plt.subplots(figsize=(8, 6))
# common_norm=False normalises each "Children" group separately.
sns.histplot(
    data=df,
    x="value",
    hue="Children",
    discrete=True,
    stat="probability",
    common_norm=False,
    ax=ax,
)
sns.move_legend(ax, "upper right", ncols=4, title="Children per Parent")
ax.set_xlabel("No. of Devices Compromised")
plt.show()

## Static Analysis
Given an infinite budget, breaks down the probability of compromising components in the network. The resulting probabilities are exact (apart from floating-point rounding), but the computation does not scale well to larger communication networks (> 5 nodes). Useful as a static feature of a communication network. 

In [None]:
import copy
from fractions import Fraction
import multiprocess as mp
# TODO: Account for probability of 0 devices being compromised

def iterate_over_paths(path, prob, reachable_nodes=None, visited_nodes=None, id_to_node=None):
    """
    Recursively enumerate attack paths starting from the last node of `path`.

    At each node the attack either fails (the path ends) or succeeds, after which one
    of the currently reachable, unvisited nodes is attempted next, each with equal
    probability.

    Args:
        path (list): Node ids attempted so far; the last entry is the node being attempted now.
        prob (float): Probability of having reached this attempt.
        reachable_nodes (dict | None): Ids (as keys) reachable before this attempt. Not mutated.
        visited_nodes (dict | None): Ids (as keys) already attempted. Not mutated.
        id_to_node (dict | None): Maps node id to node object. Nodes must provide
            `get_neighbours()` (iterable of objects with `.id`) and `get_prob_to_compromise()`.

    Yields:
        tuple: (path, probability, ends_on_failure). `ends_on_failure` is True when the
        attempt on the last node of `path` failed, and False when every node in `path`
        was compromised and nothing else is reachable.

    Raises:
        KeyError: If the last id in `path` is not present in `id_to_node`.
    """
    # Defaults used to be `{}` literals that were mutated below, so state leaked between
    # calls relying on the defaults. Private copies also keep the caller's dicts untouched.
    reachable_nodes = dict(reachable_nodes) if reachable_nodes else {}
    visited_nodes = dict(visited_nodes) if visited_nodes else {}
    if id_to_node is None:
        id_to_node = {}

    current_id = path[-1]
    current_node = id_to_node[current_id]
    visited_previously = current_id in visited_nodes
    if not visited_previously:
        visited_nodes[current_id] = None

    # Dict keys are used as an insertion-ordered set so the enumeration order is deterministic.
    for neighbour in current_node.get_neighbours():
        reachable_nodes[neighbour.id] = None
    reachable_nodes = {k: None for k in reachable_nodes if k not in visited_nodes}
    success_prob = current_node.get_prob_to_compromise()

    # If we fail, this path terminates
    yield path, prob*(1-success_prob), True
    if visited_previously:
        return

    # No more nodes reachable (entire network compromised)
    if not reachable_nodes:
        yield path, prob*success_prob, False
        return

    # On success the next target is picked uniformly among the reachable nodes.
    n_reachable = len(reachable_nodes)
    for reachable_node_id in list(reachable_nodes):
        yield from iterate_over_paths(path+[reachable_node_id], prob*success_prob*(1/n_reachable),
                                      reachable_nodes, visited_nodes,
                                      id_to_node=id_to_node)

def get_all_paths(graph):
    """
    Yield every attack path in `graph`, trying each node in turn as the starting point.

    Each starting node is given an initial probability of 1 / (number of nodes).
    Items are yielded exactly as produced by `iterate_over_paths`.
    """
    nodes = list(graph.nodes())
    lookup = {}
    for node in nodes:
        lookup[node.id] = node

    # Different starting locations
    for start_id in tuple(lookup):
        start_prob = 1 / len(nodes)
        yield from iterate_over_paths(
            [start_id],
            prob=start_prob,
            reachable_nodes={},
            visited_nodes={},
            id_to_node=lookup,
        )
     
# Aggregate the enumerated paths into a distribution over "number of nodes compromised".
sum_probs = 0.0
n_probs = {}
n_paths = 0  # stays 0 (instead of raising NameError) when no paths are produced
for path_no, (path, prob, ends_on_failure) in enumerate(get_all_paths(pcn.graph)):
    print(f"Path {path_no} :: Prob {str(Fraction(prob).limit_denominator()):<15} :: {'-'.join([str(node) for node in path])} :: {ends_on_failure}")
    # The last node of a failed path was attempted but not compromised, so it is not counted.
    # A failure on the very first node therefore lands in the 0 bucket directly, replacing
    # the previous estimate that assigned bucket 0 the sum of all other buckets.
    path_length = len(path) - 1 if ends_on_failure else len(path)
    n_probs[path_length] = n_probs.get(path_length, 0.0) + prob
    sum_probs += prob
    # path_no is a zero-based index, so the number of paths is one more.
    n_paths = path_no + 1
print(f"No. of Paths: {n_paths}. Sum of Probabilities: {sum_probs} ({Fraction(sum_probs).limit_denominator()})")
print("\n".join(f"{k} devices: {v}" for k,v in sorted(n_probs.items(),key=lambda item: item[0])))

In [None]:
# Dense adjacency matrix with rows/columns ordered by node id.
ordered_nodes = sorted(pcn.graph.nodes(), key=lambda node: node.id)
A = nx.adjacency_matrix(pcn.graph, nodelist=ordered_nodes).todense()
print("A\n", A)

# The diagonal is zeroed on A and on each product before it is printed or reused.
np.fill_diagonal(A, 0)

A2 = A @ A
np.fill_diagonal(A2, 0)
print("A^2\n", A2)

A3 = A2 @ A
np.fill_diagonal(A3, 0)
print("A^3\n", A3)

In [None]:
# Per-node compromise probability: the product of `p` over all of the node's defences.
# `time_required` accumulates the expected effort of every defence in the network.
time_required = 0.0
nodes = pcn.graph.nodes()
node_probs = {}
for node in nodes:
    probability_to_compromise = 1.0
    for defence_name, defence in node.defences.items():
        expected_effort = defence.effort_distribution.expect()
        time_required += expected_effort
        probability_to_compromise *= defence.p
    node_probs[node] = probability_to_compromise

# prob_to_compromise_n_devices[n] is the probability that at least n nodes are compromised.
# Each subset's probability is a plain product over nodes, i.e. nodes are treated as
# independent and the graph's connectivity is ignored.
prob_to_compromise_n_devices = {}
all_nodes = set(nodes)
cumulative = 0.0
for n_devices in range(pcn.n_components, 0, -1):
    # Probability that exactly `n_devices` nodes are compromised.
    exactly_n = 0.0
    for combination in itertools.combinations(nodes, n_devices):
        probability_to_compromise = 1.0
        combination = set(combination)
        missing_nodes = all_nodes.difference(combination)
        for node in combination:
            probability_to_compromise *= node_probs[node]
        for node in missing_nodes:
            probability_to_compromise *= (1 - node_probs[node])
        exactly_n += probability_to_compromise
    # Running total over n_devices, n_devices + 1, ... Each exact term is added once;
    # previously the already-cumulative value was added back in, double-counting earlier terms.
    cumulative += exactly_n
    prob_to_compromise_n_devices[n_devices] = cumulative
    print(f"{n_devices} Devices: {prob_to_compromise_n_devices[n_devices]}")
print(prob_to_compromise_n_devices)

In [None]:
# If the probability of compromising all components is the same,
# we can use the Binomial distribution function
# Takes: 12.6 µs
N = pcn.n_components
# Deliberately not named `p`: that name is the `pathlib.Path` alias imported at the top of
# the notebook and is still needed by later cells (`p.cwd()`).
p_compromise = 0.5
# Print P(at least k components compromised) for k = N .. 1.
cumulative = 0.0
for k in range(N, 0, -1):
    prob = math.comb(N, k)*math.pow(p_compromise, k)*math.pow(1-p_compromise, N-k)
    print(f"{k} Devices: {cumulative + prob}")
    cumulative += prob

In [None]:
import scipy.stats.distributions as distr
# Name -> scipy distribution lookup.
distr_lookup = dict(
    TruncNorm=distr.truncnorm,  # Continuous, loc=mean (float), scale=standard deviation (float)
    Exponential=distr.expon,  # Continuous, scale = 1 / lambda (float)
    Gamma=distr.gamma,  # Continuous, a = shape parameter (integer)
    Bernoulli=distr.bernoulli,  # Discrete
)

n_attacks = 20
# Draw success/failure for every attack first, then a time for every attack,
# and keep only the times belonging to successful attacks.
success_draws = distr.bernoulli(0.5).rvs(size=n_attacks)
is_successful = success_draws.astype(bool)
all_times = distr.expon(scale=0.0).rvs(size=n_attacks)
time_taken = all_times[is_successful]
print(f"Successful Attacks {sum(is_successful)}/{n_attacks}\nTime Taken per Successful Attack: {time_taken}")

## Communication Network Specifications
Explores how we can supply structured information to our procedural network generation algorithm. Includes information such as the types of components and defences we expect to see in the communication network.

In [None]:
# Fixed seed so this cell always generates the same network; the commented line draws a random one.
# seed = np.random.randint(low=0, high=52600)
seed = 27194
print(f"Seed: {seed}")
# Only NumPy's global RNG is seeded here (the stdlib `random` module is left as-is).
np.random.seed(seed)
# Rebinds `pcn`, replacing the network created earlier in the notebook.
# NOTE(review): the spec is given as a bare relative file name, whereas other cells use
# p.cwd() / "specifications" / "<name>_specifications.json" - confirm this file still exists.
pcn = CommNetwork(n_devices=15, n_entrypoints=1, children_per_parent=5, child_no_deviation=1,
                  network_specs="SmartMeterNetworkSpecifications.json",
                  enable_sibling_to_sibling_comm=True)


## Criticality

In [None]:
import inspect
import warnings
import numpy as np
import pandapower as pp
import pandapower.networks as grids
# Placeholder grid; it is replaced inside the `with` block below.
grid = pp.create_empty_network()
# Keep only functions whose name does not start with an underscore.
grid_filter = lambda module: inspect.isfunction(module) and not module.__name__.startswith("_")
# Map every such function found in pandapower.networks by name.
grid_map = {grid_name:grid_creator for grid_name, grid_creator in \
            inspect.getmembers(grids, predicate=grid_filter)}
grid_options = list(grid_map.keys())
print(", ".join(grid_map.keys()))
CHOSEN_GRID = "mv_oberrhein" # "create_cigre_network_mv" # Can be None
# NOTE(review): these keyword arguments are passed to whichever creator is selected;
# presumably they only suit the grid named above - confirm before setting CHOSEN_GRID to None.
kwargs = dict(scenario="generation", include_substations=True) #  dict(with_der="all")
with warnings.catch_warnings():
    warnings.filterwarnings(action="ignore", category=FutureWarning)
    # A random grid is picked only when no grid was chosen explicitly.
    grid_name = np.random.choice(grid_options) if CHOSEN_GRID is None else CHOSEN_GRID
    print(f"Grid: {grid_name}")
    grid = grid_map[grid_name](**kwargs)
    print(grid)
    # Controllable
    # Row counts of the gen, shunt, trafo and switch tables.
    n_controllable = sum(getattr(grid, attr).shape[0] for attr in ["gen", "shunt", "trafo", "switch"])
    print(f"No. of controllable elements: {n_controllable} (generators, shunts, transformers and switches)")
    # Sensor-Only
    # Row counts of the bus, load and line tables.
    n_sensor_only = sum(getattr(grid, attr).shape[0] for attr in ["bus", "load", "line"])
    print(f"No. of sensor-only elements: {n_sensor_only} (buses, loads and lines)")
    print(f"Total (possible) no. of devices: {n_sensor_only+n_controllable} (generators, shunts, transformers and switches, buses, loads and lines)")

# grid.switch.closed = True
# NOTE(review): relies on `pp.plotting` being reachable after `import pandapower as pp` - confirm.
pp.plotting.simple_plot(grid, respect_switches=True, plot_line_switches=True, plot_loads=True, plot_gens=True, plot_sgens=True, )


In [None]:
import grid2op
from network_specification import SpecDecoder

def evaluate_grid2op_conditions(device_type:dict, obs:grid2op.Observation, obj_ids):
    """
    Checks each object ID at a specific substation to see if it meets the conditions
    given in the network's JSON specifications. 

    Returns:
        list: IDs that satisfy the condition.
    """
    conditions = device_type.get("conditions", None)
    device_ids = copy.deepcopy(obj_ids)
    if conditions is not None and len(obj_ids) > 0:
        for condition in conditions:
            # TODO: Check this works for multiple conditions
            values = getattr(obs, condition["attribute"])[list(set(device_ids))]
            match condition["action"]:
                case "filter":
                    matching_ids = np.where((values >= condition.get("lb", -math.inf)) & \
                                            (values < condition.get("ub", math.inf)))
                    device_ids = device_ids[matching_ids]
                case "split":
                    limit = condition.get("limit", math.inf)
                    ids_to_split = device_ids[np.where(values > limit)]
                    
                    new_ids = [obj_id for obj_id in device_ids if obj_id not in ids_to_split]
                    for i, id_to_split in enumerate(ids_to_split):
                        new_ids.extend([id_to_split]*math.ceil(values[i] / limit))
                    device_ids = new_ids
    return device_ids

# Load the network specification using the project's SpecDecoder.
# `p` must still be the `pathlib.Path` alias imported at the top of the notebook here.
with open(p.cwd() / "specifications" / "SmartMeter_specifications.json", "r", encoding="utf-8") as f:
    specs = json.load(f, cls=SpecDecoder)

device_types = specs["device"]["types"]
# Without a "proportion" entry every device type is given the same probability.
device_type_prob = specs["device"].get("proportion", [1/len(device_types)]*len(device_types))
# Device Type is based on compatibility with power grid element in PandaPower
# Build the reverse mapping: grid element name -> list of device types that list it as compatible.
# NOTE(review): a device type without a "compatible" entry yields None and fails in the inner loop.
compatabilities = {}
for device_type in device_types:
    compatible_devices = device_type.get("compatible")
    for compatible_device in compatible_devices:
        if compatible_device not in compatabilities:
            compatabilities[compatible_device] = [device_type]
        else:
            compatabilities[compatible_device] = compatabilities[compatible_device] + [device_type]

# Map device type (by name) to probability that device is of that type
probs = {device_type.get("name"): prob for device_type, prob in zip(device_types, device_type_prob)}

# Grid2Op Obj name mapping
# Only these four element names are handled; any other key in `compatabilities` raises KeyError below.
grid2op_naming = {"load": "loads_id", "gen":"generators_id", "line":"lines_or_id", "storage":"storages_id"}

no_of_devices = 0
device_map = {}
# NOTE(review): `env` and `obs` are not defined in the visible cells - presumably created by an
# earlier grid2op cell; confirm before running.
for sub_no, sub_name in enumerate(env.name_sub):
    connected_objs = env.get_obj_connect_to(substation_id=sub_no)
    
    for attr, compatible_device_types in compatabilities.items():
        obj_ids = connected_objs[grid2op_naming[attr]]
        print(f"Substation: {obj_ids}", end=" ")
        # NOTE(review): `device_type` is whatever the loop over `device_types` above left bound
        # (its last element), not one of `compatible_device_types` - confirm this is intended.
        obj_ids = evaluate_grid2op_conditions(device_type, obs, obj_ids)
        print(obj_ids)
        no_of_attr_devices =len(obj_ids)
        # Assign one device type to each newly numbered device index.
        for i in range(no_of_devices, no_of_devices+no_of_attr_devices):
            # Recalculate probability of choosing each compatible device
            # Retains original proportion, probabilities must sum to 1
            local_probs = np.array([probs[device_type.get("name")] for device_type in compatible_device_types])
            local_probs = (1/sum(local_probs))*local_probs
            device_map[i] = np.random.choice(compatible_device_types, p=local_probs)
        no_of_devices += no_of_attr_devices
print(device_map.keys())
# Rebinds device_map from the dict to its items view.
device_map = device_map.items()
