# Sandbox
For testing and developing new Cyber Security Assessment tools in an interactive and persistent development environment.

In [None]:
import itertools
import json
import copy
import random
import warnings
import math
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import scipy.stats.distributions as distr
import seaborn as sns
import pandapower
from pathlib import Path as p

from cyber.assets import Defence, Vulnerability, CommmonDefences, CyberDevice
from communication.graph import CommNode, CommEdge
from communication.network import Aggregator, Device, CommNetwork
from attackers.random_attacker import RandomAttacker
from cyber.analysis import Analyzer
from visualization import plot_communication_network

## Procedural Generation
### Abstract Tree
Consists of Devices and Aggregators. 
* Aggregators (internal nodes) require a **Hard** amount of effort to compromise and have a 50% chance of being compromised if the necessary effort is spent
* Devices (leaf nodes) require an **Easy** amount of effort to compromise and also have a 50% chance of being compromised if the necessary effort is spent
* Control Center (root node) is **Very Hard** to compromise

Controllable parameters include:
* Number of devices (leaf nodes)
* Number of Entrypoints (points where cyberattacks can originate)
* Number of children per parent node (inversely proportional to redundancy)
* Random deviation in number of children
* Sibling to Sibling communication (lateral edges between nodes on the same level)

In [None]:
# Draw a fresh seed and apply it to both NumPy's and the stdlib's global RNGs.
seed = np.random.randint(low=0, high=52600)
np.random.seed(seed); random.seed(seed)
# Echo the first word of NumPy's RNG state right after seeding
# (presumably identical to `seed`; this is the value to note down for a rerun).
print(f"Seed: {np.random.get_state()[1][0]}")

# FutureWarnings raised while building the grid and the network are silenced
# for the duration of this block only.
with warnings.catch_warnings():
    warnings.filterwarnings(action="ignore", category=FutureWarning)
    # Power grid that the communication network is attached to.
    grid=pandapower.networks.create_cigre_network_mv(with_der="all")
    print(grid)
    pandapower.plotting.simple_plot(grid, plot_loads=True)
    # Procedurally generated communication network built from the SCADA
    # specification file under ./specifications (relative to the working directory).
    pcn = CommNetwork(n_devices=3, n_entrypoints=1, children_per_parent=0, child_no_deviation=5, 
                    network_specs=p.cwd() / "specifications" / "SCADA_specifications.json", 
                    # "Default_specifications.json", "SmartMeter_specifications.json", "SCADA_specifications.json", "WAMS_specifications.json"
                    grid=grid,
                    enable_sibling_to_sibling_comm=True)
print(CommNetwork.show_tree(pcn.root))
print(f"Number of Components: {pcn.n_components}")

# Analyzer bound to this network; reused by the analysis cells further down.
analyzer = Analyzer(pcn)

In [None]:
import pandas as pd
from collections import defaultdict
from procedural.specification import SpecDecoder

# Prototype of the device-assignment step: read a network specification and
# decide which device category (and how many device instances) each piece of
# grid equipment receives, then build the corresponding Device objects.
# Relies on `grid` and the `p` (pathlib.Path) alias defined by earlier cells.
with open(p.cwd() / "specifications" / "SmartMeter_specifications.json", "r", encoding="utf-8") as f:
    specs = json.load(f, cls=SpecDecoder)
prop = None  # optional override for the per-category proportions
n_devices = 10  # only used when no grid is supplied

components = []

cat_spec = specs["device"]["categories"]
cat_lookup = {cat["name"]: cat for cat in cat_spec}
categories = list(cat_lookup.keys())

# Proportion of devices of each type (default: uniform)
uniform_device_types = [1/len(categories)]*len(categories)
device_type_prob = specs["device"].get("proportion", uniform_device_types) if prop is None else prop
if grid is None:
    # Device Type is based on statistic / expected proportion
    device_population = np.random.choice(categories, p=device_type_prob, replace=True, size=n_devices)
    device_map = [(i, cat_name, 1) for i, cat_name in enumerate(device_population)]
else:
    # Apply rules in Specifications to assign 1 or more devices to equipment in the grid.

    # Map device category (by name) to probability that device is of that category
    prob_lookup = dict(zip(categories, device_type_prob))

    compat = {}
    for i, cat_name in enumerate(categories):
        cat = cat_lookup[cat_name]
        # A category without a "compatible" section simply matches no equipment.
        comp_devices = cat.get("compatible") or {}
        for comp_device, conditions in comp_devices.items():
            equip_df = getattr(grid, comp_device)

            # One row per piece of equipment, one column per device category.
            # Both tables share the equipment table's own index so that later
            # assignments align by label even when that index is not 0..n-1.
            if comp_device not in compat:
                table_shape = (equip_df.shape[0], len(categories))
                compat[comp_device] = dict(
                    # Probability of choosing each category (0 = incompatible)
                    probs=pd.DataFrame(np.zeros(table_shape), index=equip_df.index, columns=categories),
                    # Number of devices to create; one device unless a "split" rule says otherwise
                    splits=pd.DataFrame(np.ones(table_shape, dtype=np.int16), index=equip_df.index, columns=categories),
                )
            # Find Equipment that meets Conditions
            if "filter" in conditions:
                condition = conditions["filter"]
                criteria = equip_df[condition["attribute"]]
                mask = (criteria >= condition.get("lb", -math.inf)) & \
                       (criteria <= condition.get("ub",  math.inf))
                # Positional boolean mask: iloc must not be handed an indexed Series
                compat[comp_device]["probs"].iloc[mask.to_numpy(), i] = prob_lookup[cat_name]
            else:
                compat[comp_device]["probs"].iloc[:, i] = prob_lookup[cat_name]

            if "split" in conditions:
                condition = conditions["split"]
                criteria = equip_df[condition["attribute"]]
                limit = condition.get("limit", math.inf)
                # ceil(criteria / limit): full chunks plus one for any remainder
                min_splits = criteria.floordiv(limit).astype(np.int16)
                leftover_split = (criteria.mod(limit) > 0).astype(np.int16)
                compat[comp_device]["splits"].iloc[:, i] = (min_splits + leftover_split).to_numpy()

    def select_compatible_device_category(row_probs):
        """Draw one category name using the given per-category probabilities."""
        return np.random.choice(categories, p=row_probs)

    device_map = []
    for comp_device, tables in compat.items():
        equip_df = getattr(grid, comp_device)

        # Normalize probabilities (must sum to 1); equipment that no category
        # is compatible with yields 0/0 = NaN and is left out here.
        probs = tables["probs"]
        probs = probs.div(probs.sum(axis=1), axis=0).dropna()
        if probs.empty:
            continue

        # Select device category
        chosen = probs.apply(select_compatible_device_category, axis=1)

        # Split device if equipment exceeds size limit
        split_counts = tables["splits"]
        n_split_series = pd.Series(
            [int(split_counts.at[equip_id, chosen_cat]) for equip_id, chosen_cat in chosen.items()],
            index=chosen.index,
        )

        # Annotate the grid table without deleting any of its rows; equipment
        # without a compatible category keeps NaN in both columns.
        equip_df["Category"] = chosen
        equip_df["Splits"] = n_split_series

        device_map.extend(
            (position, chosen_cat, count)
            for position, (chosen_cat, count) in enumerate(zip(chosen, n_split_series))
        )


# Create Devices
for i, cat_name, n_splits in device_map:
    cat = cat_lookup[cat_name]
    device_name = cat.get("name", "Device")
    device_attrs =  CommNetwork.get_binary_attributes(cat,
                    ["is_sensor", "is_controller", "is_accessible", "is_autonomous"])
    for _ in range(n_splits):
        device = Device(name=device_name,
                        is_controller=device_attrs["is_controller"],
                        is_sensor=device_attrs["is_sensor"],
                        is_autonomous=device_attrs["is_autonomous"],
                        is_accessible=device_attrs["is_accessible"],)
        CommNetwork.attach_cyber_characteristics(device, cat)
        components.append(device)
        # self.node_ids.append(device.id)
        # self.id_to_node[device.id] = device
        # self.n_components += 1

    # Find all equipment that is compatible with a specific device type
    # If equipment is only compatible with that device type, assign it
    # Otherwise, pick based on proportion

   

#     split_mask = criteria > condition.get("limit")
#     to_split, not_to_split = df[split_mask], df[~split_mask]
#     for equip in not_to_split.name:
#         compat[comp_device][equip].append(cat_name)

In [None]:
# Number of Device objects collected in `components` by the device-assignment cell.
len(components)

In [None]:
# Experiment: overwrite the "bus" selection probabilities with random weights,
# then renormalise every equipment table so that each row sums to 1.
# Each `compat` entry is a dict holding a "probs" and a "splits" DataFrame, so
# the probability table has to be addressed explicitly.
bus_probs = compat["bus"]["probs"]
bus_probs.iloc[:, :] = np.random.random(bus_probs.shape)
for comp_device, tables in compat.items():
    row_totals = tables["probs"].sum(axis=1)
    # Rows whose weights are all zero become NaN (0/0) and are dropped.
    tables["probs"] = tables["probs"].div(row_totals, axis=0).dropna()

In [None]:
# Display the `compat` entry for the grid's "bus" table.
# NOTE(review): as built in the device-assignment cell this entry is a dict with
# "probs" and "splits" DataFrames, not a single DataFrame.
compat["bus"]

In [None]:
# Draw one device category for the first bus from its probability row.
# The row is renormalised here so the draw is valid even if the table has not
# been normalised beforehand.
bus_probs = compat["bus"]["probs"]
first_row = bus_probs.iloc[0]
np.random.choice(bus_probs.columns, size=1, replace=True, p=first_row / first_row.sum()).item()

## Visualization
Plot the structure of the communication network. 

In [None]:
# Draw the communication network held in `pcn` with the chosen colour palette.
plot_communication_network(pcn, palette="tab10") # "tab10" (default), "Set2", "Paired", "flare"

## Analysis

### Monte Carlo
Build an approximate profile of the network's cyber security by launching many cyber attacks. The higher N_ATTACKS is, the more precise the resulting distribution; however, this comes at the cost of increased computation time.
The more nodes are compromised, the more successful the attack.

#### Active Graph Only
Only perform Monte Carlo simulation on the currently active network.

In [None]:

# Monte Carlo settings for the currently active network.
N_ATTACKS = 1000  # number of simulated attacks
BUDGET = 52  # budget handed to the analysis (units are defined by the analyzer, not visible here)
ATTACKER_VARIANT = RandomAttacker  # attacker class passed to the analysis
# Run the simulation, then plot its outcome.
compromised_array, effort_array = analyzer.monte_carlo_analysis(n_attacks=N_ATTACKS, attacker_variant=ATTACKER_VARIANT, budget=BUDGET)
analyzer.plot_monte()

#### Varied Parameter
Perform Monte Carlo simulation while varying a particular parameter, such as the level of redundancy in the network. 

In [None]:
import os
import multiprocess as mp
# Monte Carlo sweep: repeat the attack simulation while varying the number of
# children per parent node in the generated network.
N_ATTACKS = 1000
N_DEVICES = 30
BUDGET = 52
SPEC = p.cwd() / "specifications" / "SmartMeter_specifications.json" 
SEED = np.random.randint(low=0, high=52600)
N_ENTRYPOINTS = 1 # Total budget is multiplied by this!
MIN_CHILDREN = 2
MAX_CHILDREN = N_DEVICES
CHILD_NO_STEP = 2
CHILD_NO_DEVIATION = 0
# Values of `children_per_parent` to sweep over (MAX_CHILDREN itself is excluded by arange).
no_of_children = np.arange(MIN_CHILDREN, MAX_CHILDREN, CHILD_NO_STEP)
# Keyword arguments forwarded to the network generator for every sweep point.
network_specs = dict(n_devices=N_DEVICES,
                     n_entrypoints=N_ENTRYPOINTS,
                     network_specs=SPEC,
                     child_no_deviation=CHILD_NO_DEVIATION,
                     enable_sibling_to_sibling_comm=True)

# Use the SEED drawn in this cell; the lowercase `seed` belongs to whichever
# network-generation cell happened to run last.
compromised_array, effort_array = analyzer.monte_carlo_multi_analysis(SEED, "children_per_parent", no_of_children, budget=BUDGET, n_attacks=N_ATTACKS, **network_specs)
analyzer.plot_monte()

### Static Analysis
Given an infinite budget, breaks down the probability of compromising components in the network. The resulting probabilities are exact (except for floating point precision issues) but do not scale well to larger communication networks (> 5 nodes). Useful as a static feature of a communication network. 

In [None]:
# Run the analyzer's static analysis on the current network (verbose, without
# listing individual paths), keep its result in `n_probs`, then plot it.
n_probs = analyzer.static_analysis(show_paths=False, verbose=True)
analyzer.plot_static()

In [None]:
# Adjacency Matrix
# Does not handle self-loops / backtracking
# Consequently, probabilities will differ from combinatorial approach

# Translation table from ASCII digits (and the minus sign) to their Unicode
# superscript forms; built once instead of on every call.
_SUPERSCRIPT_TABLE = str.maketrans(
    "0123456789-",
    "\N{SUPERSCRIPT ZERO}\N{SUPERSCRIPT ONE}\N{SUPERSCRIPT TWO}\N{SUPERSCRIPT THREE}"
    "\N{SUPERSCRIPT FOUR}\N{SUPERSCRIPT FIVE}\N{SUPERSCRIPT SIX}\N{SUPERSCRIPT SEVEN}"
    "\N{SUPERSCRIPT EIGHT}\N{SUPERSCRIPT NINE}\N{SUPERSCRIPT MINUS}",
)


def superscript(num: int) -> str:
    """Render an integer as a string of Unicode superscript characters.

    Args:
        num: Integer to render. Negative values get a superscript minus sign.

    Returns:
        The decimal representation of ``num`` with every character replaced
        by its superscript counterpart.

    Raises:
        ValueError: If ``num`` does not format as a plain (optionally
            negative) decimal integer, e.g. a float.
    """
    text = str(num)
    digits = text[1:] if text.startswith("-") else text
    if not (digits.isascii() and digits.isdigit()):
        raise ValueError(f"superscript() expects an integer, got {num!r}")
    return text.translate(_SUPERSCRIPT_TABLE)

# Print matrices with two decimals and a uniform number of digits.
np.set_printoptions(precision=2, floatmode="maxprec_equal")

# Fix a node ordering (by id) so matrix rows/columns are reproducible.
nodes = sorted(pcn.graph.nodes(), key=lambda node: node.id)
prob_lookup = [node.get_prob_to_compromise() for node in nodes]
print(f"Probabilities: {prob_lookup}")


# Weighted adjacency matrix using the edge attribute "p".
A = nx.adjacency_matrix(pcn.graph, nodelist=nodes, weight="p").todense()
n_probs = {}

# Successive powers A^1 .. A^len(nodes); for each power, record the sum of
# the entries strictly above the diagonal.
matrix_power = np.eye(A.shape[0])
for exponent in range(1, len(nodes) + 1):
    matrix_power = matrix_power @ A
    # np.fill_diagonal(matrix_power, val=0)
    label = f"A{superscript(exponent)}"
    print(f"{label}\n", matrix_power)
    n_probs[exponent] = np.triu(matrix_power, k=1).sum()
    print(f"{label}: {n_probs[exponent]}")

In [None]:
# Mutually Exclusive Approach
# Assumes you can jump and independently attack any node (i.e. ignores communication connections!)
#
# For every k, enumerates all k-subsets of nodes and adds up the probability
# that exactly that subset is compromised (members succeed, all others fail).
# n_probs[k] is then the probability that AT LEAST k nodes are compromised.
# The enumeration is exponential in the number of nodes.

time_required = 0.0
nodes = pcn.graph.nodes()
node_probs = {node: node.get_prob_to_compromise() for node in nodes}

n_probs = {}
cumulative = 0.0  # running P(at least k compromised), built from k = N downwards
for n_compromised in range(pcn.n_components, 0, -1):
    # P(exactly n_compromised nodes are compromised)
    exactly = 0.0
    for combination in itertools.combinations(nodes, n_compromised):
        compromised = set(combination)
        exactly += math.prod(
            node_probs[node] if node in compromised else 1 - node_probs[node]
            for node in nodes
        )
    # Add only the exact term: adding the already-cumulative value would
    # count the larger outcomes twice and can push the result above 1.
    cumulative += exactly
    n_probs[n_compromised] = cumulative
print("\n".join(f"{k} devices: {v}" for k,v in sorted(n_probs.items(), key=lambda item: item[0])))
# Sum of the tail probabilities P(X >= k); for k = 1..N this is the expected
# number of compromised nodes, not a probability.
print("Sum:", sum(n_probs.values()))

In [None]:
# If the probability of compromising all components is the same,
# we can use the Binomial distribution function
# Takes: 12.6 µs
#
# Prints, for k = N down to 1, the probability that at least k of the N
# components are compromised.
N = pcn.n_components
# Deliberately not called `p`: that name is this notebook's alias for
# pathlib.Path and is still needed by cells that build specification paths.
P_COMPROMISE = 0.5
cumulative = 0.0
for k in range(N, 0, -1):
    # Binomial probability of exactly k successes out of N
    prob = math.comb(N, k)*math.pow(P_COMPROMISE, k)*math.pow(1-P_COMPROMISE,N-k)
    print(f"{k} Devices: {cumulative + prob}")
    cumulative += prob

In [None]:
import scipy.stats.distributions as distr
# Name -> scipy distribution, as a specification file might refer to them.
distr_lookup = dict(
    TruncNorm=distr.truncnorm,  # Continuous, loc=mean (float), scale=standard deviation (float)
    Exponential=distr.expon,  # Continuous, scale = 1 / lambda (float)
    Gamma=distr.gamma,  # Continuous, a = shape parameter (integer)
    Bernoulli=distr.bernoulli,  # Discrete
)

n_attacks = 20

# One fair coin flip per attack decides whether it succeeds.
success_model = distr.bernoulli(0.5)
is_successful = success_model.rvs(size=n_attacks).astype(bool)

# Sample a duration for every attack and keep only the successful ones.
# NOTE(review): scale=0.0 looks like a placeholder - confirm the intended rate.
duration_model = distr.expon(scale=0.0)
all_durations = duration_model.rvs(size=n_attacks)
time_taken = all_durations[is_successful]

print(f"Successful Attacks {is_successful.sum()}/{n_attacks}\nTime Taken per Successful Attack: {time_taken}")

## Communication Network Specifications
Explores how we can supply structured information to our procedural network generation algorithm. Includes information such as the types of components and defences we expect to see in the communication network.

In [None]:
# Build a network from a fixed seed so the result can be reproduced.
# seed = np.random.randint(low=0, high=52600)
seed = 27194
print(f"Seed: {seed}")
# Seed both global RNGs, as the other network-generation cells do; seeding
# NumPy alone leaves anything drawn through `random` unreproducible.
np.random.seed(seed)
random.seed(seed)
# NOTE(review): the other cells load specifications from
# ./specifications/<Name>_specifications.json - confirm this bare file name still exists.
pcn = CommNetwork(n_devices=15, n_entrypoints=1, children_per_parent=5, child_no_deviation=1,
                  network_specs="SmartMeterNetworkSpecifications.json",
                  enable_sibling_to_sibling_comm=True)


## Power System Component Association

In [None]:
import inspect
import warnings
import numpy as np
import pandapower as pp
import pandapower.networks as grids
# Pick one of pandapower's built-in example grids, build it and count the
# elements that a cyber device could be attached to.
grid = pp.create_empty_network()  # placeholder; replaced by the selected grid below


def grid_filter(member):
    """Accept public functions only (names not starting with an underscore)."""
    return inspect.isfunction(member) and not member.__name__.startswith("_")


# Attribute name -> builder function for every public function in pandapower.networks
grid_map = dict(inspect.getmembers(grids, predicate=grid_filter))
grid_options = list(grid_map)
print(", ".join(grid_options))
CHOSEN_GRID = "mv_oberrhein" # "create_cigre_network_mv" # Can be None
kwargs = dict(scenario="generation", include_substations=True) #  dict(with_der="all")
with warnings.catch_warnings():
    warnings.filterwarnings(action="ignore", category=FutureWarning)
    if CHOSEN_GRID is None:
        # `kwargs` is written for one specific builder; a randomly drawn
        # builder is called with its defaults instead.
        grid_name = np.random.choice(grid_options)
        grid_kwargs = {}
    else:
        grid_name = CHOSEN_GRID
        grid_kwargs = kwargs
    print(f"Grid: {grid_name}")
    grid = grid_map[grid_name](**grid_kwargs)
    print(grid)
    # Controllable
    n_controllable = sum(getattr(grid, attr).shape[0] for attr in ["gen", "shunt", "trafo", "switch"])
    print(f"No. of controllable elements: {n_controllable} (generators, shunts, transformers and switches)")
    # Sensor-Only
    n_sensor_only = sum(getattr(grid, attr).shape[0] for attr in ["bus", "load", "line"])
    print(f"No. of sensor-only elements: {n_sensor_only} (buses, loads and lines)")
    print(f"Total (possible) no. of devices: {n_sensor_only+n_controllable} (generators, shunts, transformers and switches, buses, loads and lines)")

# grid.switch.closed = True
pp.plotting.simple_plot(grid, respect_switches=True, plot_line_switches=True, plot_loads=True, plot_gens=True, plot_sgens=True, )


In [None]:
# Draw a fresh seed and apply it to both NumPy's and the stdlib's global RNGs.
seed = np.random.randint(low=0, high=52600)
np.random.seed(seed); random.seed(seed)
# Echo the first word of NumPy's RNG state right after seeding
# (presumably identical to `seed`; this is the value to note down for a rerun).
print(f"Seed: {np.random.get_state()[1][0]}")

# FutureWarnings raised while building the grid and the network are silenced
# for the duration of this block only.
with warnings.catch_warnings():
    warnings.filterwarnings(action="ignore", category=FutureWarning)
    # Power grid that the communication network is attached to.
    grid = pandapower.networks.mv_oberrhein(scenario="generation") # pandapower.networks.case14()
    print(grid)
    # Communication network generated from the SCADA specification file under
    # ./specifications (relative to the working directory).
    # NOTE(review): `p` must still be the pathlib.Path alias when this cell runs.
    pcn = CommNetwork(n_devices=30, n_entrypoints=1, children_per_parent=0, child_no_deviation=5, 
                    network_specs=p.cwd() / "specifications" / "SCADA_specifications.json", 
                    # "Default_specifications.json", "SmartMeter_specifications.json", "SCADA_specifications.json", "WAMS_specifications.json"
                    grid=grid,
                    enable_sibling_to_sibling_comm=True)
print(CommNetwork.show_tree(pcn.root))
print(f"Number of Components: {pcn.n_components}")