Import the libraries and modules:

In [31]:
import csv
import re
import numpy as np
from pathlib import Path
from BAG_Code_tw520.BayesianAttackGraph import parse_dot
from BAG_Code_tw520.createANDtable import create_AND_table
from BAG_Code_tw520.createORtable import create_OR_table
from BAG_Code_tw520.Tools_tree import tokenizer, Parser
import BAG_Code_tw520.Loopy as Loopy

from pgmpy.inference.ExactInference import BeliefPropagation
from pgmpy.factors.discrete import TabularCPD

To prepare the simulation, run the following cells to load the graph into memory:

In [32]:
# Name of the simulation; it selects the "output_<simulation>" folder the graph is read from
simulation = "HighLevel_no_HP"

# Constant standing in for "probability one" (0.999 rather than exactly 1);
# it is passed to parse_dot and kts_layer
ONE = 0.999

In [33]:
def build_tools_tree(BAG, ast, id, ONE, kts_base_score):
    """Evaluate the success probability of a parsed AND/OR expression tree.

    The tree is walked recursively:
      * a node whose ``type`` is 'AND' yields the product of its children's
        probabilities (1 when it has no children);
      * a node whose ``type`` is 'OR' yields ``1 - prod(1 - p_child)``
        (0 when it has no children);
      * any other node is a leaf, scored by looking up its ``__repr__()``
        string in ``kts_base_score``.

    Parameters:
        BAG: not used by the computation; only forwarded to the recursive calls.
        ast: tree node exposing ``type``, ``children`` and ``__repr__``.
        id: not used by the computation.
        ONE: not used by the computation; only forwarded to the recursive calls.
        kts_base_score: mapping from a leaf's repr string to its base score.

    Returns:
        The combined probability of the sub-tree rooted at ``ast``.

    Raises:
        KeyError: if a leaf's repr string is missing from ``kts_base_score``.
    """
    def child_probability(child):
        # The parent's repr is handed down as the (unused) ``id`` argument.
        return build_tools_tree(BAG, child, ast.__repr__(), ONE, kts_base_score)

    kind = ast.type

    # Leaf: anything that is neither a conjunction nor a disjunction.
    if kind not in ('AND', 'OR'):
        return kts_base_score[ast.__repr__()]

    if kind == 'AND':
        # Every child has to succeed.
        all_succeed = 1
        for child in ast.children:
            all_succeed *= child_probability(child)
        return all_succeed

    # 'OR': at least one child has to succeed.
    all_fail = 1
    for child in ast.children:
        all_fail *= 1 - child_probability(child)
    return 1 - all_fail

In [34]:
def to_dot(BAG, nodes, evidences, path, basename, display_kts=False, calculate_probabilities=False):
    """Write the attack graph to a Graphviz ``.dot`` file, annotated with node probabilities.

    The file is written to ``path / (basename + evidences_to_string(evidences) + ".dot")``,
    i.e. the evidence node ids are appended to ``basename`` as ``_<id>`` suffixes.

    Parameters:
        BAG: the network; must provide ``nodes()`` and ``edges()``. It is handed to
            ``BeliefPropagation`` (mode 1) or ``Loopy.ToMarkov`` (mode 2).
        nodes: dict mapping a node id to a dict with the keys 'CVE', 'shape' and 'label'.
        evidences: iterable of node ids treated as observed; each id is converted
            with ``str`` and fixed to state 1.
        path: directory of the output file (joined with ``/``, so a ``pathlib.Path``).
        basename: stem of the output file name.
        display_kts: when true, every node and edge of ``BAG`` is drawn; otherwise only
            the nodes listed in ``nodes`` and the edges leaving them.
        calculate_probabilities: inference mode.
            0 / False -> no inference, every node is annotated with probability 1;
            1 / True  -> exact inference (``True == 1`` in Python, so passing ``True``
                         selects this mode);
            2         -> loopy belief propagation through the ``Loopy`` module.

    Colouring: nodes whose id is in the hard-coded "dynamic" list are 'lightblue',
    other nodes with an id in the hard-coded honeypot range are 'blue'; every other
    node gets an RGB colour whose red channel grows and green channel shrinks with
    its probability.
    """
    nodes_BAG = BAG.nodes()
    evidences = {str(k): 1 for k in evidences}
    regex_rule = r"\d+:RULE \d+ \((.*?)\):\d+\.\d+"
    regex_state = r"\d+:(.*?):\d+"
    print(BAG)

    # Exact inference: the non-evidence nodes are split into three chunks and one joint
    # query is issued per chunk; single-node marginals are derived from it afterwards.
    joint_by_node = {}  # node -> (chunk the node belongs to, joint distribution of that chunk)
    if calculate_probabilities == 1:
        free_nodes = [n for n in nodes_BAG.keys() if n not in evidences.keys()]
        count = len(free_nodes)
        bounds = [0, count // 3, (count * 2) // 3, count]
        prop = BeliefPropagation(BAG)
        for start, stop in zip(bounds, bounds[1:]):
            chunk = free_nodes[start:stop]
            if not chunk:
                # With fewer than three free nodes some chunks are empty; no node can
                # ever look such a chunk up, so the query is skipped.
                continue
            joint = prop.query(chunk, evidence=evidences)
            for member in chunk:
                joint_by_node[member] = (chunk, joint)

    # Loopy propagation
    if calculate_probabilities == 2:
        marginals = Loopy.RunLBP(Loopy.CreateFactorGraph(Loopy.ToMarkov(BAG)), evidences)
        marginals = list(marginals.values())[0]

    # Hard-coded node ids that receive a fixed colour (see docstring).
    honeynodes = {str(n) for n in range(15, 52)}
    dynamic_nodes = {str(n) for n in [20, 21, 26]}

    def node_to_dot(node):
        """Return the dot statement declaring ``node``."""
        prob = 1  # default: evidence nodes, nodes unknown to the BAG, or no inference
        if calculate_probabilities == 1 and node not in evidences:
            if node in joint_by_node:
                chunk, joint = joint_by_node[node]
                # Sum out the other members of the chunk; index 1 is the node's state 1.
                others = [n for n in chunk if n != node]
                prob = joint.marginalize(others, inplace=False).values[1]
        elif calculate_probabilities == 2:
            prob = marginals[node][1]

        probH = int(prob * 255)
        color = '#' + format(probH, '02X') + format(255 - probH, '02X') + '00'

        try:
            attributes = nodes[node]
            CVE = attributes['CVE']
            shape = attributes['shape']
            LABEL = attributes['label']
        except KeyError:
            # Node absent from `nodes` (possible when display_kts draws every BAG node)
            # or lacking one of the attributes: fall back to empty fields.
            CVE = shape = LABEL = ''

        if node in dynamic_nodes:
            color = 'lightblue'
        elif node in honeynodes:
            color = 'blue'

        prob = "{:.4f}".format(prob)

        # Middle line of the label: the CVE when there is one, otherwise the text
        # extracted from the label by the rule / state patterns, otherwise the raw label.
        if CVE != 'null':
            text = CVE
        else:
            rule = re.search(regex_rule, LABEL)
            if rule:
                text = rule.group(1)
            else:
                state = re.search(regex_state, LABEL)
                text = state.group(1) if state else LABEL
        return f'  "{node}" [label="{node}\\n{text}\\n{prob}", color="{color}", penwidth=3, shape="{shape}"];\n'

    parts = ['digraph G {\nranksep=0.2;\n']
    # Display the nodes
    drawn_nodes = BAG.nodes() if display_kts else nodes.keys()
    parts.extend(node_to_dot(node) for node in drawn_nodes)
    # Display the edges
    parts.extend(
        f'  "{edge[0]}" -> "{edge[1]}";\n'
        for edge in BAG.edges()
        if display_kts or edge[0] in nodes.keys()
    )
    parts.append('}')  # End of the dot file

    # Write the dot file
    with open(path / (basename + evidences_to_string(evidences) + ".dot"), 'w') as f:
        f.write(''.join(parts))

def kts_layer(BAG, ONE, nodes):
    """Replace the CPD of every CVE node of ``BAG`` with one derived from KTS scores.

    Two CSV files are read relative to the current working directory:
      * ``./Threat_Inteligence/kts_base_score.csv`` with the columns 'kts' and
        'base_score'; it must contain rows for 'L' and 'H';
      * ``./Threat_Inteligence/CVE_knowledge_tooling_skills.csv`` with the columns
        'Vulnerability', 'tool', 'skills' and 'Type'.

    For each entry of ``nodes`` whose 'CVE' is not the string "null", the 'tool' and
    'Type' expressions are parsed and evaluated with ``build_tools_tree``, multiplied
    with the base score of 'skills', and the result is installed as the node's CPD.
    Finally the skills/knowledge CPDs are added for those variables present in ``BAG``.

    Parameters:
        BAG: the network, modified in place (``remove_cpds`` / ``add_cpds``).
        ONE: forwarded to ``build_tools_tree``.
        nodes: dict mapping a node id to a dict holding at least the key 'CVE'.

    Raises:
        KeyError: if a CVE is missing from the CSV or a score name is unknown.
    """
    # newline='' is what the csv module asks for when it is given a file object.
    with open('./Threat_Inteligence/kts_base_score.csv', mode='r', newline='') as kts_basescore_file:
        kts_base_score = {
            row['kts']: float(row['base_score'])
            for row in csv.DictReader(kts_basescore_file)
        }

    # The skills layer:
    cpd_Lskills = TabularCPD('Lskills', 2, [[1-kts_base_score['L']], [kts_base_score['L']]])
    cpd_Hskills = TabularCPD('Hskills', 2, [[1-kts_base_score['H']], [kts_base_score['H']]])

    # The knowledge layer:
    cpd_knowledge = [TabularCPD(knowledge, 2, [[0.3], [0.7]]) for knowledge in ['Known vulnerabilities', 'CQCM', 'No credentials', 'MITM', 'Permissions move', 'Privilege escalation', 'Lateral move', 'ADCS']]

    # Import all the dependencies: CVE -> its tool / skills / Type requirements
    with open('./Threat_Inteligence/CVE_knowledge_tooling_skills.csv', mode='r', newline='') as ktsfile:
        kts_dict = {
            row['Vulnerability']: {'tool': row['tool'], 'skills': row['skills'], 'Type': row['Type']}
            for row in csv.DictReader(ktsfile)
        }

    for node_id, attributes in nodes.items():
        if attributes['CVE'] == "null":
            continue
        row = kts_dict[attributes['CVE']]
        tool_score = build_tools_tree(BAG, Parser(tokenizer(row['tool'])).parse(), node_id, ONE, kts_base_score)
        skills_score = kts_base_score[row['skills']]
        k_score = build_tools_tree(BAG, Parser(tokenizer(row['Type'])).parse(), node_id, ONE, kts_base_score)
        prob = tool_score * skills_score * k_score
        # Mix in a 0.01 floor: algebraically 1 - (1 - prob) * (1 - 0.01).
        prob = prob + 0.01 - prob*0.01
        # Column 0 (parent in state 0): the node stays in state 0 with certainty.
        # Column 1 (parent in state 1): the node reaches state 1 with probability `prob`.
        # NOTE(review): a 2x2 table fits exactly one binary parent - confirm that
        # CVE nodes never have a different number of parents.
        cpt = [[1, 1-prob], [0, prob]]
        parents = BAG.get_parents(node_id)
        BAG.remove_cpds(node_id)
        BAG.add_cpds(TabularCPD(node_id, 2, cpt, parents, evidence_card=2*np.ones(len(parents))))

    # We add all the necessary CPDs to the BAG (only for variables it actually contains)
    for cpd in [cpd_Lskills, cpd_Hskills] + cpd_knowledge:
        if cpd.variable in BAG:
            BAG.add_cpds(cpd)

def rmv_node(BAG, node):
    """Detach ``node`` from all of its parents and give it a parent-less CPD.

    Every edge ``(parent, node)`` is removed from ``BAG``; a ``TabularCPD`` without
    evidence variables, built from the transpose of ``create_AND_table([0])``, is
    then added for ``node``.
    NOTE(review): presumably this pins the node to state 0 - confirm against
    ``create_AND_table``.
    """
    incoming_edges = []
    for parent in BAG.get_parents(node):
        incoming_edges.append((parent, node))
    BAG.remove_edges_from(incoming_edges)

    standalone_cpd = TabularCPD(node, 2, create_AND_table([0]).T)
    BAG.add_cpds(standalone_cpd)

def evidences_to_string(evidences):
    """Build a file-name suffix from an evidence mapping.

    Each key of ``evidences`` contributes ``_<key>``, in the mapping's iteration
    order; the values are ignored. An empty mapping yields the empty string.
    """
    suffix = ''
    for key, _state in evidences.items():
        suffix += f'_{key}'
    return suffix

    

In [35]:

# Path to the folder containing the tree
path = Path.cwd() / ("Personnal_simulations/output_" + simulation + "/strongly_connected_components/")
file_name = "ag-nocycles.dot"
path_to_dot = path / file_name
basename = "no_honey"

# Read the whole graph from the file, attaching probabilities at the same time.
# The context manager closes the file handle as soon as its text has been read.
with open(path_to_dot, 'r') as dot_file:
    BAG, edges, nodes = parse_dot(dot_file.read(), ONE)

# This is the reference BAG, before the attacker has compromised any node.
# The copy is taken before kts_layer (below) modifies the CPDs of BAG.
BAG_ref = BAG.copy()
prop0 = BeliefPropagation(BAG_ref)

# We create a dictionary to get the node id from the label: the key is the integer
# that precedes the first ':' of the label.
inverted_nodes = {int(v['label'].split(':')[0]): k for k, v in nodes.items()}
kts_layer(BAG, ONE, nodes)

In [36]:
# Render the reference graph with no evidence and no inference: with
# calculate_probabilities=0 every node is annotated with probability 1.
# The empty evidence list adds no suffix, so the file is "reference.dot".
to_dot(BAG_ref, nodes, [], path, "reference", display_kts=False, calculate_probabilities=0)

BayesianNetwork with 14 nodes and 14 edges


In [37]:
# Evidence scenarios: each inner list holds the node ids treated as observed (state 1).
# One .dot file is written per scenario, named basename + "_<id>" for every id.
evidences = [[], [9], [9, 7], [9, 7, 5], [9, 13, 5]]
for evidence in evidences:
    # calculate_probabilities=True compares equal to 1 inside to_dot, i.e. exact inference.
    to_dot(BAG, nodes, evidence, path, basename, display_kts=False, calculate_probabilities=True)
# Show the CPD that is currently attached to node '10'.
print(BAG.get_cpds('10'))

BayesianNetwork with 14 nodes and 14 edges
BayesianNetwork with 14 nodes and 14 edges
BayesianNetwork with 14 nodes and 14 edges
BayesianNetwork with 14 nodes and 14 edges
BayesianNetwork with 14 nodes and 14 edges
+-------+-------+---------------------+
| 11    | 11(0) | 11(1)               |
+-------+-------+---------------------+
| 10(0) | 1.0   | 0.9519839999999999  |
+-------+-------+---------------------+
| 10(1) | 0.0   | 0.04801600000000001 |
+-------+-------+---------------------+


In [None]:
# No evidence: the file suffix is empty, so this (re)writes path / (basename + ".dot").
# NOTE(review): this cell has no execution count and its saved output reports
# 39 nodes / 81 edges, unlike the 14-node graph printed by the executed cells -
# presumably recorded in another session; re-run to confirm.
evidence = []
to_dot(BAG, nodes, evidence, path, basename, display_kts=False, calculate_probabilities=True)

BayesianNetwork with 39 nodes and 81 edges


In [None]:
# Further evidence scenarios (lists of observed node ids); one .dot file per scenario.
# This rebinds the module-level names `evidences` and `evidence`.
evidences = [[9,7], [9], [9, 7, 19]]
for evidence in evidences:
    # calculate_probabilities=True compares equal to 1 inside to_dot, i.e. exact inference.
    to_dot(BAG, nodes, evidence, path, basename, display_kts=False, calculate_probabilities=True)

BayesianNetwork with 39 nodes and 81 edges
BayesianNetwork with 39 nodes and 81 edges
BayesianNetwork with 39 nodes and 81 edges


In [None]:
# Longer evidence scenarios (lists of observed node ids); one .dot file per scenario.
# This rebinds the module-level names `evidences` and `evidence`.
evidences = [[9, 7, 19, 17], [9, 7, 19, 17, 15]]
for evidence in evidences:
    # calculate_probabilities=True compares equal to 1 inside to_dot, i.e. exact inference.
    to_dot(BAG, nodes, evidence, path, basename, display_kts=False, calculate_probabilities=True)

BayesianNetwork with 39 nodes and 81 edges
BayesianNetwork with 39 nodes and 81 edges
