Import the libraries and modules:

In [9]:
import csv
import re
import numpy as np
from pathlib import Path
from BAG_Code_tw520.BayesianAttackGraph import parse_dot
from BAG_Code_tw520.createANDtable import create_AND_table
from BAG_Code_tw520.createORtable import create_OR_table
from BAG_Code_tw520.Tools_tree import tokenizer, Parser, build_tools_tree

from pgmpy.inference.ExactInference import BeliefPropagation
from pgmpy.factors.discrete import TabularCPD

To prepare the simulation, run the following cells to load the graph into memory:

In [10]:
# Name of the simulation; it selects the "output_<simulation>" folder the graph is read from
simulation = "HightLevel"

# Constant used wherever a probability of one is needed.
# NOTE(review): presumably 0.999 rather than 1.0 so that no CPD is fully deterministic - confirm.
ONE = 0.999

In [11]:
def to_dot(BAG, nodes, evidences, display_kts=False, calculate_probabilities=False):
    """Render the attack graph as a Graphviz DOT string.

    Parameters
    ----------
    BAG : network object
        Must provide ``edges()``. When ``calculate_probabilities`` is true it
        is also handed to ``BeliefPropagation``. It is printed once as a trace.
    nodes : dict
        Maps node id -> dict with the keys ``'CVE'``, ``'label'`` and
        ``'shape'``. Only these nodes get a node statement in the output.
    evidences : dict
        Maps node id -> observed state; forwarded as ``evidence`` to the
        inference queries.
    display_kts : bool
        When false, edges whose source is not a key of ``nodes`` are omitted.
    calculate_probabilities : bool
        When true, the posterior of state 1 is computed for every node that is
        not in ``evidences``; it is printed and used for the label and colour.

    Returns
    -------
    str
        The DOT source. Each node is labelled with its id, the number in front
        of the first ':' of its label, a description (CVE, rule text or raw
        label) and the probability with three decimals. The border colour is
        '#RRGG00' with RR = int(prob * 255) and GG = 255 - RR, or 'blue' when
        the label number is one of the hard-coded "honeynodes" (30..51).

    Notes
    -----
    Evidence nodes are drawn with the probability implied by their observed
    state (1 for a truthy state, 0 for state 0) instead of always 1. Without
    computed probabilities, every other node is drawn with probability 1.
    """
    dot = 'digraph G {\n'
    regex = r"\d+:RULE \d+ \((.*?)\):\d+\.\d+"
    print(BAG)
    # Label numbers rendered in blue. NOTE(review): hard-coded range - confirm it
    # still matches the simulation being plotted.
    honeynodes = {str(n) for n in range(30, 52)}

    # node -> (joint posterior of its group, variables of that group)
    joint_of = {}
    if calculate_probabilities:
        nodes_wo_evidences = [n for n in nodes if n not in evidences]
        count = len(nodes_wo_evidences)
        # The non-evidence nodes are queried in three joint queries instead of
        # one query per node; single-node marginals are derived from them below.
        groups = (
            nodes_wo_evidences[:count // 3],
            nodes_wo_evidences[count // 3:(count * 2) // 3],
            nodes_wo_evidences[(count * 2) // 3:],
        )
        prop = BeliefPropagation(BAG)
        for group in groups:
            if not group:
                # Happens with fewer than three non-evidence nodes; do not
                # issue a query without variables.
                continue
            joint = prop.query(group, evidence=evidences)
            for member in group:
                joint_of[member] = (joint, group)

    def node_to_dot(node):
        if node in joint_of:
            joint, group = joint_of[node]
            others = [n for n in group if n != node]
            prob = joint.marginalize(others, inplace=False).values[1]
            print(node, prob)
        elif node in evidences:
            # An observed node is certain: show the observed state itself.
            prob = 1 if evidences[node] else 0
        else:
            prob = 1
        probH = int(prob * 255)
        color = '#' + format(probH, '02X') + format(255 - probH, '02X') + '00'
        CVE = nodes[node]['CVE']
        LABEL = nodes[node]['label']
        number = LABEL.split(":")[0]
        if number in honeynodes:
            color = 'blue'
        prob = "{:.3f}".format(prob)
        shape = nodes[node]['shape']
        # Middle line of the label: the CVE if there is one, otherwise the text
        # between parentheses of a "N:RULE M (...):p" label, otherwise the raw label.
        if CVE != 'null':
            detail = CVE
        else:
            correspondance = re.search(regex, LABEL)
            detail = correspondance.group(1) if correspondance else LABEL
        return f'  {node} [label="{node} ({number})\\n{detail}\\n{prob}", color="{color}", penwidth=3, shape="{shape}"];\n'

    for node in nodes:
        dot += node_to_dot(node)
    for edge in BAG.edges():
        if display_kts or edge[0] in nodes:
            dot += f'  "{edge[0]}" -> "{edge[1]}";\n'
    dot += '}'
    return dot

def kts_layer(BAG, ONE, nodes):
    """Attach the knowledge / tools / skills (KTS) layer to ``BAG`` in place.

    For every entry of ``nodes`` whose ``'CVE'`` is not ``"null"``, the row of
    './Threat_Inteligence/CVE_knowledge_tooling_skills.csv' with the matching
    'Vulnerability' is looked up and used to:

    * expand its 'tool' expression into the graph with ``build_tools_tree``,
    * add an edge from the '<skills>skills' node to the node,
    * expand its 'Type' expression with ``build_tools_tree``,
    * replace the node's CPD by an AND table over all of its parents.

    Finally the prior CPDs of the skill and knowledge nodes are added, but only
    for those that are actually present in the graph.

    Parameters
    ----------
    BAG : network object
        Modified in place through ``add_edge`` / ``add_cpds``.
    ONE : float
        Probability passed to ``build_tools_tree`` and ``create_AND_table``.
    nodes : dict
        Maps node id -> dict containing at least the key ``'CVE'``.

    Raises
    ------
    KeyError
        If a node's CVE has no row in the CSV file.
    """
    # The skills layer:
    cpd_Lskills = TabularCPD('Lskills', 2, [[0.1], [0.9]])
    cpd_Hskills = TabularCPD('Hskills', 2, [[0.9], [0.1]])
    # cpd_Lskills = TabularCPD('Lskills', 2, [[0.5], [0.5]])

    # The knowledge layer:
    knowledge_names = ['Known vulnerabilities', 'CQCM', 'No credentials', 'MITM',
                       'Permissions move', 'Privilege escalation', 'Lateral move', 'ADCS']
    cpd_knowledge = [TabularCPD(knowledge, 2, [[0.5], [0.5]]) for knowledge in knowledge_names]

    # Import all the dependencies (newline='' as recommended for the csv module)
    with open('./Threat_Inteligence/CVE_knowledge_tooling_skills.csv', mode='r', newline='') as ktsfile:
        kts_dict = {
            row['Vulnerability']: {'tool': row['tool'], 'skills': row['skills'], 'Type': row['Type']}
            for row in csv.DictReader(ktsfile)
        }

    # AND tables cached per number of parents, so each size is built only once.
    and_tables = {}
    for node_id, attributes in nodes.items():
        cve = attributes['CVE']
        if cve == "null":
            continue
        if cve not in kts_dict:
            raise KeyError(f"{cve} (node {node_id}) has no row in CVE_knowledge_tooling_skills.csv")
        row = kts_dict[cve]
        build_tools_tree(BAG, Parser(tokenizer(row['tool'])).parse(), node_id, ONE)
        BAG.add_edge(row['skills'] + 'skills', node_id)
        build_tools_tree(BAG, Parser(tokenizer(row['Type'])).parse(), node_id, ONE)
        parents = BAG.get_parents(node_id)
        n_parents = len(parents)
        # Size the AND table from the actual parents instead of assuming four
        # of them; with four parents this is the same table as before.
        if n_parents not in and_tables:
            and_tables[n_parents] = create_AND_table([ONE] * n_parents)
        BAG.add_cpds(TabularCPD(node_id, 2, and_tables[n_parents].T, parents,
                                evidence_card=2*np.ones(n_parents)))

    # We add all the necessary CPDs to the BAG
    for cpd in [cpd_Lskills, cpd_Hskills] + cpd_knowledge:
        if cpd.variable in BAG:
            BAG.add_cpds(cpd)

In [12]:

# Path to the folder containing the tree
path = Path.cwd() / ("Personnal_simulations/output_" + simulation + "/strongly_connected_components/")
file_name = "ag-nocycles.dot"
path_to_dot = path / file_name
output_file = path / (file_name[:-4] + "_colored.dot")

# Read the whole DOT file and parse it, adding the probabilities at the same time.
# The `with` block closes the file handle once the text has been read.
with open(path_to_dot, 'r') as dot_file:
    BAG, edges, nodes = parse_dot(dot_file.read(), ONE)

# This is the reference BAG, before the attacker has compromised any node
BAG_ref = BAG.copy()
prop0 = BeliefPropagation(BAG_ref)

# Dictionary mapping the number in front of a node's label to the node's id
inverted_nodes = {int(v['label'].split(':')[0]): k for k, v in nodes.items()}


In [13]:
# BAG.remove_edges_from([(12,11 ) , (11,10), (10,9), (9,8)])
# BAG.remove_nodes_from([11,10,9])
# nodes = {k: v for k, v in nodes.items() if k not in [11,10,9]}
# BAG.add_edges_from([(12,8)])
# BAG.add_cpds(TabularCPD(8, 2, create_AND_table([ONE]).T, [12], evidence_card=2*np.ones(1)))

In [14]:
# Add the knowledge / tools / skills layer to BAG in place; BAG_ref was copied
# before this call. Re-running this cell applies the layer to the same BAG again.
kts_layer(BAG, ONE, nodes)



In [15]:
# No evidence is supplied for these first two exports.
evidence = {}

# Graph with the KTS layer attached (KTS edges hidden, no inference run).
kts_dot_path = path / ("output_" + str(27) + ".dot")
with open(kts_dot_path, 'w') as f:
    kts_dot = to_dot(BAG, nodes, evidence, display_kts=False, calculate_probabilities=False)
    f.write(kts_dot)

# Reference graph, copied before the KTS layer was added.
ref_dot_path = path / ("BAG_ref" + ".dot")
with open(ref_dot_path, 'w') as f:
    ref_dot = to_dot(BAG_ref, nodes, evidence)
    f.write(ref_dot)

BayesianNetwork with 44 nodes and 79 edges
BayesianNetwork with 28 nodes and 32 edges


## When the attacker becomes dcsync and we want to recalculate for domain admin
src_node = [7, 10]
dst_node = [ 14, 16, 12, 2]

In [16]:
# Nodes 17, 27 and 21 are given as evidence in state 1.
evidence = {17:1, 27:1, 21:1}

# Export the graph with the posterior of every remaining node (printed by to_dot as it goes).
evidence_dot_path = path / ("output_" + str(21) + ".dot")
with open(evidence_dot_path, 'w') as f:
    evidence_dot = to_dot(BAG, nodes, evidence, display_kts=False, calculate_probabilities=True)
    f.write(evidence_dot)

BayesianNetwork with 44 nodes and 79 edges
1 0.07802117249696613
2 0.08567427419017368
3 0.9881804791506632
4 0.9978672418161094
5 0.9995196184625778
6 0.9291904574326048
7 0.9312149323229669
8 0.9269948461222266
9 0.9293623931612245
10 0.931725699728357
11 0.999999430810387
12 0.07628497848064304
13 0.07872270428630333
14 0.07448745817975554
15 0.08422273174232942
16 0.08865829746235948
18 0.09546127602605328
19 0.9897851099870381
20 0.9945392766438585
22 0.07542820376339958
23 0.9289958541102785
24 0.056021892377660185
25 0.05909660266791672
26 0.07133484613074408
28 0.9894979809649975


In [17]:
# Posterior distribution of the 'Hskills' node given the current evidence.
hskills_inference = BeliefPropagation(BAG)
tab = hskills_inference.query(['Hskills'], evidence=evidence)
print(tab)

+------------+----------------+
| Hskills    |   phi(Hskills) |
| Hskills(0) |         0.8375 |
+------------+----------------+
| Hskills(1) |         0.1625 |
+------------+----------------+
