Import the librairies and modules:

In [1]:
import csv
import re
import numpy as np
from pathlib import Path
from BAG_Code_tw520.BayesianAttackGraph import parse_dot
from BAG_Code_tw520.createANDtable import create_AND_table
from BAG_Code_tw520.createORtable import create_OR_table
from BAG_Code_tw520.Tools_tree import tokenizer, Parser, build_tools_tree
import BAG_Code_tw520.Loopy as Loopy

from pgmpy.inference.ExactInference import BeliefPropagation
from pgmpy.factors.discrete import TabularCPD

Tokens: [('VAR', 'Responder'), ('AND', '&'), ('LPAREN', '('), ('VAR', 'impacket'), ('OR', '|'), ('VAR', 'Metasploit'), ('RPAREN', ')')]
AST: Responder&impacket|Metasploit
&
Responder
|
impacket
Metasploit
Compiled Expression: (Responder & (impacket | Metasploit))


To prepare the simulation use the following to put the graph in memory:

In [2]:
# Name of the simulation
simulation = "HightLevel"

# Constante for probability one
ONE = 0.999

In [3]:
def to_dot(BAG, nodes, evidences, display_kts=False, calculate_probabilities=False):
    dot = 'digraph G {\n'
    regex = r"\d+:RULE \d+ \((.*?)\):\d+\.\d+"
    print(BAG)
    # Exact inference
    if calculate_probabilities ==1:
        nodes_wo_evidences = [n for n in nodes.keys() if n not in evidences.keys()]
        nodes_1 = nodes_wo_evidences[:len(nodes_wo_evidences)//3]
        nodes_2 = nodes_wo_evidences[len(nodes_wo_evidences)//3:(len(nodes_wo_evidences)*2)//3]
        nodes_3 = nodes_wo_evidences[(len(nodes_wo_evidences)*2)//3:]
        prop = BeliefPropagation(BAG)
        total_prob1 = prop.query(nodes_1, evidence=evidences)
        total_prob2 = prop.query(nodes_2, evidence=evidences)
        total_prob3 = prop.query(nodes_3, evidence=evidences)
    # Loopy propagation
    if calculate_probabilities == 2:
        marginals = Loopy.RunLBP(Loopy.CreateFactorGraph(Loopy.ToMarkov(BAG)), evidences)
        marginals = list(marginals.values())[0]
    # Return to general case
    honeynodes = [str(n) for n in range(26,52)]

    def node_to_dot(node):
        if node not in evidences.keys() and calculate_probabilities == 1:
            # prob = prop.query([node], evidence=evidences).values[1]
            if node in nodes_1:
                prob = total_prob1.marginalize([n for n in nodes_1 if n != node], inplace=False).values[1]
            elif node in nodes_2:
                prob = total_prob2.marginalize([n for n in nodes_2 if n != node], inplace=False).values[1]
            elif node in nodes_3:
                prob = total_prob3.marginalize([n for n in nodes_3 if n != node], inplace=False).values[1]
        elif node not in evidences.keys() and calculate_probabilities == 2:
            prob = marginals[node][1]
        else:
            prob = 1
        probH = int(prob * 255)
        color = '#' + format(probH, '02X') +''+ format(255-probH, '02X') + '00'
        CVE = nodes[node]['CVE']
        LABEL = nodes[node]['label']
        number = LABEL.split(":")[0]
        color = 'blue' if number in honeynodes else color
        prob = "{:.3f}".format(prob)
        shape = nodes[node]['shape']
        if CVE != 'null':
            return f'  {node} [label=\"{node} ({number})\\n{CVE}\\n{prob}\", color=\"{color}\", penwidth=3, shape=\"{shape}\"];\n'
        else:
            correspondance = re.search(regex, LABEL)
            if correspondance:
                return f'  {node} [label=\"{node} ({number})\\n{correspondance.group(1)}\\n{prob}\", color=\"{color}\", penwidth=3, shape=\"{shape}\"];\n'
            else:
                return f'  {node} [label="{node} ({number})\\n{LABEL}\\n{prob}", color="{color}", penwidth=3, shape="{shape}"];\n'
            
    for node in nodes.keys():
        dot += node_to_dot(node)
    for edge in BAG.edges():
        if display_kts or edge[0] in nodes.keys():
            dot += f'  \"{edge[0]}\" -> \"{edge[1]}\";\n'
    dot += '}'
    return dot

def kts_layer(BAG, ONE, nodes):    
    # The skills layer:
    cpd_Lskills = TabularCPD('Lskills', 2, [[0.1], [0.9]])
    cpd_Hskills = TabularCPD('Hskills', 2, [[0.9], [0.1]])
    # cpd_Lskills = TabularCPD('Lskills', 2, [[0.5], [0.5]])

    
    # The knowledge layer:
    cpd_knowledge = [TabularCPD(knowledge, 2, [[0.5], [0.5]]) for knowledge in ['Known vulnerabilities', 'CQCM', 'No credentials', 'MITM', 'Permissions move', 'Privilege escalation', 'Lateral move', 'ADCS']]

    # Import all the dependencies
    cpt_l4 = create_AND_table([ONE, ONE, ONE, ONE])
    with open('./Threat_Inteligence/CVE_knowledge_tooling_skills.csv', mode='r') as ktsfile:
        reader = csv.DictReader(ktsfile)
        kts_dict = {}
        for row in reader:
            cve = row['Vulnerability']
            tmp = {'tool': row['tool'], 'skills': row['skills'], 'Type': row['Type']}
            kts_dict[cve] = tmp
    for node in nodes.items():
            id = node[0]
            node = node[1]
            if node['CVE'] != "null":
                row = kts_dict[node['CVE']]
                build_tools_tree(BAG, Parser(tokenizer(row['tool'])).parse(), id, ONE)
                BAG.add_edge(row['skills'] + 'skills', id)
                build_tools_tree(BAG, Parser(tokenizer(row['Type'])).parse(), id, ONE)
                parents = BAG.get_parents(id)
                BAG.add_cpds(TabularCPD(id, 2, cpt_l4.T, parents, evidence_card=2*np.ones(len(parents))))
    # We add all the necessary CPDs to the BAG
    for cpd in [cpd_Lskills, cpd_Hskills] + cpd_knowledge:
        if BAG.__contains__(cpd.variable):
            BAG.add_cpds(cpd)

def rmv_node(BAG, node, edges):
    edge = [(u,node) for u in BAG.get_parents(node)]
    BAG.remove_edges_from(edge)
    BAG.add_cpds(TabularCPD(node, 2, create_AND_table([0]).T))

    

In [4]:

# Path to the folder containing the tree
path = Path.cwd() / ("Personnal_simulations/output_" + simulation + "/strongly_connected_components/")
file_name = "ag-nocycles.dot"
path_to_dot = path / file_name
basename = "loopy_"

# We all read from the file, adding probabilities in the same time
BAG, edges, nodes = parse_dot(open(path_to_dot, 'r').read(), ONE)

# This is the reference BAG, before the attacker has compromised any node
BAG_ref = BAG.copy()
prop0 = BeliefPropagation(BAG_ref)

# We create a dictionary to get the node number from the label
inverted_nodes = {int(v['label'].split(':')[0]): k for k, v in nodes.items()}


In [5]:
# BAG.remove_edges_from([(12,11 ) , (11,10), (10,9), (9,8)])
# BAG.remove_nodes_from([11,10,9])
# nodes = {k: v for k, v in nodes.items() if k not in [11,10,9]}
# BAG.add_edges_from([(12,8)])
# BAG.add_cpds(TabularCPD(8, 2, create_AND_table([ONE]).T, [12], evidence_card=2*np.ones(1)))

In [6]:
kts_layer(BAG, ONE, nodes)



In [7]:
# rmv_node(BAG, 22, edges)
# BAG.get_parents(22)

In [12]:
evidence = {"15":1, "24":1}
with open(path / (basename + "15_24" ".dot"), 'w') as f:
    f.write(to_dot(BAG, nodes, evidence, display_kts=True, calculate_probabilities=2))
with open(path / ("BAG_ref" + ".dot"), 'w') as f:
    f.write(to_dot(BAG_ref, nodes, evidence))

BayesianNetwork with 45 nodes and 85 edges
marginals {VarDict(num_states=array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2], dtype=int64), _hash=3099039872000000000, variable_names=('11', '10', '16', 'Hashcat|John', 'Lskills', 'CrackingHash', 'Responder&Impacket', 'Hskills', 'LateralMove', '9', '8', 'Cme', '7', '6', '23', '5', '12', '4', '27', 'Rubeus', 'Kerberos', '15', '14', '26', 'Lsassy|Impacket', '13', '22', 'Impacket', 'PrivilegeEscalation', '3', '2', '28', 'Impacket|Rubeus', '1', '21', '24', '20', '25', '19', '18', '17', 'Lsassy', 'Responder', 'Hashcat', 'John')): {'1': Array([0.9574864 , 0.04251368], dtype=float32), '10': Array([0.69155765, 0.30844238], dtype=float32), '11': Array([1.4330055e-05, 9.9998569e-01], dtype=float32), '12': Array([0.966795  , 0.03320495], dtype=float32), '13': Array([0.96537083, 0.03462919], dtype=float32), '14': Array([0.99099594, 0.00900407], dty

In [None]:
evidence = {15:1}
with open(path / ("output_" + str(15) + ".dot"), 'w') as f:
    f.write(to_dot(BAG, nodes, evidence, display_kts=False, calculate_probabilities=True))

BayesianNetwork with 45 nodes and 81 edges


In [11]:
evidence = {15:1, 24:1}
with open(path / ("output_" + "15_24" + ".dot"), 'w') as f:
    f.write(to_dot(BAG, nodes, evidence, display_kts=False, calculate_probabilities=1))

BayesianNetwork with 45 nodes and 85 edges


ValueError: Node 15 not in not in graph

In [None]:
evidence = {15:1, 5:1}
with open(path / ("output_" + "15_5" + ".dot"), 'w') as f:
    f.write(to_dot(BAG, nodes, evidence, display_kts=False, calculate_probabilities=True))

BayesianNetwork with 45 nodes and 85 edges


## When the attacker become dcsync and we want to recalculate for domain admin
src_node = [7, 10]
dst_node = [ 14, 16, 12, 2]

In [None]:
tab = BeliefPropagation(BAG).query(['Hskills'], evidence=evidence)
print(tab)

+------------+----------------+
| Hskills    |   phi(Hskills) |
| Hskills(0) |         0.0009 |
+------------+----------------+
| Hskills(1) |         0.9991 |
+------------+----------------+
