# Set working directory

In [1]:
import os
# The notebook lives in BRON/tutorials, but every path below is relative to the
# repository root, so step up one level when launched from the tutorials folder.
cwd = os.path.split(os.getcwd())
launched_from_tutorials = cwd[-1] == 'tutorials'
if launched_from_tutorials:
    os.chdir(os.pardir)

# Stop early if we still are not inside a directory named BRON.
assert os.path.basename(os.getcwd()) == 'BRON'

# Import modules

In [2]:
import pandas as pd
import csv
import json
import statistics
import time
from memory_profiler import memory_usage
from typing import Tuple, Set, List, Dict
from path_search.path_search_BRON import main_attack
from meta_analysis.find_riskiest_software import load_graph_network, riskiest_software

# BRON-JSON

BRON-JSON is the JSON-based implementation of BRON. Run the next code cell to build BRON-JSON.

In [3]:
from download_threat_information.download_threat_data import _download_attack, _download_capec, _download_cwe, _download_cve, main
from download_threat_information.parsing_scripts.parse_attack_tactic_technique import link_tactic_techniques
from download_threat_information.parsing_scripts.parse_cve import parse_cve_file
from download_threat_information.parsing_scripts.parse_capec_cwe import parse_capec_cwe_files
from BRON.build_BRON import build_graph, BRON_PATH

# --- Step 1: download the raw threat information -----------------------------
out_path = 'download_threat_information'
# One entry per CVE feed year, 2002 through 2020 inclusive, as strings.
cve_years = [str(year) for year in range(2002, 2021)]
main(cve_years)

# --- Step 2: parse the downloaded files --------------------------------------
# ATT&CK: link tactics and techniques.
filename = os.path.join(out_path, 'raw_enterprise_attack.json')
link_tactic_techniques(filename, out_path)

# CVE: parse the compressed CVE file into the JSON file named below.
cve_path = os.path.join(out_path, 'raw_CVE.json.gz')
save_path_file = "cve_map_cpe_cwe_score.json"
save_file = os.path.join(out_path, save_path_file)
parse_cve_file(cve_path, save_file)

# CAPEC and CWE are parsed together.
capec_file = os.path.join(out_path, 'raw_CAPEC.json')
cwe_file = os.path.join(out_path, 'raw_CWE.zip')
parse_capec_cwe_files(capec_file, cwe_file, save_path=out_path)

# --- Step 3: build BRON from the parsed data ---------------------------------
BRON_folder_path = 'full_data/full_output_data'
os.makedirs(BRON_folder_path, exist_ok=True)
input_data_folder = 'download_threat_information'
# build_graph is given the output folder; make sure the BRON_PATH sub-folder exists too.
BRON_original_id_to_bron_id_path = os.path.join(BRON_folder_path, BRON_PATH)
os.makedirs(BRON_original_id_to_bron_id_path, exist_ok=True)
build_graph(BRON_folder_path, input_data_folder)

# BRON-Graph-DB

BRON-Graph-DB stores BRON in ArangoDB. Run the following code cell to connect to BRON-Graph-DB.

In [4]:
import arango

# Connection settings for BRON-Graph-DB. Each value can be overridden through an
# environment variable (e.g. to point at a local ArangoDB instance or to use
# non-guest credentials without committing them to the notebook); the defaults
# are the public read-only guest settings.
SERVER_IP = os.environ.get('BRON_SERVER_IP', 'bron.alfa.csail.mit.edu')
USERNAME = os.environ.get('BRON_USERNAME', 'guest')
PASSWORD = os.environ.get('BRON_PASSWORD', 'guest')
DB = os.environ.get('BRON_DB', 'BRON')

# `db` is the handle used by every AQL query later in the notebook.
client = arango.ArangoClient(hosts=f"http://{SERVER_IP}:8529")
db = client.db(DB, username=USERNAME, password=PASSWORD, auth_method="basic")

# Path search queries

Two queries require searching graph paths. For them, the input is a CSV file of node IDs and the output is a CSV file with the IDs of nodes connected to each of the input nodes along an edge in BRON.

The first query finds the threats connected to the top 10 CVEs, which involves 390 nodes. The second query finds the threats and vulnerabilities connected to the top 25 CWEs, which involves 322K nodes.

# Query: Threats connected to top 10 CVEs

In [5]:
# Input CSV of starting CVE IDs and the CSV the search results are written to.
# Both paths are relative to the repository root (the working directory set above).
top_10_cves_starting_file = 'tutorials/top_10_cves_starting_point.csv'
top_10_cves_results_file = 'tutorials/top_10_cves_search_results.csv'

## BRON-JSON

In [6]:
# Time 30 runs of the top-10-CVE path search on BRON-JSON.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the samples cannot be
# skewed by system clock adjustments during the benchmark.
top_10_cves_times_BRON_JSON = []
for i in range(30):
    start_time = time.perf_counter()
    main_attack(BRON_folder_path, top_10_cves_starting_file, top_10_cves_results_file, 'cve', length=False)
    top_10_cves_times_BRON_JSON.append(time.perf_counter() - start_time)

# Summary statistics of the elapsed times, in seconds.
print("Min: ", min(top_10_cves_times_BRON_JSON))
print("Max: ", max(top_10_cves_times_BRON_JSON))
print("Mean: ", statistics.mean(top_10_cves_times_BRON_JSON))
print("SD: ", statistics.stdev(top_10_cves_times_BRON_JSON))

Min:  13.152904987335205
Max:  18.00979495048523
Mean:  15.323847134908041
SD:  1.7126983611103115


In [7]:
def top_10_cves_path_search_BRON_JSON():
    """Run the top-10-CVE path search on BRON-JSON once.

    Zero-argument wrapper so the search can be handed to memory_usage().
    """
    main_attack(BRON_folder_path, top_10_cves_starting_file, top_10_cves_results_file, 'cve', length=False)

# For each of 30 runs keep the largest value reported by memory_usage().
top_10_cves_mem_usages_BRON_JSON = [
    max(memory_usage(top_10_cves_path_search_BRON_JSON)) for _ in range(30)
]

# Print the same four summary statistics as the timing cells.
for label, summarize in (("Min: ", min), ("Max: ", max),
                         ("Mean: ", statistics.mean), ("SD: ", statistics.stdev)):
    print(label, summarize(top_10_cves_mem_usages_BRON_JSON))

Min:  3029.9453125
Max:  3196.4609375
Mean:  3109.1575520833335
SD:  72.61742354161925


## BRON-Graph-DB

In [8]:
# AQL template that looks up the `_key` of the document whose `original_id`
# matches. Filled with str.format(collection_name, original_id); the values are
# interpolated directly into the query text rather than passed as bind variables.
query_template_bron_id = """
FOR c IN {}
    FILTER c.original_id == "{}"
    RETURN c._key
"""

# AQL template for a traversal of depth 1..5 over the "BRONGraph" graph that
# returns the distinct `_key` of every reached vertex. Filled with
# str.format(direction, start_vertex_id). The doubled braces around OPTIONS are
# str.format escapes and become single braces in the final query.
query_template_connections = """
WITH tactic, technique, capec, cwe, cve, cpe
FOR vertex
    IN 1..5
    {} "{}"
    GRAPH "BRONGraph"
    OPTIONS {{ uniqueVertices: 'global', bfs: true }}
    RETURN DISTINCT vertex._key
"""

def execute_query(query: str) -> Set[str]:
    """Validate an AQL query, run it on `db`, and return its results as a set.

    Args:
        query: Complete AQL query text.

    Returns:
        The set of values yielded by the result cursor (duplicates collapse).
    """
    assert db.aql.validate(query)
    return set(db.aql.execute(query))

def convert_original_to_bron_id(data_type: str, original_ids: Tuple[str, ...]) -> Tuple[str, ...]:
    """Translate original IDs into BRON IDs by querying BRON-Graph-DB.

    Args:
        data_type: Name of the collection to search; it is substituted into the
            FOR clause of `query_template_bron_id`.
        original_ids: Original IDs to look up, one query per ID.

    Returns:
        A tuple with one BRON ID (`_key`) per input ID, in input order.

    Raises:
        KeyError: If an ID has no matching document. The original code raised
            the same exception type from `set.pop()`, but with a message that
            did not say which ID was missing.
    """
    bron_ids_list = []
    for original_id in original_ids:
        query_bron_id = query_template_bron_id.format(data_type, original_id)
        results_bron_id = execute_query(query_bron_id)
        if not results_bron_id:
            raise KeyError(f"No '{data_type}' node found with original_id '{original_id}'")
        # One match is expected per ID; if there are several, an arbitrary one is taken.
        bron_ids_list.append(results_bron_id.pop())
    return tuple(bron_ids_list)

def save_search_results_csv(connections_list: List[Dict[str, Set[str]]], results_file: str):
    """Write the path-search results to a CSV file, one row per starting node.

    Args:
        connections_list: One dictionary per starting node, keyed by data type
            ('tactic', 'technique', 'capec', 'cwe', 'cve', 'cpe'). Values are
            written with their default string representation.
        results_file: Path of the CSV file to create or overwrite.
    """
    csv_columns = ['tactic', 'technique', 'capec', 'cwe', 'cve', 'cpe']
    # newline='' lets the csv module control line endings itself; without it,
    # platforms that translate '\n' (Windows) end up with a blank line after every row.
    with open(results_file, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(connections_list)

def path_search_BRON_Graph_DB(data_type: str, starting_file: str, results_file: str, length: bool=False):
    """Find every node connected to each starting node and save the result as CSV.

    The starting IDs are read from the first row of `starting_file`, converted
    to BRON IDs, and each one is used as the start of an INBOUND and an OUTBOUND
    traversal built from `query_template_connections`.

    Args:
        data_type: Collection of the starting nodes (e.g. 'cve' or 'cwe').
        starting_file: CSV file whose first row lists the original IDs. An empty
            file is treated as "no starting nodes" and yields a header-only
            results file instead of an IndexError.
        results_file: CSV file the results are written to.
        length: If True, store the number of connected nodes per data type
            instead of the node IDs themselves.
    """
    # Only the first row is used, so read just that row. newline='' is the
    # documented way to open files for the csv module.
    with open(starting_file, newline='') as f:
        original_ids = tuple(next(csv.reader(f), ()))
    bron_ids = convert_original_to_bron_id(data_type, original_ids)

    directions = ('INBOUND', 'OUTBOUND')
    connections_list = [] # List of dictionaries for each ID
    for bron_id in bron_ids:
        connections = {'tactic': set(), 'technique': set(), 'capec': set(), 'cwe': set(), 'cve': set(), 'cpe': set()}
        connections[data_type].add(bron_id) # Add known connection of itself
        full_bron_id = f'{data_type}/{bron_id}'

        for direction in directions:
            query_connections = query_template_connections.format(direction, full_bron_id)
            for result in execute_query(query_connections):
                # The text before the first '_' of a returned key selects the bucket.
                connections[result.split('_')[0]].add(result)

        if length: # Store number of data types instead of IDs
            connections_list.append({data_type_key: len(entries) for data_type_key, entries in connections.items()})
        else:
            connections_list.append(connections)

    save_search_results_csv(connections_list, results_file)

In [9]:
# Time 30 runs of the top-10-CVE path search on BRON-Graph-DB.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the samples cannot be
# skewed by system clock adjustments during the benchmark.
top_10_cves_times_BRON_Graph_DB = []
for i in range(30):
    start_time = time.perf_counter()
    path_search_BRON_Graph_DB('cve', top_10_cves_starting_file, top_10_cves_results_file)
    top_10_cves_times_BRON_Graph_DB.append(time.perf_counter() - start_time)

# Summary statistics of the elapsed times, in seconds.
print("Min: ", min(top_10_cves_times_BRON_Graph_DB))
print("Max: ", max(top_10_cves_times_BRON_Graph_DB))
print("Mean: ", statistics.mean(top_10_cves_times_BRON_Graph_DB))
print("SD: ", statistics.stdev(top_10_cves_times_BRON_Graph_DB))

Min:  4.139862060546875
Max:  6.321671962738037
Mean:  4.903802156448364
SD:  0.7657081047300861


In [11]:
def top_10_cves_path_search_BRON_Graph_DB():
    """Run the top-10-CVE path search on BRON-Graph-DB once.

    Zero-argument wrapper so the search can be handed to memory_usage().
    """
    path_search_BRON_Graph_DB('cve', top_10_cves_starting_file, top_10_cves_results_file)

# For each of 30 runs keep the largest value reported by memory_usage().
top_10_cves_mem_usages_BRON_Graph_DB = [
    max(memory_usage(top_10_cves_path_search_BRON_Graph_DB)) for _ in range(30)
]

# Print the same four summary statistics as the timing cells.
for label, summarize in (("Min: ", min), ("Max: ", max),
                         ("Mean: ", statistics.mean), ("SD: ", statistics.stdev)):
    print(label, summarize(top_10_cves_mem_usages_BRON_Graph_DB))

Min:  325.3359375
Max:  325.57421875
Mean:  325.3795572916667
SD:  0.09544102076156989


# Query: Threats and vulnerabilities connected to top 25 CWEs

In [12]:
# Input CSV of starting CWE IDs and the CSV the search results are written to.
# Both paths are relative to the repository root (the working directory set above).
top_25_cwes_starting_file = 'tutorials/top_25_cwes_starting_point.csv'
top_25_cwes_results_file = 'tutorials/top_25_cwes_search_results.csv'

## BRON-JSON

In [13]:
# Time 30 runs of the top-25-CWE path search on BRON-JSON.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the samples cannot be
# skewed by system clock adjustments during the benchmark.
top_25_cwes_times_BRON_JSON = []
for i in range(30):
    start_time = time.perf_counter()
    main_attack(BRON_folder_path, top_25_cwes_starting_file, top_25_cwes_results_file, 'cwe', length=False)
    top_25_cwes_times_BRON_JSON.append(time.perf_counter() - start_time)

# Summary statistics of the elapsed times, in seconds.
print("Min: ", min(top_25_cwes_times_BRON_JSON))
print("Max: ", max(top_25_cwes_times_BRON_JSON))
print("Mean: ", statistics.mean(top_25_cwes_times_BRON_JSON))
print("SD: ", statistics.stdev(top_25_cwes_times_BRON_JSON))

Min:  13.084337949752808
Max:  14.7556631565094
Mean:  13.598588188489279
SD:  0.5982005094070681


In [14]:
def top_25_cwes_path_search_BRON_JSON():
    """Run the top-25-CWE path search on BRON-JSON once.

    Zero-argument wrapper so the search can be handed to memory_usage().
    """
    main_attack(BRON_folder_path, top_25_cwes_starting_file, top_25_cwes_results_file, 'cwe', length=False)

# For each of 30 runs keep the largest value reported by memory_usage().
top_25_cwes_mem_usages_BRON_JSON = [
    max(memory_usage(top_25_cwes_path_search_BRON_JSON)) for _ in range(30)
]

# Print the same four summary statistics as the timing cells.
for label, summarize in (("Min: ", min), ("Max: ", max),
                         ("Mean: ", statistics.mean), ("SD: ", statistics.stdev)):
    print(label, summarize(top_25_cwes_mem_usages_BRON_JSON))

Min:  2905.5390625
Max:  3054.42578125
Mean:  3010.9264322916665
SD:  61.050130059064394


## BRON-Graph-DB

In [15]:
# Time 30 runs of the top-25-CWE path search on BRON-Graph-DB.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the samples cannot be
# skewed by system clock adjustments during the benchmark.
top_25_cwes_times_BRON_Graph_DB = []
for i in range(30):
    start_time = time.perf_counter()
    path_search_BRON_Graph_DB('cwe', top_25_cwes_starting_file, top_25_cwes_results_file)
    top_25_cwes_times_BRON_Graph_DB.append(time.perf_counter() - start_time)

# Summary statistics of the elapsed times, in seconds.
print("Min: ", min(top_25_cwes_times_BRON_Graph_DB))
print("Max: ", max(top_25_cwes_times_BRON_Graph_DB))
print("Mean: ", statistics.mean(top_25_cwes_times_BRON_Graph_DB))
print("SD: ", statistics.stdev(top_25_cwes_times_BRON_Graph_DB))

Min:  27.932520866394043
Max:  36.45831918716431
Mean:  33.20283730824789
SD:  3.1230440321029898


In [16]:
def top_25_cwes_path_search_BRON_Graph_DB():
    """Run the top-25-CWE path search on BRON-Graph-DB once.

    Zero-argument wrapper so the search can be handed to memory_usage().
    """
    path_search_BRON_Graph_DB('cwe', top_25_cwes_starting_file, top_25_cwes_results_file)

# For each of 30 runs keep the largest value reported by memory_usage().
top_25_cwes_mem_usages_BRON_Graph_DB = [
    max(memory_usage(top_25_cwes_path_search_BRON_Graph_DB)) for _ in range(30)
]

# Print the same four summary statistics as the timing cells.
for label, summarize in (("Min: ", min), ("Max: ", max),
                         ("Mean: ", statistics.mean), ("SD: ", statistics.stdev)):
    print(label, summarize(top_25_cwes_mem_usages_BRON_Graph_DB))

Min:  351.984375
Max:  354.0625
Mean:  353.3326822916667
SD:  0.8012457367321904


# Query: Riskiest software

This query outputs the Affected Product Configuration with the highest sum of CVSS scores for connected Vulnerabilities, which involves 2,453K nodes.

## BRON-JSON

In [26]:
# Time 30 runs of the riskiest-software query on BRON-JSON. Loading BRON.json is
# inside the timed region, so each sample covers load plus query.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the samples cannot be
# skewed by system clock adjustments during the benchmark.
riskiest_software_times_BRON_JSON = []
for i in range(30):
    start_time = time.perf_counter()
    graph = load_graph_network(f'{BRON_folder_path}/BRON.json')
    riskiest_software(graph)
    riskiest_software_times_BRON_JSON.append(time.perf_counter() - start_time)

# Summary statistics of the elapsed times, in seconds.
print("Min: ", min(riskiest_software_times_BRON_JSON))
print("Max: ", max(riskiest_software_times_BRON_JSON))
print("Mean: ", statistics.mean(riskiest_software_times_BRON_JSON))
print("SD: ", statistics.stdev(riskiest_software_times_BRON_JSON))

Min:  19.32332491874695
Max:  41.42757821083069
Mean:  25.214418013890583
SD:  8.203344986935523


In [27]:
def riskiest_software_BRON_JSON():
    """Load BRON.json and run the riskiest-software query on it once.

    Zero-argument wrapper so load plus query can be handed to memory_usage().
    """
    riskiest_software(load_graph_network(f'{BRON_folder_path}/BRON.json'))

# For each of 30 runs keep the largest value reported by memory_usage().
riskiest_software_mem_usages_BRON_JSON = [
    max(memory_usage(riskiest_software_BRON_JSON)) for _ in range(30)
]

# Print the same four summary statistics as the timing cells.
for label, summarize in (("Min: ", min), ("Max: ", max),
                         ("Mean: ", statistics.mean), ("SD: ", statistics.stdev)):
    print(label, summarize(riskiest_software_mem_usages_BRON_JSON))

Min:  3558.15234375
Max:  4172.609375
Mean:  3738.2493489583335
SD:  228.8380976618797


## BRON-Graph-DB

In [28]:
# AQL query that, for every document in `cpe`, gathers `metadata.weight` of the
# vertices one INBOUND step away over the `CveCpe` edge collection and returns
# the cpe's `original_id` together with the SUM of those weights.
# The query is executed as-is (no str.format), so the braces are single here.
query_riskiest_software = """
WITH cve, cpe
FOR c in cpe
    LET cvss_scores = (
        FOR vertex
            IN 1..1
            INBOUND c._id
            CveCpe
            OPTIONS { uniqueVertices: 'global', bfs: true }
            RETURN vertex.metadata.weight
    )
    RETURN { cpe_node: c.original_id, cvss_score: SUM(cvss_scores) }
"""

def execute_query(query: str) -> List[dict]:
    """Validate an AQL query, run it on `db`, and return its results as a list.

    NOTE: this redefinition replaces the set-returning `execute_query` defined
    for the path-search queries. A list is needed here because each result is a
    dictionary, which cannot be stored in a set. Once this cell has run, the
    path-search helpers also receive lists from `execute_query`.

    Args:
        query: Complete AQL query text.

    Returns:
        Every value yielded by the result cursor, in cursor order.
    """
    assert db.aql.validate(query)
    return list(db.aql.execute(query))

def riskiest_software_BRON_Graph_DB():
    """Find the cpe node(s) with the highest summed score in BRON-Graph-DB.

    Runs `query_riskiest_software` and scans its rows once, keeping every
    `cpe_node` tied for the largest `cvss_score`.

    Returns:
        A tuple `(nodes, score)`: the set of tied `cpe_node` values and their
        score. With no rows the result is `(set(), -1)`.
    """
    top_nodes = set()
    top_score = -1
    for row in execute_query(query_riskiest_software):
        node, score = row['cpe_node'], row['cvss_score']
        if score == top_score:
            # Tie with the current best: remember this node as well.
            top_nodes.add(node)
        elif score > top_score:
            # New best: discard the previous leaders.
            top_nodes, top_score = {node}, score
    return top_nodes, top_score

In [29]:
# Time 30 runs of the riskiest-software query on BRON-Graph-DB.
# time.perf_counter() is used instead of time.time(): it is a monotonic,
# high-resolution clock meant for measuring intervals, so the samples cannot be
# skewed by system clock adjustments during the benchmark.
riskiest_software_times_BRON_Graph_DB = []
for i in range(30):
    start_time = time.perf_counter()
    riskiest_software_BRON_Graph_DB()
    riskiest_software_times_BRON_Graph_DB.append(time.perf_counter() - start_time)

# Summary statistics of the elapsed times, in seconds.
print("Min: ", min(riskiest_software_times_BRON_Graph_DB))
print("Max: ", max(riskiest_software_times_BRON_Graph_DB))
print("Mean: ", statistics.mean(riskiest_software_times_BRON_Graph_DB))
print("SD: ", statistics.stdev(riskiest_software_times_BRON_Graph_DB))

Min:  24.950159072875977
Max:  35.09775495529175
Mean:  27.96189483006795
SD:  3.613808536195016


In [30]:
# For each of 30 runs of the riskiest-software query on BRON-Graph-DB keep the
# largest value reported by memory_usage().
riskiest_software_mem_usages_BRON_Graph_DB = [
    max(memory_usage(riskiest_software_BRON_Graph_DB)) for _ in range(30)
]

# Print the same four summary statistics as the timing cells.
for label, summarize in (("Min: ", min), ("Max: ", max),
                         ("Mean: ", statistics.mean), ("SD: ", statistics.stdev)):
    print(label, summarize(riskiest_software_mem_usages_BRON_Graph_DB))

Min:  1201.890625
Max:  1292.265625
Mean:  1217.1516927083333
SD:  36.799029712048245
