# TMTRead.py

## This script converts MS TMT models to Threat Dragon models.
## Goal: parse the TMT XML model into a dict that can be dumped into Threat Dragon's JSON format.
## https://github.com/jgadsden/owasp-threat-dragon-models/tree/master/ThreatDragonModels

import os
import tkinter as tk
import xml.etree.ElementTree as ET
from tkinter import filedialog

import CairisWrite

# Prefix-to-URI maps passed to ElementTree's findall():
#   'b' is the ThreatModeling.KnowledgeBase namespace (property elements),
#   'a' is the Serialization/Arrays namespace.
ele_namespace = dict(b='http://schemas.datacontract.org/2004/07/ThreatModeling.KnowledgeBase')
any_namespace = dict(a='http://schemas.microsoft.com/2003/10/Serialization/Arrays')


def get_flow_points(_cell, ele):
    """Copy a connector's endpoint GUIDs and handle point into a flow cell.

    Sets ``_cell['source']`` and ``_cell['target']`` to ``{'id': guid}`` using
    the ``SourceGuid`` / ``TargetGuid`` children of *ele*, and appends one
    ``{'x', 'y'}`` vertex built from ``HandleX`` / ``HandleY`` to
    ``_cell['vertices']`` (which must already be a list).

    A child tag that is absent (or has no text) yields ``None`` for that value
    instead of raising ``UnboundLocalError``.  If a tag occurs several times the
    last occurrence wins.

    :param _cell: cell dict, updated in place
    :param ele: ElementTree element describing the connector
    :return: None
    """
    abstracts_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}'

    def last_text(tag):
        # text of the last matching child, or None when the tag is missing
        found = ele.findall(abstracts_ns + tag)
        return found[-1].text if found else None

    def last_int(tag):
        text = last_text(tag)
        return int(text) if text is not None else None

    _cell['source'] = {'id': last_text('SourceGuid')}
    _cell['target'] = {'id': last_text('TargetGuid')}
    # Append the finished vertex instead of appending a blank one and then
    # writing to index 0, which clobbered the first entry of a non-empty list.
    _cell['vertices'].append({'x': last_int('HandleX'), 'y': last_int('HandleY')})


def get_boundary_points(_cell, ele):
    """Copy a boundary line's end points and handle point into a boundary cell.

    Used for MS TMT ``LineBoundary`` elements.  Sets ``_cell['source']`` and
    ``_cell['target']`` to ``{'x', 'y'}`` dicts read from ``SourceX/Y`` and
    ``TargetX/Y``, and appends the single ``HandleX/Y`` vertex of the line to
    ``_cell['vertices']`` (which must already be a list).

    A coordinate whose tag is absent (or has no text) becomes ``None`` instead
    of raising ``UnboundLocalError``.  If a tag occurs several times the last
    occurrence wins.

    :param _cell: cell dict, updated in place
    :param ele: ElementTree element describing the boundary line
    :return: None
    """
    abstracts_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}'

    def last_int(tag):
        # integer value of the last matching child, or None when unavailable
        found = ele.findall(abstracts_ns + tag)
        if not found or found[-1].text is None:
            return None
        return int(found[-1].text)

    _cell['source'] = {'x': last_int('SourceX'), 'y': last_int('SourceY')}
    _cell['target'] = {'x': last_int('TargetX'), 'y': last_int('TargetY')}
    # Append the finished vertex instead of appending a blank one and then
    # writing to index 0, which clobbered the first entry of a non-empty list.
    _cell['vertices'].append({'x': last_int('HandleX'), 'y': last_int('HandleY')})


def calc_boundary_box(cell, ele):
    """Describe a TMT boundary box as a closed Threat Dragon boundary line.

    Threat Dragon does not support boundary boxes, only lines.  As a workaround
    the rectangle is expressed with five points: source and target are both the
    top-left corner, and three vertices supply the remaining corners
    (top-right, bottom-right, bottom-left).

    :param cell: cell dict with a ``'vertices'`` list; updated in place
    :param ele: ElementTree element providing Height, Width, Left and Top
    :return: the same *cell*
    """
    abstracts = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}'

    # three blank corner slots plus blank source/target anchors
    cell['vertices'].extend({'x': None, 'y': None} for _ in range(3))
    cell['source'] = {'x': None, 'y': None}
    cell['target'] = {'x': None, 'y': None}

    for node in ele.findall(abstracts + 'Height'):
        box_h = int(node.text)
    for node in ele.findall(abstracts + 'Width'):
        box_w = int(node.text)
    for node in ele.findall(abstracts + 'Left'):
        x0 = int(node.text)
    for node in ele.findall(abstracts + 'Top'):
        y0 = int(node.text)

    # the line starts and ends on the top-left corner
    for anchor in (cell['source'], cell['target']):
        anchor['x'] = x0
        anchor['y'] = y0

    # remaining corners, in drawing order
    corners = ((x0 + box_w, y0), (x0 + box_w, y0 + box_h), (x0, y0 + box_h))
    for slot, (corner_x, corner_y) in zip(cell['vertices'], corners):
        slot['x'] = corner_x
        slot['y'] = corner_y
    return cell


def get_ele_size(cell, ele):
    """Fill a stencil cell's size and position from its TMT element.

    Height/Width go to ``cell['size']`` and Left/Top to ``cell['position']``.
    Values whose tag is absent from *ele* are left untouched.

    :param cell: cell dict holding ``'size'`` and ``'position'`` dicts
    :param ele: ElementTree element of the stencil
    :return: the same *cell*
    """
    abstracts = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}'
    # (TMT tag, cell sub-dict, key inside that sub-dict)
    targets = (
        ('Height', 'size', 'height'),
        ('Width', 'size', 'width'),
        ('Left', 'position', 'x'),
        ('Top', 'position', 'y'),
    )
    for tag, group, key in targets:
        for node in ele.findall(abstracts + tag):
            cell[group][key] = int(node.text)
    return cell


def get_ele_prop(ele, prop_name):
    """Find the value of an element property by its display name.

    Searches the ``Properties`` child of *ele* for an ``anyType`` entry whose
    ``DisplayName`` equals *prop_name* and returns the text of that entry's
    ``Value`` (the last one if there are several).

    :param ele: ElementTree element of a TMT stencil, line or boundary
    :param prop_name: display name of the wanted property, e.g. ``'Name'``
    :return: the property text, or ``None`` when the property is not present
        or has no ``Value`` element
    """
    # element properties are at this level
    for props in ele.findall('{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}Properties'):
        for types in props.findall('.//a:anyType', any_namespace):
            # all properties are child elements of the anyType element
            for dis_name in types.findall('.//b:DisplayName', ele_namespace):
                # TODO: handle getting values from SelectedIndex if index available
                # selection = types.find('.//b:SelectedIndex', ele_namespace)
                if dis_name.text == prop_name:
                    # start from None so a property without a Value element
                    # returns None instead of raising UnboundLocalError
                    value = None
                    for val in types.findall('.//b:Value', ele_namespace):
                        value = val.text
                    return value
    return None


def set_cell_attribs(cell, _name):
    """Build the ``attrs`` CSS-class entries of a cell.

    The class strings encode whether the cell has open threats
    (``hasOpenThreats`` / ``hasNoOpenThreats``) and whether it is in scope
    (``isInScope`` / ``isOutOfScope``).

    ``cell['hasOpenThreats']`` is settled here first:

    * no threats: the empty ``'threats'`` key is removed and the flag is False;
    * threats present and the flag still unset (``None``): the flag becomes
      True when at least one threat has status ``"Open"``;
    * threats present and the flag already set: the existing value is kept.

    :param cell: cell dict with ``'type'``, ``'threats'`` and ``'outOfScope'``
    :param _name: label text shown on non-flow elements
    :return: the same *cell*, updated in place
    """
    if not cell['threats']:
        del cell['threats']
        cell['hasOpenThreats'] = False
    elif cell.get('hasOpenThreats') is None:
        cell['hasOpenThreats'] = any(
            threat.get('status') == 'Open' for threat in cell['threats'])

    # both branches used to yield 'hasNoOpenThreats'; pick the matching class
    threats = 'hasOpenThreats' if cell['hasOpenThreats'] else 'hasNoOpenThreats'
    scope = 'isOutOfScope' if cell['outOfScope'] else 'isInScope'

    if cell['type'] == "tm.Flow":
        cell['attrs'] = {
            # the arrow head is always rendered as in scope
            '.marker-target': {'class': "marker-target " + threats + " isInScope"},
            '.connection': {'class': "connection " + threats + " " + scope},
        }
    # everything that's not a flow
    else:
        cell['attrs'] = {
            '.element-shape': {'class': "element-shape " + threats + " " + scope},
            'text': {'text': _name},
            # the label text is always rendered as in scope
            '.element-text': {'class': "element-text " + threats + " isInScope"},
        }
    return cell


def find_ele_type(tmt_type, ele, _name):
    """Create the cell dict skeleton matching a TMT element's type.

    The ``xsi:type`` attribute selects the cell flavour:

    * ``Connector`` becomes ``tm.Flow`` (with a label, source/target and vertex)
    * ``LineBoundary`` / ``BorderBoundary`` become ``tm.Boundary``
    * ``StencilRectangle`` / ``StencilEllipse`` / ``StencilParallelLines``
      become ``tm.Actor`` / ``tm.Process`` / ``tm.Store``

    :param tmt_type: attribute mapping of the element (``ele.attrib``)
    :param ele: ElementTree element being converted
    :param _name: display name used for a flow's label
    :return: the new cell dict, or ``None`` for any other type
    """
    kind = tmt_type['{http://www.w3.org/2001/XMLSchema-instance}type']
    if kind in ("Connector", "LineBoundary", "BorderBoundary"):
        # flows and boundaries carry source/target instead of a position
        cell = {
            'name': get_ele_prop(ele, 'Name'),
            'type': None,
            'size': None,
            'smooth': None,
            'source': None,
            'target': None,
            'vertices': [],
            'id': None,
            'z': None,
            'hasOpenThreats': None,
            'threats': None,
            'attrs': None,
        }
        if kind == "Connector":
            label_text = {'text': _name, 'font-weight': str(400), 'font-size': 'small'}
            cell['labels'] = [{'position': 0.5, 'attrs': {'text': label_text}}]
            ele_type = "tm.Flow"
            get_flow_points(cell, ele)
        else:
            ele_type = "tm.Boundary"
            if kind == "BorderBoundary":
                cell = calc_boundary_box(cell, ele)
            cell['attrs'] = dict()
            # boundary lines store their coordinates differently from boxes
            if kind == "LineBoundary":
                get_boundary_points(cell, ele)
        cell['smooth'] = True
        # default size for boundaries and flows
        cell['size'] = {'width': int(10), 'height': int(10)}
    else:
        # must be a process, datastore, or external interactor
        cell = {
            'name': get_ele_prop(ele, 'Name'),
            'type': None,
            'size': {'width': None, 'height': None},
            'position': {'x': None, 'y': None},
            'angle': int(0),
            'id': None,
            'z': None,
            'hasOpenThreats': None,
            'threats': None,
            'attrs': None,
        }
        stencil_types = {
            "StencilRectangle": "tm.Actor",
            "StencilEllipse": "tm.Process",
            "StencilParallelLines": "tm.Store",
        }
        ele_type = stencil_types.get(kind)
        if ele_type is None:
            return None
        cell = get_ele_size(cell, ele)
    cell['threats'] = list()
    cell['type'] = ele_type
    return cell


def get_element(ele, _z, _root, _m_guid):
    """Convert one TMT key/value entry into a Threat Dragon cell.

    Each ``Value`` child of *ele* is turned into a cell: type-specific
    skeleton, scope flags, GUID, optional threats, ``attrs`` and ``z`` index.

    :param ele: ``KeyValueOfguidanyType`` element holding the ``Value``
    :param _z: z index stored on the cell
    :param _root: element searched for threat instances, or ``None`` to skip
        the threat lookup
    :param _m_guid: GUID of the diagram the cell belongs to, or ``None`` to
        skip the threat lookup
    :return: the cell dict, or ``None`` when *ele* holds no ``Value`` of a
        supported type (callers should skip such entries)
    """
    cell = None
    # GUID also at this level
    for ele4 in ele.findall('{http://schemas.microsoft.com/2003/10/Serialization/Arrays}Value'):
        # find element type and get cell dict format
        name = get_ele_prop(ele4, 'Name')
        candidate = find_ele_type(ele4.attrib, ele4, name)
        if candidate is None:
            # unsupported TMT type: skip it rather than fail on a None cell
            continue
        cell = candidate

        if get_ele_prop(ele4, 'Out Of Scope') == 'true':
            cell['outOfScope'] = True
            cell['reasonOutOfScope'] = get_ele_prop(ele4, 'Reason For Out Of Scope')
        else:
            cell['outOfScope'] = False
        # get GUID
        for guid in ele4.findall('{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}Guid'):
            cell['id'] = guid.text
        # Threats are only looked up when the caller supplies a root and a
        # diagram GUID.  Compare against None: the truth value of an Element
        # depends on whether it has children.
        if _root is not None and _m_guid:
            cell = get_threats(_root, cell, _m_guid)
        cell = set_cell_attribs(cell, name)
        cell['z'] = _z
    return cell


def cal_max_size(ele):
    """Return the right and bottom extent of the element stored in *ele*.

    Reads Height, Left, Top and Width from the ``Value`` children of *ele*
    (missing values count as 0; a later value replaces an earlier one) and
    returns ``(Left + Width, Top + Height)``.  The result is used to work out
    how large the diagram canvas has to be.

    :param ele: ``KeyValueOfguidanyType`` element
    :return: tuple ``(x, y)`` of ints
    """
    abstracts = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}'
    dims = {'Height': 0, 'Left': 0, 'Top': 0, 'Width': 0}
    for value in ele.findall('{http://schemas.microsoft.com/2003/10/Serialization/Arrays}Value'):
        for tag in ('Height', 'Left', 'Top', 'Width'):
            for node in value.findall(abstracts + tag):
                dims[tag] = int(node.text)
    return dims['Width'] + dims['Left'], dims['Top'] + dims['Height']


def get_diagram_size(_root):
    """Compute the diagram canvas size from its borders and lines.

    Takes the largest x and y extents reported by ``cal_max_size`` over all
    entries under ``Borders`` and ``Lines`` and enlarges them by 13 percent,
    because some boundaries had trouble showing on a tight canvas.

    :param _root: drawing surface element containing ``Borders`` / ``Lines``
    :return: dict with ``'height'`` and ``'width'``
    """
    model_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model}'
    pair_tag = '{http://schemas.microsoft.com/2003/10/Serialization/Arrays}KeyValueOfguidanyType'
    widest = 0
    tallest = 0
    for section in ('Borders', 'Lines'):
        for container in _root.findall(model_ns + section):
            for entry in container.findall(pair_tag):
                extent_x, extent_y = cal_max_size(entry)
                widest = max(widest, extent_x)
                tallest = max(tallest, extent_y)
    # TODO: this offset only happens on +x,-y; will need to also shift all elements position 13% on -x,+y
    #       to make everything look really pretty
    return {'height': round(tallest * 1.13), 'width': round(widest * 1.13)}


def get_notes(_root):
    """Collect the notes stored in the model.

    :param _root: root element of the parsed TMT document
    :return: list of ``[id, message]`` pairs, one per ``Note`` element; a part
        missing from a note is ``None``
    """
    model_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model}'
    msgs = []
    for notes in _root.findall(model_ns + 'Notes'):
        for note in notes.findall(model_ns + 'Note'):
            # reset for every note so a missing Id/Message neither raises nor
            # silently reuses the value of the previous note
            note_id = None
            note_message = None
            for id_node in note.findall(model_ns + 'Id'):
                note_id = id_node.text
            for message in note.findall(model_ns + 'Message'):
                note_message = message.text
            msgs.append([note_id, note_message])
    return msgs


def get_sum(_root):
    """Build the model summary (title, owner, description).

    Values come from the ``MetaInformation`` element.  A missing or empty
    title / description falls back to a fixed default text; a missing owner
    is ``None``.  Assumptions and external dependencies are not imported.

    :param _root: root element of the parsed TMT document
    :return: dict with ``'title'``, ``'owner'`` and ``'description'``
    """
    model_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model}'
    # defaults keep the lookups below from leaving a name unbound when the
    # metadata block or one of its children is absent
    title = None
    owner = None
    desc = None
    for meta in _root.findall(model_ns + 'MetaInformation'):
        for _title in meta.findall(model_ns + 'ThreatModelName'):
            title = _title.text
        for _owner in meta.findall(model_ns + 'Owner'):
            owner = _owner.text
        for _desc in meta.findall(model_ns + 'HighLevelSystemDescription'):
            desc = _desc.text
    if not title:
        title = "TMT import"
    if not desc:
        desc = "Imported from Microsoft Threat Modeling Tool .tm7 file"
    return {'title': title, 'owner': owner, 'description': desc}


def get_contribs(_root):
    """Return the model's contributors as a list of ``{'name': ...}`` dicts.

    The ``Contributors`` metadata field is split on commas; surrounding
    whitespace is removed from each name and empty names are dropped.

    :param _root: root element of the parsed TMT document
    :return: list with one dict per contributor, or ``None`` when the field is
        absent or empty
    """
    model_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model}'
    contribs = None
    for meta in _root.findall(model_ns + 'MetaInformation'):
        for _contribs in meta.findall(model_ns + 'Contributors'):
            if _contribs.text is None:
                return None
            contribs = _contribs.text.split(',')
    if contribs is None:
        return None
    # a fresh dict per person: reusing one dict made every list entry show the
    # last contributor's name
    return [{'name': person.strip()} for person in contribs if person.strip()]


def get_reviewers(_root):
    """Return the text of the model's ``Reviewer`` metadata field.

    :param _root: root element of the parsed TMT document
    :return: the reviewer text, or ``None`` when the field is absent or empty
    """
    model_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model}'
    # default avoids an unbound name when the metadata lacks a Reviewer
    reviewers = None
    for meta in _root.findall(model_ns + 'MetaInformation'):
        for _reviewrs in meta.findall(model_ns + 'Reviewer'):
            reviewers = _reviewrs.text
    return reviewers


def get_threats(_root, _cell, d_guid):
    """Attach the threats recorded for a flow to its cell.

    Threats are only stored on "interactors" (flows) in MS TMT.  Every threat
    instance whose ``DrawingSurfaceGuid`` equals *d_guid* and whose
    ``FlowGuid`` equals ``_cell['id']`` is converted to a threat dict and
    appended to ``_cell['threats']``.

    Fields a threat does not provide are left at their defaults (status
    ``"Open"``, everything else ``None``) rather than raising or inheriting the
    value of the previously processed threat.

    :param _root: element containing ``ThreatInstances``
    :param _cell: cell dict with ``'id'`` and a ``'threats'`` list
    :param d_guid: GUID of the diagram the cell belongs to
    :return: the same *_cell*
    """
    for ele in _root.findall('{http://schemas.datacontract.org/2004/07/ThreatModeling.Model}ThreatInstances'):
        for threat in ele.findall('.//a:*', any_namespace):
            flo_guid = ""
            for t_vals in threat.findall('.//a:Value', any_namespace):
                for f_guid in t_vals.findall('.//b:FlowGuid', ele_namespace):
                    flo_guid = f_guid.text
                for dia_guid in t_vals.findall('.//b:DrawingSurfaceGuid', ele_namespace):
                    # compare guids
                    if dia_guid.text == d_guid and flo_guid == _cell['id']:
                        # per-threat defaults, reset for every matching threat
                        threat_mits = ""
                        t_status = "Open"
                        t_severity = None
                        threat_cat = None
                        threat_title = None
                        threat_desc = None
                        threat_dict = dict.fromkeys(
                            ['status', 'severity', 'modelType', 'type', 'title', 'description', 'mitigation'])
                        m_type = "STRIDE"
                        for state in t_vals.findall('.//b:State', ele_namespace):
                            # convert to TD's status
                            # TD does not support "not applicable" and "needs investigation" threat statuses, default to Open
                            t_status = "Open"
                            if state.text == "Mitigated":
                                t_status = "Mitigated"
                        for priority in t_vals.findall('.//b:Priority', ele_namespace):
                            t_severity = priority.text
                        for props in t_vals.findall('.//b:Properties', ele_namespace):
                            for prop2 in props.findall('.//a:*', any_namespace):
                                for _key in prop2.findall('.//a:Key', any_namespace):
                                    if _key.text == 'Title':
                                        for _val in prop2.findall('.//a:Value', any_namespace):
                                            threat_title = _val.text
                                    elif _key.text == 'UserThreatDescription':
                                        for _val in prop2.findall('.//a:Value', any_namespace):
                                            threat_desc = _val.text
                                    elif _key.text == 'UserThreatCategory':
                                        for _val in prop2.findall('.//a:Value', any_namespace):
                                            threat_cat = _val.text
                                    # "Mitigations" is not a default threat property in MS TMT like it is in TD
                                    # Therefore we are searching for a custom prop that could be called different things
                                    # (an empty key has text None, so guard before the substring test)
                                    elif _key.text and "itigations" in _key.text:
                                        for _val in prop2.findall('.//a:Value', any_namespace):
                                            threat_mits = _val.text
                        # build threat dict and add
                        threat_dict['status'] = t_status
                        threat_dict['severity'] = t_severity
                        threat_dict['modelType'] = m_type
                        threat_dict['type'] = threat_cat
                        threat_dict['title'] = threat_title
                        threat_dict['description'] = threat_desc
                        if threat_mits is not None:
                            threat_dict['mitigation'] = threat_mits
                        _cell['threats'].append(threat_dict)
    return _cell


def main():
    """Ask for a ``.tm7`` file, convert it and hand the model to CairisWrite.

    Opens a file dialog, parses the chosen MS TMT file, builds the model dict
    (summary, contributors, reviewer and one entry per drawing surface) and
    passes it to ``CairisWrite.convert`` together with the input path minus its
    extension.  Exits via ``SystemExit`` when no file is chosen.

    :return: None
    """
    model_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model}'
    abstracts_ns = '{http://schemas.datacontract.org/2004/07/ThreatModeling.Model.Abstracts}'
    pair_tag = '{http://schemas.microsoft.com/2003/10/Serialization/Arrays}KeyValueOfguidanyType'

    tk_root = tk.Tk()
    tk_root.withdraw()
    try:
        file_path = filedialog.askopenfilename(parent=tk_root, filetypes=[("MS threat model files", "*.tm7")])
    except FileNotFoundError:
        file_path = None
    finally:
        # always release the hidden Tk window, including on the exit path
        tk_root.destroy()
    if not file_path:
        print('Must choose file path, quitting... ')
        # quit() is a convenience of the interactive interpreter; raise the
        # exception it wraps instead
        raise SystemExit

    root = ET.parse(file_path).getroot()

    # output name: input path without its extension
    base_name = os.path.splitext(file_path)[0]

    model = {
        'summary': get_sum(root),
        'detail': {
            'contributors': get_contribs(root),
            'diagrams': [],
            'reviewer': get_reviewers(root),
        },
    }
    diagrams = model['detail']['diagrams']
    # NOTE: model notes (see get_notes) are not exported yet.

    # indexing/numbering for TD elements
    z = 1
    for surface_list in root.findall(model_ns + 'DrawingSurfaceList'):
        # One diagram per DrawingSurfaceModel.  Creating the diagram entry here
        # (instead of once per list) is what makes multi-diagram files work.
        for surface in surface_list.findall(model_ns + 'DrawingSurfaceModel'):
            diagram = dict.fromkeys(['title', 'thumbnail', 'guid', 'id', 'diagramJson', 'size', 'diagramType'])
            # default to STRIDE for MS TMT, although you can use a different methodology/category in an MS template,
            # it's not common. "STRIDE per element" is MS TMT's default methodology; note that with the way MS TMT
            # uses "STRIDE per element", all MS threats originate in FLOWS only (generated threats are sorted by
            # "interactor") even if they deal with the target element
            diagram['diagramType'] = "STRIDE"
            diagram['thumbnail'] = "./public/content/images/thumbnail.stride.jpg"
            # cells contain all stencils and flows
            cells = []
            diagram['diagramJson'] = {'cells': cells}
            diagram['size'] = get_diagram_size(surface)

            m_guid = None
            for d_guid in surface.findall(abstracts_ns + 'Guid'):
                m_guid = d_guid.text
                diagram['guid'] = m_guid
            for header in surface.findall(model_ns + 'Header'):
                diagram['title'] = header.text

            for borders in surface.findall(model_ns + 'Borders'):
                # this level enumerates a model's elements
                for entry in borders.findall(pair_tag):
                    stencil = get_element(entry, z, None, None)
                    if stencil is not None:
                        cells.append(stencil)
                        z = z + 1
            for lines in surface.findall(model_ns + 'Lines'):
                # this level enumerates a model's elements
                for entry in lines.findall(pair_tag):
                    # Flows. Unlike stencils, flows have source and target guids
                    line = get_element(entry, z, root, m_guid)
                    if line is not None:
                        cells.append(line)
                        z = z + 1

            diagram['id'] = len(diagrams)
            diagrams.append(diagram)
    CairisWrite.convert(model, base_name)