In [170]:
import json
import xml.etree.ElementTree as ET
data_dir = "../ics_cwe/"
# Load the XML file
tree = ET.parse(data_dir+'cwec_v4.12.xml')
root = tree.getroot()

# Get the XML root attributes and its immediate children
root_attrib = root.attrib
children_tags = [child.tag for child in root]

(root.tag, root_attrib, children_tags)

('{http://cwe.mitre.org/cwe-7}Weakness_Catalog',
 {'Name': 'CWE',
  'Version': '4.12',
  'Date': '2023-06-29',
  '{http://www.w3.org/2001/XMLSchema-instance}schemaLocation': 'http://cwe.mitre.org/cwe-7 http://cwe.mitre.org/data/xsd/cwe_schema_v7.0.xsd'},
 ['{http://cwe.mitre.org/cwe-7}Weaknesses',
  '{http://cwe.mitre.org/cwe-7}Categories',
  '{http://cwe.mitre.org/cwe-7}Views',
  '{http://cwe.mitre.org/cwe-7}External_References'])

In [171]:
weaknesses_section = root.find('{http://cwe.mitre.org/cwe-7}Weaknesses')
weakness_entries = list(weaknesses_section)

In [172]:
# Parse the XML
from collections import defaultdict
namespace = {
    'cwe': 'http://cwe.mitre.org/cwe-7',
    'xhtml': 'http://www.w3.org/1999/xhtml'
}
# Extract Weakness information
weakness_info = defaultdict(list)

for weakness in root.findall('.//cwe:Weakness', namespace):
    weakness_id = weakness.get('ID')
    weakness_name = weakness.get('Name')
    
    description = weakness.find('.//cwe:Description', namespace).text.strip() if weakness.find('.//cwe:Description', namespace) is not None else 'N/A'
    extended_description_elems = weakness.findall('.//cwe:Extended_Description/xhtml:p', namespace)
    if not extended_description_elems:
        extended_description = weakness.find('.//cwe:Extended_Description', namespace).text.strip() if weakness.find('.//cwe:Extended_Description', namespace) is not None else 'N/A'
    else:
        extended_description = ''.join(ET.tostring(e, encoding='unicode', method='text').strip() for e in extended_description_elems)
    print("# ID #")
    print(weakness_id)
    # print("# description #")
    # print(description)
    # print("# extended_description #")
    # print(extended_description.strip())
    modes_of_introduction = ""
    for intro in weakness.findall('.//cwe:Introduction', namespace):
        phase = intro.find('.//cwe:Phase', namespace).text.strip() if intro.find('.//cwe:Phase', namespace) is not None else 'N/A'
        #note = intro.find('.//cwe:Note', namespace).text.strip() if intro.find('.//cwe:Note', namespace) is not None else 'N/A'
        note_elems = intro.findall('.//cwe:Note/xhtml:p', namespace)
        if not note_elems:
            note = intro.find('.//cwe:Note', namespace).text.strip() if intro.find('.//cwe:Note', namespace) is not None else 'N/A'
        else:
            note = ''.join(ET.tostring(e, encoding='unicode', method='text').strip() for e in note_elems)
            
        if(phase!='N/A' and note!='N/A'):
            modes_of_introduction+=(phase+": "+note)
        elif(note!='N/A'):
            modes_of_introduction+=(". "+note)

    # print("# modes_of_introduction #")
    # print(modes_of_introduction)
    consequences = ""
    for consequence in weakness.findall('.//cwe:Consequence', namespace):
        scopes = [scope.text.strip() for scope in consequence.findall('.//cwe:Scope', namespace)]
        impacts = [impact.text.strip() for impact in consequence.findall('.//cwe:Impact', namespace)]
        note_elems = consequence.findall('.//cwe:Note/xhtml:p', namespace)
        if not note_elems:
            note = consequence.find('.//cwe:Note', namespace).text.strip() if consequence.find('.//cwe:Note', namespace) is not None else 'N/A'
        else:
            note = ''.join(ET.tostring(e, encoding='unicode', method='text').strip() for e in note_elems)
        scope_str = "Scopes: "
        for scope in scopes:
            if(scope=='N/A'):
                continue
            scope_str+=scope
        impact_str = "Impacts: "
        for impact in impacts:
            if(impact=='N/A'):
                continue
            impact_str+=impact
        if(note!='N/A'):
            consequences+=(scope_str+". "+ impact_str+". Note: "+ note)
    # print("# consequences #")
    # print(consequences)
    detection_methods = ""
    for method in weakness.findall('.//cwe:Detection_Method', namespace):
        method_name = method.find('.//cwe:Method', namespace).text.strip() if method.find('.//cwe:Method', namespace) is not None else 'N/A'
        #description = method.find('.//cwe:Description', namespace).text if method.find('.//cwe:Description', namespace) is not None else 'N/A'
        
        description_elems = method.findall('.//cwe:Description/xhtml:p', namespace)
        if not description_elems:
            description = method.find('.//cwe:Description', namespace).text.strip() if method.find('.//cwe:Description', namespace) is not None else 'N/A'
        else:
            description = ''.join(ET.tostring(e, encoding='unicode', method='text').strip() for e in description_elems)
        effectiveness = method.find('.//cwe:Effectiveness', namespace).text.strip() if method.find('.//cwe:Effectiveness', namespace) is not None else 'N/A'
        text=""
        if(method_name!='N/A'):
            text+=" Method Name: "+method_name
        if(description!='N/A'):
            text+=". Description: "+description
        detection_methods+=text
    # print("# detection_methods #")
    # print(detection_methods)
    mitigations = ""
    for mitigation in weakness.findall('.//cwe:Mitigation', namespace):
        phase = mitigation.find('.//cwe:Phase', namespace).text.strip() if mitigation.find('.//cwe:Phase', namespace) is not None else 'N/A'
        
        description = ''.join(ET.tostring(e, encoding='unicode', method='text').strip() for e in mitigation.findall('.//cwe:Description', namespace))
        mitigations+=(phase+" : "+description)
    # print("# mitigations #")
    # print(mitigations)
    
    examples = ""

    for example in weakness.findall('.//cwe:Demonstrative_Example', namespace):
        intro_text_elems = example.findall('.//cwe:Intro_Text/xhtml:p', namespace)
        if not intro_text_elems:
            intro_text = example.find('.//cwe:Intro_Text', namespace).text.strip() if example.find('.//cwe:Intro_Text', namespace) is not None else 'N/A'
        else:
            intro_text = ''.join(ET.tostring(e, encoding='unicode', method='text').strip() for e in intro_text_elems)
        body_text_elems = example.findall('.//cwe:Body_Text/xhtml:p', namespace)
        if not body_text_elems:
            body_text = example.find('.//cwe:Body_Text', namespace).text.strip() if example.find('.//cwe:Body_Text', namespace) is not None else 'N/A'
        else:
            body_text = ''.join(ET.tostring(e, encoding='unicode', method='text').strip() for e in body_text_elems)
        
        if(intro_text!='N/A' and body_text!='N/A'):
            examples+=(intro_text+". "+ body_text)
        elif(body_text!='N/A'):
            examples+=(". "+body_text)
        elif(intro_text!='N/A'):
            examples+=(". "+intro_text)
    # print("# examples #")
    # print(examples)
    weakness_info[weakness_id] = {
        'Name': weakness_name,
        'Description': description.strip(),
        'Extended_Description': extended_description.strip(),
        'Modes_Of_Introduction': modes_of_introduction.strip(),
        'Common_Consequences': consequences.strip(),
        'Detection_Methods': detection_methods.strip(),
        'Potential_Mitigations': mitigations.strip(),
        'Demonstrative_Examples': examples.strip(),
    }


# ID #
1004
# ID #
1007
# ID #
102
# ID #
1021
# ID #
1022
# ID #
1023
# ID #
1024
# ID #
1025
# ID #
103
# ID #
1037
# ID #
1038
# ID #
1039
# ID #
104
# ID #
1041
# ID #
1042
# ID #
1043
# ID #
1044
# ID #
1045
# ID #
1046
# ID #
1047
# ID #
1048
# ID #
1049
# ID #
105
# ID #
1050
# ID #
1051
# ID #
1052
# ID #
1053
# ID #
1054
# ID #
1055
# ID #
1056
# ID #
1057
# ID #
1058
# ID #
1059
# ID #
106
# ID #
1060
# ID #
1061
# ID #
1062
# ID #
1063
# ID #
1064
# ID #
1065
# ID #
1066
# ID #
1067
# ID #
1068
# ID #
1069
# ID #
107
# ID #
1070
# ID #
1071
# ID #
1072
# ID #
1073
# ID #
1074
# ID #
1075
# ID #
1076
# ID #
1077
# ID #
1078
# ID #
1079
# ID #
108
# ID #
1080
# ID #
1082
# ID #
1083
# ID #
1084
# ID #
1085
# ID #
1086
# ID #
1087
# ID #
1088
# ID #
1089
# ID #
109
# ID #
1090
# ID #
1091
# ID #
1092
# ID #
1093
# ID #
1094
# ID #
1095
# ID #
1096
# ID #
1097
# ID #
1098
# ID #
1099
# ID #
11
# ID #
110
# ID #
1100
# ID #
1101
# ID #
1102
# ID #
1103
# ID #
1104
# ID #
1105
# I

In [173]:
# 1421 = note
# 1253 = intro_text
# 1191 = description

In [174]:
weakness_info['1267']

{'Name': 'Policy Uses Obsolete Encoding',
 'Description': 'Security Token Decoders should be reviewed for design inconsistency and common weaknesses.\n      Access and programming flows should be tested in both pre-silicon and post-silicon testing.',
 'Extended_Description': 'Within a System-On-a-Chip (SoC), various circuits and hardware engines generate transactions for the purpose of accessing (read/write) assets or performing various actions (e.g., reset, fetch, compute, etc.). Among various types of message information, a typical transaction is comprised of source identity (identifying the originator of the transaction) and a destination identity (routing the transaction to the respective entity). Sometimes the transactions are qualified with a Security Token. This Security Token helps the destination agent decide on the set of allowed actions (e.g., access to an asset for reads and writes). A policy encoder is used to map the bus transactions to Security Tokens that in turn are us

In [176]:
# Save File

In [175]:
with open(data_dir+'cwec_v4.12.json','w') as fp:
    json.dump(weakness_info,fp)