# CWE

In [None]:
conda install xmltodict

In [1]:
import json
import os
import requests
import xmltodict
import zipfile

## 1. Pobieranie danych

In [2]:
CWE_FILE_URL = "https://cwe.mitre.org/data/xml/cwec_latest.xml.zip"
CWE_OUTPUT_DIR = "cwe_output"
FILE_DIR_NAME = "files"

CWE_OUTPUT_DIRPATH = os.path.join(FILE_DIR_NAME, CWE_OUTPUT_DIR)

# stałe dotyczące zipa
cwe_zip_output_filename = f'{CWE_FILE_URL.split("/")[-1].split(".")[0]}.zip'
CWE_ZIP_FILEPATH = os.path.join(FILE_DIR_NAME, CWE_OUTPUT_DIR, cwe_zip_output_filename)

# stałe dotyczące xml
CWE_DICT_RAW_FILENAME = 'cwe_raw.json'
CWE_DICT_RAW_FILEPATH = os.path.join(CWE_OUTPUT_DIRPATH, CWE_DICT_RAW_FILENAME)

### 1.1 pobieranie i zapisywanie zipa

In [3]:
data = requests.get(CWE_FILE_URL)
with open(CWE_ZIP_FILEPATH, "wb") as f:
    f.write(data.content)
    
print(CWE_ZIP_FILEPATH)

files\cwe_output\cwec_latest.zip


### 1.2 rozpakowywanie zipa

In [4]:
with zipfile.ZipFile(CWE_ZIP_FILEPATH, "r") as zip_ref:
    extracted_file_names = zip_ref.namelist()
    zip_ref.extractall(CWE_OUTPUT_DIRPATH)

# tylko jedne plik w zipie
extracted_file = extracted_file_names[0]
xml_filepath = os.path.join(CWE_OUTPUT_DIRPATH, extracted_file)
print(xml_filepath)

files\cwe_output\cwec_v4.9.xml


### 1.3 zamiana xml na jsona

In [5]:
xml_data = open(xml_filepath, "r", encoding="utf-8").read()
parsed_data = xmltodict.parse(xml_data)
xml_dict =  dict(parsed_data)

### 1.4 zapisywanie jsona

In [6]:
with open(CWE_DICT_RAW_FILEPATH, "w", encoding="utf-8") as f:
    json.dump(xml_dict, f, indent=4)

### 1.5 Usuwanie zipa

In [7]:
os.remove(CWE_ZIP_FILEPATH)

### 1.6 Usuwanie pliku xml

In [8]:
# tu nie może być stała, bo to z pliku zipa nazwa pliku
os.remove(xml_filepath)

## 2. Parsowanie danych

In [9]:
CWE_PARSED_JSON_FILENAME = "cwe_parsed.json"
CWE_PARSED_JSON_FILEPATH = os.path.join(CWE_OUTPUT_DIRPATH, CWE_PARSED_JSON_FILENAME)
CWE_PARSED_JSON_FILEPATH

'files\\cwe_output\\cwe_parsed.json'

### 2.1  wczytywanie pliku json 

In [10]:
with open(CWE_DICT_RAW_FILEPATH, 'r', encoding="utf-8") as f:
    raw_data = json.loads(f.read())

### 2.2 Parsowanie danych 

In [11]:
parsed_result = []
weakness = raw_data.get("Weakness_Catalog").get("Weaknesses").get("Weakness")

counter = 1

for w in weakness:
    try:  # mismash json
        extended_description = w["Extended_Description"].get("xhtml:p", [""])
        extended_description = (
            extended_description[0]
            if isinstance(extended_description, list)
            else extended_description
        )
        extended_description = extended_description.replace("\n\t\t\t\t\t", " ")
    
    
    except AttributeError:
        extended_description = w["Extended_Description"]
        extended_description = extended_description.replace("\n\t\t\t\t\t", " ")

    except KeyError:
        extended_description = (
            None  # some CWEs don't have Extended_Description at all
        )

        
    cwe_code = f'CWE-{w["@ID"]}' if w["@ID"].isnumeric() else w["@ID"]
    description = w["Description"].replace("\n\t\t\t\t\t", " ")

    parsed_result.append(
        {
            "artificialId": counter,  # needed for dummy data in JS
            "id": cwe_code,
            "name": w["@Name"],
            "abstraction": w["@Abstraction"],
            "structure": w["@Structure"],
            "status": w["@Status"],
            "description": description,
            "extendedDescription": extended_description,
        }
    )
    counter += 1

### 2.2 zapisu sparsowanego pliku json

In [12]:
with open(CWE_PARSED_JSON_FILEPATH, "w", encoding="utf-8") as f:
    json.dump(parsed_result, f, indent=4)

### 2.3 Usuwanie tego surowego jsona

In [15]:
os.remove(CWE_DICT_RAW_FILEPATH)

FileNotFoundError: [WinError 2] The system cannot find the file specified: 'files\\cwe_output\\cwe_raw.json'

In [16]:
# ścieżka do sparsowanego pliku
CWE_PARSED_JSON_FILEPATH

'files\\cwe_output\\cwe_parsed.json'