Explore the contents of a single year's CVE JSON feed (here, 2019) to better understand how to parse information from it.

In [1]:
import json
import os

In [2]:
# Location of the 2019 feed file, given relative to the current working directory
data_path = os.path.join('data','nvdcve-1.1-2019.json')

In [3]:
# Parse the whole feed file into nested Python dicts/lists.
# The encoding is passed explicitly: JSON text is UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding and could fail on (or
# garble) non-ASCII characters in the file.
with open(data_path, encoding='utf-8') as f:
    data = json.load(f)

In [4]:
# Inspect the top-level keys of the loaded feed
data.keys()

dict_keys(['CVE_data_type', 'CVE_data_format', 'CVE_data_version', 'CVE_data_numberOfCVEs', 'CVE_data_timestamp', 'CVE_Items'])

In [5]:
# Feed-level metadata: format identifier (a plain string)
data['CVE_data_format']

'MITRE'

In [6]:
# Record count as declared by the feed itself; note that it is stored as a string, not an int
data['CVE_data_numberOfCVEs']

'15921'

In [7]:
# Feed-level metadata: data type label (a plain string)
data['CVE_data_type']

'CVE'

In [8]:
# Feed-level metadata: timestamp string (presumably when the feed was generated; confirm against the feed documentation)
data['CVE_data_timestamp']

'2020-09-12T07:13Z'

In [9]:
# Check which container type holds the individual records before indexing into it
type(data['CVE_Items'])

list

In [10]:
# Actual number of records; compare with the declared 'CVE_data_numberOfCVEs' value
len(data['CVE_Items'])

15921

In [11]:
# Pretty-print a single record to inspect its nested structure.
# NOTE(review): index 2000 appears to be an arbitrary pick.
sample_a = data['CVE_Items'][2000]
print(json.dumps(sample_a, indent=4))

{
    "cve": {
        "data_type": "CVE",
        "data_format": "MITRE",
        "data_version": "4.0",
        "CVE_data_meta": {
            "ID": "CVE-2019-10934",
            "ASSIGNER": "cve@mitre.org"
        },
        "problemtype": {
            "problemtype_data": [
                {
                    "description": [
                        {
                            "lang": "en",
                            "value": "CWE-22"
                        }
                    ]
                }
            ]
        },
        "references": {
            "reference_data": [
                {
                    "url": "https://cert-portal.siemens.com/productcert/pdf/ssa-629512.pdf",
                    "name": "https://cert-portal.siemens.com/productcert/pdf/ssa-629512.pdf",
                    "refsource": "MISC",
                    "tags": [
                        "Vendor Advisory"
                    ]
                }
            ]
        },
        "description":

In [12]:
# Pretty-print another record (index 100) so its structure can be compared with the other samples
sample_b = data['CVE_Items'][100]
print(json.dumps(sample_b, indent=4))

{
    "cve": {
        "data_type": "CVE",
        "data_format": "MITRE",
        "data_version": "4.0",
        "CVE_data_meta": {
            "ID": "CVE-2019-0115",
            "ASSIGNER": "cve@mitre.org"
        },
        "problemtype": {
            "problemtype_data": [
                {
                    "description": [
                        {
                            "lang": "en",
                            "value": "CWE-20"
                        }
                    ]
                }
            ]
        },
        "references": {
            "reference_data": [
                {
                    "url": "http://www.securityfocus.com/bid/108385",
                    "name": "108385",
                    "refsource": "BID",
                    "tags": [
                        "Third Party Advisory",
                        "VDB Entry"
                    ]
                },
                {
                    "url": "https://www.intel.com/content/www/us/en

In [13]:
# Pretty-print another record (index 4444) so its structure can be compared with the other samples
sample_c = data['CVE_Items'][4444]
print(json.dumps(sample_c, indent=4))

{
    "cve": {
        "data_type": "CVE",
        "data_format": "MITRE",
        "data_version": "4.0",
        "CVE_data_meta": {
            "ID": "CVE-2019-13676",
            "ASSIGNER": "cve@mitre.org"
        },
        "problemtype": {
            "problemtype_data": [
                {
                    "description": [
                        {
                            "lang": "en",
                            "value": "CWE-732"
                        }
                    ]
                }
            ]
        },
        "references": {
            "reference_data": [
                {
                    "url": "https://chromereleases.googleblog.com/2019/09/stable-channel-update-for-desktop.html",
                    "name": "https://chromereleases.googleblog.com/2019/09/stable-channel-update-for-desktop.html",
                    "refsource": "MISC",
                    "tags": [
                        "Release Notes",
                        "Vendor Advisory"
 

In [14]:
# Top-level keys of a single record
sample_a.keys()

dict_keys(['cve', 'configurations', 'impact', 'publishedDate', 'lastModifiedDate'])

In [15]:
# Keys nested under the record's 'cve' entry
sample_a['cve'].keys()

dict_keys(['data_type', 'data_format', 'data_version', 'CVE_data_meta', 'problemtype', 'references', 'description'])

In [16]:
def flatten_json(y, sep='_'):
    """
    Flatten a nested structure of dicts and lists (e.g. a parsed JSON object)
    into a single-level dictionary.

    Each key of the result is the path to one leaf value: the dict keys and
    list indices met on the way down, joined with ``sep``. For example
    ``{'a': {'b': [10, 20]}}`` becomes ``{'a_b_0': 10, 'a_b_1': 20}``.

    Parameters
    ----------
    y : dict, list or scalar
        The structure to flatten. Subclasses of ``dict`` and ``list`` are
        descended into as well; every other value is treated as a leaf.
    sep : str, optional
        Separator placed between path components. Defaults to ``'_'``.

    Returns
    -------
    dict
        Mapping of flattened path -> leaf value, in traversal order.

    Notes
    -----
    * Empty dicts and empty lists contain no leaves, so they produce no entry
      in the result.
    * A scalar passed at the top level has an empty path and is stored under
      the key ``''``.
    * Different paths can produce the same flat key when a key itself
      contains ``sep`` (e.g. ``{'a_b': 1, 'a': {'b': 2}}``); the value
      visited last wins.
    """
    out = {}

    def flatten(x, path=None):
        # isinstance (rather than an exact type check) so that dict/list
        # subclasses such as OrderedDict are flattened too.
        if isinstance(x, dict):
            children = x.items()
        elif isinstance(x, list):
            children = enumerate(x)
        else:
            # Leaf value: path is None only when the top-level input itself
            # is a scalar.
            out['' if path is None else path] = x
            return
        for key, value in children:
            # str() turns list indices (and any non-string dict key) into a
            # path component.
            part = str(key)
            flatten(value, part if path is None else path + sep + part)

    flatten(y)
    return out
        

In [17]:
# Flatten the first sample record and display the resulting single-level dict
flatten_json(sample_a)

{'cve_data_type': 'CVE',
 'cve_data_format': 'MITRE',
 'cve_data_version': '4.0',
 'cve_CVE_data_meta_ID': 'CVE-2019-10934',
 'cve_CVE_data_meta_ASSIGNER': 'cve@mitre.org',
 'cve_problemtype_problemtype_data_0_description_0_lang': 'en',
 'cve_problemtype_problemtype_data_0_description_0_value': 'CWE-22',
 'cve_references_reference_data_0_url': 'https://cert-portal.siemens.com/productcert/pdf/ssa-629512.pdf',
 'cve_references_reference_data_0_name': 'https://cert-portal.siemens.com/productcert/pdf/ssa-629512.pdf',
 'cve_references_reference_data_0_refsource': 'MISC',
 'cve_references_reference_data_0_tags_0': 'Vendor Advisory',
 'cve_description_description_data_0_lang': 'en',
 'cve_description_description_data_0_value': 'A vulnerability has been identified in TIA Portal V14 (All versions), TIA Portal V15 (All versions < V15.1 Update 4), TIA Portal V16 (All versions < V16 Update 1). Changing the contents of a configuration file could allow an attacker to execute arbitrary code with SYST

In [18]:
# Flatten the second sample record; it has several references, so indexed keys (…_0_…, …_1_…) appear
flatten_json(sample_b)

{'cve_data_type': 'CVE',
 'cve_data_format': 'MITRE',
 'cve_data_version': '4.0',
 'cve_CVE_data_meta_ID': 'CVE-2019-0115',
 'cve_CVE_data_meta_ASSIGNER': 'cve@mitre.org',
 'cve_problemtype_problemtype_data_0_description_0_lang': 'en',
 'cve_problemtype_problemtype_data_0_description_0_value': 'CWE-20',
 'cve_references_reference_data_0_url': 'http://www.securityfocus.com/bid/108385',
 'cve_references_reference_data_0_name': '108385',
 'cve_references_reference_data_0_refsource': 'BID',
 'cve_references_reference_data_0_tags_0': 'Third Party Advisory',
 'cve_references_reference_data_0_tags_1': 'VDB Entry',
 'cve_references_reference_data_1_url': 'https://www.intel.com/content/www/us/en/security-center/advisory/INTEL-SA-00218.html',
 'cve_references_reference_data_1_name': 'https://www.intel.com/content/www/us/en/security-center/advisory/INTEL-SA-00218.html',
 'cve_references_reference_data_1_refsource': 'MISC',
 'cve_references_reference_data_1_tags_0': 'Vendor Advisory',
 'cve_descri