In [1]:
import json
import os
import pandas as pd
import numpy as np

In [2]:
# 2019 CVE data, expected at data/nvdcve-1.1-2019.json relative to the working directory
data_path = os.path.join('data', 'nvdcve-1.1-2019.json')

# Read the JSON file. JSON text is UTF-8, so the encoding is named explicitly
# instead of relying on the platform default, which is not UTF-8 everywhere
# (e.g. a cp1252 locale on Windows) and could fail or mis-decode the file.
with open(data_path, encoding='utf-8') as f:
    data = json.load(f)

In [3]:
def getCVEItemLabels(cve_item):
    """Collect the identifying fields and text labels of one CVE record.

    Args:
        cve_item: mapping for a single CVE entry; only its 'cve' sub-mapping
            is read here.

    Returns:
        dict with the keys 'data_type', 'data_format', 'data_version', 'ID',
        'assigner', 'cwe_val' and 'description', inserted in that order.
        'cwe_val' is a list holding the 'value' of every problem-type
        description; 'description' is every description text concatenated,
        each one followed by a newline.

    Raises:
        KeyError: if any key read below is absent. Nothing is defaulted here.
    """
    cve = cve_item['cve']

    # Header fields, unique id and assigner.
    labels = {
        'data_type': cve['data_type'],
        'data_format': cve['data_format'],
        'data_version': cve['data_version'],
        'ID': cve['CVE_data_meta']['ID'],
        'assigner': cve['CVE_data_meta']['ASSIGNER'],
    }

    # problemtype_data is a list of entries that each hold a list of
    # descriptions, so one record can yield several CWE values.
    labels['cwe_val'] = [
        entry['value']
        for problem in cve['problemtype']['problemtype_data']
        for entry in problem['description']
    ]

    # Merge all descriptions into one string, one per line.
    labels['description'] = ''.join(
        text['value'] + '\n'
        for text in cve['description']['description_data']
    )

    return labels

In [4]:
def getCVEV3Tags(cve_item):
    """Extract the '_V3' metric fields of one CVE record.

    Args:
        cve_item: mapping for a single CVE entry. Values are read from
            cve_item['impact']['baseMetricV3'] and its 'cvssV3' sub-mapping.

    Returns:
        dict with one '<name>_V3' key per field listed below, in that order.
        A field is None when any key on its lookup path is missing
        (KeyError); other exception types are not caught.
    """
    # (output key, key under impact -> baseMetricV3 -> cvssV3)
    cvss_fields = (
        ('vectorString_V3', 'vectorString'),
        ('attackVector_V3', 'attackVector'),
        ('attackComplexity_V3', 'attackComplexity'),
        ('privilegesRequired_V3', 'privilegesRequired'),
        ('userInteraction_V3', 'userInteraction'),
        ('scope_V3', 'scope'),
        ('confidentialityImpact_V3', 'confidentialityImpact'),
        ('integrityImpact_V3', 'integrityImpact'),
        ('availabilityImpact_V3', 'availabilityImpact'),
        ('baseScore_V3', 'baseScore'),
        ('baseSeverity_V3', 'baseSeverity'),
    )
    # (output key, key directly under impact -> baseMetricV3)
    metric_fields = (
        ('exploitabilityScore_V3', 'exploitabilityScore'),
        ('impactScore_V3', 'impactScore'),
    )

    tags = {}

    # Each field is looked up independently so that one missing key only
    # blanks that field.
    for out_key, src_key in cvss_fields:
        try:
            tags[out_key] = cve_item['impact']['baseMetricV3']['cvssV3'][src_key]
        except KeyError:
            tags[out_key] = None

    for out_key, src_key in metric_fields:
        try:
            tags[out_key] = cve_item['impact']['baseMetricV3'][src_key]
        except KeyError:
            tags[out_key] = None

    return tags

In [5]:
def getCVEV2Tags(cve_item):
    """Extract the '_V2' metric fields of one CVE record.

    Args:
        cve_item: mapping for a single CVE entry. Values are read from
            cve_item['impact']['baseMetricV2'] and its 'cvssV2' sub-mapping.

    Returns:
        dict with one '<name>_V2' key per field listed below, in that order.
        A field is None when any key on its lookup path is missing
        (KeyError); other exception types are not caught.
    """
    # (output key, key under impact -> baseMetricV2 -> cvssV2)
    # 'vectorString_V2' is listed once; a second identical lookup would add
    # nothing to the result.
    cvss_fields = (
        ('vectorString_V2', 'vectorString'),
        ('accessVector_V2', 'accessVector'),
        ('accessComplexity_V2', 'accessComplexity'),
        ('authentication_V2', 'authentication'),
        ('confidentialityImpact_V2', 'confidentialityImpact'),
        ('integrityImpact_V2', 'integrityImpact'),
        ('availabilityImpact_V2', 'availabilityImpact'),
        ('baseScore_V2', 'baseScore'),
    )
    # (output key, key directly under impact -> baseMetricV2)
    metric_fields = (
        ('severity_V2', 'severity'),
        ('exploitabilityScore_V2', 'exploitabilityScore'),
        ('impactScore_V2', 'impactScore'),
        ('acInsufInfo_V2', 'acInsufInfo'),
        ('obtainAllPrivilege_V2', 'obtainAllPrivilege'),
        ('obtainUserPrivilege_V2', 'obtainUserPrivilege'),
        ('obtainOtherPrivilege_V2', 'obtainOtherPrivilege'),
        ('userInteractionRequired_V2', 'userInteractionRequired'),
    )

    cve_item_dict = dict()

    # Each field is looked up independently so that one missing key only
    # blanks that field.
    for out_key, src_key in cvss_fields:
        try:
            cve_item_dict[out_key] = cve_item['impact']['baseMetricV2']['cvssV2'][src_key]
        except KeyError:
            cve_item_dict[out_key] = None

    for out_key, src_key in metric_fields:
        try:
            cve_item_dict[out_key] = cve_item['impact']['baseMetricV2'][src_key]
        except KeyError:
            cve_item_dict[out_key] = None

    return cve_item_dict
    

In [6]:
def getCVETemporalData(cve_item):
    """Read the two date fields stored at the top level of a CVE record.

    Args:
        cve_item: mapping for a single CVE entry.

    Returns:
        dict with the keys 'publishedDate' and 'lastModifiedDate', in that
        order. A value is None when the key is missing from cve_item
        (KeyError); values are passed through as found, not parsed.
    """
    timestamps = {}

    for field in ('publishedDate', 'lastModifiedDate'):
        try:
            timestamps[field] = cve_item[field]
        except KeyError:
            timestamps[field] = None

    return timestamps

In [7]:
def getCVEAsDict(cve_item):
    """Flatten one CVE record into a single dict.

    Runs the four extractors (labels, V3 tags, V2 tags, temporal data) on
    cve_item, in that order, and merges their results.

    Args:
        cve_item: mapping for a single CVE entry.

    Returns:
        dict containing every key produced by the four extractors. If two
        extractors produce the same key, the later one's value is kept.
    """
    merged = {}

    # Sequential update() keeps the same key order and precedence as
    # unpacking the four dicts into one literal; the 3.9+ `|` operator is
    # avoided on purpose.
    merged.update(getCVEItemLabels(cve_item))
    merged.update(getCVEV3Tags(cve_item))
    merged.update(getCVEV2Tags(cve_item))
    merged.update(getCVETemporalData(cve_item))

    return merged
    

In [8]:
# Take the first entry of the 'CVE_Items' list as a sample record and display
# its raw nested structure.
a = data['CVE_Items'][0]
a

{'cve': {'data_type': 'CVE',
  'data_format': 'MITRE',
  'data_version': '4.0',
  'CVE_data_meta': {'ID': 'CVE-2019-0001', 'ASSIGNER': 'cve@mitre.org'},
  'problemtype': {'problemtype_data': [{'description': [{'lang': 'en',
       'value': 'CWE-400'}]}]},
  'references': {'reference_data': [{'url': 'http://www.securityfocus.com/bid/106541',
     'name': '106541',
     'refsource': 'BID',
     'tags': ['Third Party Advisory', 'VDB Entry']},
    {'url': 'https://kb.juniper.net/JSA10900',
     'name': 'https://kb.juniper.net/JSA10900',
     'refsource': 'CONFIRM',
     'tags': ['Vendor Advisory']},
    {'url': 'https://lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/RMKFSHPMOZL7MDWU5RYOTIBTRWSZ4Z6X/',
     'name': 'FEDORA-2019-5f14b810f8',
     'refsource': 'FEDORA',
     'tags': []},
    {'url': 'https://lists.fedoraproject.org/archives/list/package-announce@lists.fedoraproject.org/message/W7CPKBW4QZ4VIY4UXIUVUSHRJ4R2FROE/',
     'name': 'FEDORA-201

In [10]:
# Flatten the sample record into a single dict with getCVEAsDict and display it.
a_dict = getCVEAsDict(a)

a_dict

{'data_type': 'CVE',
 'data_format': 'MITRE',
 'data_version': '4.0',
 'ID': 'CVE-2019-0001',
 'assigner': 'cve@mitre.org',
 'cwe_val': ['CWE-400'],
 'description': 'Receipt of a malformed packet on MX Series devices with dynamic vlan configuration can trigger an uncontrolled recursion loop in the Broadband Edge subscriber management daemon (bbe-smgd), and lead to high CPU usage and a crash of the bbe-smgd service. Repeated receipt of the same packet can result in an extended denial of service condition for the device. Affected releases are Juniper Networks Junos OS: 16.1 versions prior to 16.1R7-S1; 16.2 versions prior to 16.2R2-S7; 17.1 versions prior to 17.1R2-S10, 17.1R3; 17.2 versions prior to 17.2R3; 17.3 versions prior to 17.3R3-S1; 17.4 versions prior to 17.4R2; 18.1 versions prior to 18.1R3; 18.2 versions prior to 18.2R2.\n',
 'vectorString_V3': 'CVSS:3.0/AV:N/AC:H/PR:N/UI:N/S:U/C:N/I:N/A:H',
 'attackVector_V3': 'NETWORK',
 'attackComplexity_V3': 'HIGH',
 'privilegesRequired_V

In [12]:
# Check that 'cwe_val' is kept as a list (a record may carry several CWE values).
type(a_dict['cwe_val'])

list