In [1]:
import json
import os
import pandas as pd
import numpy as np

In [3]:
#2019 CVE data
data_path = os.path.join('data','nvdcve-1.1-2019.json')

#read json
#open explicitly as UTF-8: JSON is exchanged as UTF-8, while open() without
#an encoding falls back to the platform's locale default, which can fail or
#mis-decode non-ASCII description text
with open(data_path, encoding='utf-8') as f:
    data = json.load(f)

In [4]:
def flatten_json(y):
    """
    Flatten a nested JSON-like object into a single-level dictionary.

    Keys of nested dictionaries are joined with '_' and list elements
    contribute their zero-based position, e.g.
    {'a': {'b': 1}, 'c': [10, 20]} -> {'a_b': 1, 'c_0': 10, 'c_1': 20}.

    Parameters
    ----------
    y : dict, list or scalar
        The object to flatten. Subclasses of dict and list (for example
        collections.OrderedDict) are traversed as well. Dictionary keys
        must be strings because they are concatenated into the flat key.

    Returns
    -------
    dict
        Maps each flattened key path to its leaf value, in traversal order.

    Notes
    -----
    - Empty dictionaries and empty lists produce no entry at all.
    - A scalar passed at the top level is stored under the key ''.
    - Different paths can produce the same flat key (e.g. {'a_b': 1} and
      {'a': {'b': 2}}); the value visited last wins.
    """
    out = {}

    def flatten(x, name=''):
        #isinstance (rather than an exact type check) so dict/list subclasses
        #are descended into instead of being stored as one opaque leaf
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, name + key + '_')
        elif isinstance(x, list):
            for index, value in enumerate(x):
                flatten(value, name + str(index) + '_')
        else:
            #drop the trailing '_' that was appended for this level
            out[name[:-1]] = x

    flatten(y)
    return out
        

We want to extract the relevant data from the CVEs.
A brute-force method like flattening the JSON might yield wildly varying keys and is not recommended.

In [5]:
#keys read from cve_item['impact']['baseMetricV3']['cvssV3'], stored as '<key>_V3'
CVSS_V3_KEYS = (
    'vectorString',
    'attackVector',
    'attackComplexity',
    'privilegesRequired',
    'userInteraction',
    'scope',
    'confidentialityImpact',
    'integrityImpact',
    'availabilityImpact',
    'baseScore',
    'baseSeverity',
)

#keys read directly from cve_item['impact']['baseMetricV3'], stored as '<key>_V3'
BASE_METRIC_V3_KEYS = (
    'exploitabilityScore',
    'impactScore',
)

#keys read from cve_item['impact']['baseMetricV2']['cvssV2'], stored as '<key>_V2'
CVSS_V2_KEYS = (
    'vectorString',
    'accessVector',
    'accessComplexity',
    'authentication',
    'confidentialityImpact',
    'integrityImpact',
    'availabilityImpact',
    'baseScore',
)

#keys read directly from cve_item['impact']['baseMetricV2'], stored as '<key>_V2'
BASE_METRIC_V2_KEYS = (
    'severity',
    'exploitabilityScore',
    'impactScore',
    'acInsufInfo',
    'obtainAllPrivilege',
    'obtainUserPrivilege',
    'obtainOtherPrivilege',
    'userInteractionRequired',
)


def get_nested_or_none(mapping, *keys):
    """
    Follow `keys` one level at a time into `mapping` and return the value found.

    Returns None as soon as any key along the path is missing (KeyError).
    Other errors (e.g. indexing into a non-mapping) are not swallowed.
    """
    node = mapping
    try:
        for key in keys:
            node = node[key]
    except KeyError:
        return None
    return node


#array to store cve items
cve_item_array = []

#loop over all CVEs in dict
for cve_item in data['CVE_Items']:
    cve_item_dict = dict()
    cve = cve_item['cve']

    #the fields below are accessed without a fallback: a missing key raises KeyError

    #data type
    cve_item_dict['data_type'] = cve['data_type']
    #format
    cve_item_dict['data_format'] = cve['data_format']
    #version
    cve_item_dict['data_version'] = cve['data_version']

    #unique id
    cve_item_dict['ID'] = cve['CVE_data_meta']['ID']

    #assigner
    cve_item_dict['assigner'] = cve['CVE_data_meta']['ASSIGNER']

    #problem type values
    #note that problemtype_data contains a list such that multiple CWEs might be present
    cwe_val = []
    for problemtype_data in cve['problemtype']['problemtype_data']:
        for description in problemtype_data['description']:
            cwe_val.append(description['value'])

    cve_item_dict['cwe_val'] = cwe_val

    #add all descriptions into one string, each one terminated by a newline
    description_string = ''.join(
        description_data['value'] + '\n'
        for description_data in cve['description']['description_data']
    )

    cve_item_dict['description'] = description_string

    ### add values from v3 ###
    #any key missing along the path results in None for that column

    for key in CVSS_V3_KEYS:
        cve_item_dict[key + '_V3'] = get_nested_or_none(
            cve_item, 'impact', 'baseMetricV3', 'cvssV3', key)

    for key in BASE_METRIC_V3_KEYS:
        cve_item_dict[key + '_V3'] = get_nested_or_none(
            cve_item, 'impact', 'baseMetricV3', key)

    ### add values from V2 ###

    for key in CVSS_V2_KEYS:
        cve_item_dict[key + '_V2'] = get_nested_or_none(
            cve_item, 'impact', 'baseMetricV2', 'cvssV2', key)

    for key in BASE_METRIC_V2_KEYS:
        cve_item_dict[key + '_V2'] = get_nested_or_none(
            cve_item, 'impact', 'baseMetricV2', key)

    ### get temporal data ###

    for key in ('publishedDate', 'lastModifiedDate'):
        cve_item_dict[key] = get_nested_or_none(cve_item, key)

    cve_item_array.append(cve_item_dict)
    

In [6]:
#one row per CVE, one column per extracted key
df_cve = pd.DataFrame(data=cve_item_array)
#preview the first five rows
df_cve.head(n=5)

Unnamed: 0,data_type,data_format,data_version,ID,assigner,cwe_val,description,vectorString_V3,attackVector_V3,attackComplexity_V3,...,severity_V2,exploitabilityScore_V2,impactScore_V2,acInsufInfo_V2,obtainAllPrivilege_V2,obtainUserPrivilege_V2,obtainOtherPrivilege_V2,userInteractionRequired_V2,publishedDate,lastModifiedDate
0,CVE,MITRE,4.0,CVE-2019-0001,cve@mitre.org,[CWE-400],Receipt of a malformed packet on MX Series dev...,CVSS:3.0/AV:N/AC:H/PR:N/UI:N/S:U/C:N/I:N/A:H,NETWORK,HIGH,...,HIGH,8.6,6.9,False,False,False,False,False,2019-01-15T21:29Z,2020-07-22T18:00Z
1,CVE,MITRE,4.0,CVE-2019-0002,cve@mitre.org,[CWE-20],"On EX2300 and EX3400 series, stateless firewal...",CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H,NETWORK,LOW,...,HIGH,10.0,6.4,False,False,False,False,False,2019-01-15T21:29Z,2020-07-22T18:00Z
2,CVE,MITRE,4.0,CVE-2019-0003,cve@mitre.org,[CWE-20],When a specific BGP flowspec configuration is ...,CVSS:3.0/AV:N/AC:H/PR:N/UI:N/S:U/C:N/I:N/A:H,NETWORK,HIGH,...,MEDIUM,8.6,2.9,False,False,False,False,False,2019-01-15T21:29Z,2019-10-09T23:43Z
3,CVE,MITRE,4.0,CVE-2019-0004,cve@mitre.org,[CWE-200],"On Juniper ATP, the API key and the device key...",CVSS:3.0/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:N/A:N,LOCAL,LOW,...,LOW,3.9,2.9,False,False,False,False,False,2019-01-15T21:29Z,2019-10-09T23:43Z
4,CVE,MITRE,4.0,CVE-2019-0005,cve@mitre.org,[CWE-770],"On EX2300, EX3400, EX4600, QFX3K and QFX5K ser...",CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:L/A:N,NETWORK,LOW,...,MEDIUM,10.0,2.9,False,False,False,False,False,2019-01-15T21:29Z,2020-08-24T17:37Z


In [7]:
#fraction of missing entries per column (mean of the boolean is-missing mask, taken down the rows)
df_cve.isna().mean(axis=0)

data_type                     0.000000
data_format                   0.000000
data_version                  0.000000
ID                            0.000000
assigner                      0.000000
cwe_val                       0.000000
description                   0.000000
vectorString_V3               0.060737
attackVector_V3               0.060737
attackComplexity_V3           0.060737
privilegesRequired_V3         0.060737
userInteraction_V3            0.060737
scope_V3                      0.060737
confidentialityImpact_V3      0.060737
integrityImpact_V3            0.060737
availabilityImpact_V3         0.060737
baseScore_V3                  0.060737
baseSeverity_V3               0.060737
exploitabilityScore_V3        0.060737
impactScore_V3                0.060737
vectorString_V2               0.060737
accessVector_V2               0.060737
accessComplexity_V2           0.060737
authentication_V2             0.060737
confidentialityImpact_V2      0.060737
integrityImpact_V2       

In [12]:
#print every description that contains the word "wild"
#a word-boundary match is used instead of comparing whitespace-split tokens,
#so occurrences followed by punctuation (e.g. "in the wild.") are found too;
#the match stays case-sensitive
wild_mask = df_cve['description'].str.contains(r'\bwild\b', regex=True, na=False)
for descp in df_cve.loc[wild_mask, 'description']:
    print(descp)
    print(20*'=')

A type confusion vulnerability can occur when manipulating JavaScript objects due to issues in Array.pop. This can allow for an exploitable crash. We are aware of targeted attacks in the wild abusing this flaw. This vulnerability affects Firefox ESR < 60.7.1, Firefox < 67.0.3, and Thunderbird < 60.7.2.

An issue was discovered in Foxit PhantomPDF before 8.3.11. The application could crash when calling xfa.event.rest XFA JavaScript due to accessing a wild pointer.

Incorrect alias information in IonMonkey JIT compiler for setting array elements could lead to a type confusion. We are aware of targeted attacks in the wild abusing this flaw. This vulnerability affects Firefox ESR < 68.4.1, Thunderbird < 68.4.1, and Firefox < 72.0.1.

NETGEAR SRX5308 4.3.5-3 devices allow SQL Injection, as exploited in the wild in September 2019 to add a new user account.

An issue was discovered in Foxit 3D Plugin Beta before 9.4.0.16807 for Foxit Reader and PhantomPDF. The application could encounter a Us