# Get CVSS Data

1. Download the NVD JSON files per year
2. Extract the fields we want from JSON to CSV
3. Save the file

In [None]:
#based on code contributed via https://gist.github.com/jgamblin/7a927997b1f1e35cc7f4f1788ee5eae1

In [1]:
%%capture
!mkdir -p jsondata
%cd jsondata
!rm *.json 
!rm *.zip 
!wget https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{2022..2023}.json.zip 
!unzip -o "*.zip" 
!date > date.txt 

### Import Python Libraries

In [2]:
from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
import datetime
from datetime import date
import glob
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import warnings
import csv

@register_cell_magic
def markdown(line, cell):
    return Markdown(cell.format(**globals()))


logging.getLogger('matplotlib.font_manager').disabled = True
warnings.filterwarnings("ignore")
pd.set_option('display.width', 500)
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 10)

### Build Base DataFrame

This code builds a Panda dataframe from the JSON files we downloaded, removing all CVE's marked rejected.

In [3]:
row_accumulator = []
for filename in glob.glob('nvdcve-1.1-*.json'):

    with open(filename, 'r', encoding='utf-8') as f:
        nvd_data = json.load(f)
        for entry in nvd_data['CVE_Items']:
            cve = entry['cve']['CVE_data_meta']['ID']
            try:
                published_date = entry['publishedDate']
            except KeyError:
                published_date = 'Missing_Data'
            try:
                description = entry['cve']['description']['description_data'][0]['value']
            except IndexError:
                description = ''
            try:
                attack_vector_v3 = entry['impact']['baseMetricV3']['cvssV3']['attackVector']
            except KeyError:
                attack_vector_v3 = 'Missing_Data'
            try:
                attack_complexity_v3 = entry['impact']['baseMetricV3']['cvssV3']['attackComplexity']
            except KeyError:
                attack_complexity_v3 = 'Missing_Data'
            try:
                privileges_required_v3 = entry['impact']['baseMetricV3']['cvssV3']['privilegesRequired']
            except KeyError:
                privileges_required_v3 = 'Missing_Data'
            try:
                user_interaction_v3 = entry['impact']['baseMetricV3']['cvssV3']['userInteraction']
            except KeyError:
                user_interaction_v3 = 'Missing_Data'
            try:
                scope_v3 = entry['impact']['baseMetricV3']['cvssV3']['scope']
            except KeyError:
                scope_v3 = 'Missing_Data'
            try:
                confidentiality_impact_v3 = entry['impact']['baseMetricV3']['cvssV3']['confidentialityImpact']
            except KeyError:
                confidentiality_impact_v3 = 'Missing_Data'
            try:
                integrity_impact_v3 = entry['impact']['baseMetricV3']['cvssV3']['integrityImpact']
            except KeyError:
                integrity_impact_v3 = 'Missing_Data'
            try:
                availability_impact_v3 = entry['impact']['baseMetricV3']['cvssV3']['availabilityImpact']
            except KeyError:
                availability_impact_v3 = 'Missing_Data'
            try:
                base_score_v3 = entry['impact']['baseMetricV3']['cvssV3']['baseScore']
            except KeyError:
                base_score_v3 = '0.0'
            try:
                base_severity_v3 = entry['impact']['baseMetricV3']['cvssV3']['baseSeverity']
            except KeyError:
                base_severity_v3 = 'Missing_Data'
            try:
                exploitability_score_v3 = entry['impact']['baseMetricV3']['exploitabilityScore']
            except KeyError:
                exploitability_score_v3 = '0.0'
            try:
                impact_score_v3 = entry['impact']['baseMetricV3']['impactScore']
            except KeyError:
                impact_score_v3 = '0.0'
            try:
                access_vector_v2 = entry['impact']['baseMetricV2']['cvssV2']['accessVector']
            except KeyError:
                access_vector_v2 = 'Missing_Data'
            try:
                access_complexity_v2 = entry['impact']['baseMetricV2']['cvssV2']['accessComplexity']
            except KeyError:
                access_complexity_v2 = 'Missing_Data'
            try:
                authentication_v2 = entry['impact']['baseMetricV2']['cvssV2']['authentication']
            except KeyError:
                authentication_v2 = 'Missing_Data'
            try:
                confidentiality_impact_v2 = entry['impact']['baseMetricV2']['cvssV2']['confidentialityImpact']
            except KeyError:
                confidentiality_impact_v2 = 'Missing_Data'
            try:
                integrity_impact_v2 = entry['impact']['baseMetricV2']['cvssV2']['integrityImpact']
            except KeyError:
                integrity_impact_v2 = 'Missing_Data'
            try:
                availability_impact_v2 = entry['impact']['baseMetricV2']['cvssV2']['availabilityImpact']
            except KeyError:
                availability_impact_v2 = 'Missing_Data'
            try:
                base_score_v2 = entry['impact']['baseMetricV2']['cvssV2']['baseScore']
            except KeyError:
                base_score_v2 = '0.0'
            try:
                base_severity_v2 = entry['impact']['baseMetricV2']['cvssV2']['severity']
            except KeyError:
                base_severity_v2 = 'Missing_Data'
            try:
                exploitability_score_v2 = entry['impact']['baseMetricV2']['exploitabilityScore']
            except KeyError:
                exploitability_score_v2 = '0.0'
            try:
                impact_score_v2 = entry['impact']['baseMetricV2']['impactScore']
            except KeyError:
                impact_score_v2 = '0.0'
            try:
                #some CVEs have more than one CWE e.g. CVE-2023-0058
                cwe_values = [desc['value'] for desc in entry['cve']['problemtype']['problemtype_data'][0]['description']]
            except KeyError:
                cwe_values = 'Missing_Data'
            new_row = { 
                'CVE': cve, 
                'Published': published_date,
                'Description': description,
                'AttackVector CVSS3': attack_vector_v3,
                'AttackComplexity CVSS3': attack_complexity_v3,
                'PrivilegesRequired CVSS3': privileges_required_v3,
                'UserInteraction CVSS3': user_interaction_v3,
                'Scope CVSS3': scope_v3,
                'ConfidentialityImpact CVSS3': confidentiality_impact_v3,
                'IntegrityImpact CVSS3': integrity_impact_v3,
                'AvailabilityImpact CVSS3': availability_impact_v3,
                'BaseScore CVSS3': base_score_v3,
                'BaseSeverity CVSS3': base_severity_v3,
                'ExploitabilityScore CVSS3': exploitability_score_v3,
                'ImpactScore CVSS3': impact_score_v3,
                'AccessVector CVSS2': access_vector_v2,
                'AccessComplexity CVSS2': access_complexity_v2,
                'Authentication CVSS2': authentication_v2,
                'ConfidentialityImpact CVSS2': confidentiality_impact_v2,
                'IntegrityImpact CVSS2': integrity_impact_v2,
                'AvailabilityImpact CVSS2': availability_impact_v2,
                'BaseScore CVSS2': base_score_v2,
                'BaseSeverity CVSS2': base_severity_v2,
                'ExploitabilityScore CVSS2': exploitability_score_v2,
                'ImpactScore CVSS2': impact_score_v2,
                'CWEs': cwe_values
            }
            if not (description.startswith('** REJECT **') | (description.startswith('Rejected reason:'))): # disputed, rejected and other non issues start with
                row_accumulator.append(new_row)
        nvd = pd.DataFrame(row_accumulator)

# Example Rejected reasons:
#  Rejected reason: DO NOT USE THIS CANDIDATE NUMBER
#  Rejected reason: This candidate is unused by its CNA.     

In [4]:
nvd

Unnamed: 0,CVE,Published,Description,AttackVector CVSS3,AttackComplexity CVSS3,...,BaseScore CVSS2,BaseSeverity CVSS2,ExploitabilityScore CVSS2,ImpactScore CVSS2,CWEs
0,CVE-2021-0001,2021-06-09T20:15Z,Observable timing discrepancy in Intel(R) IPP ...,LOCAL,HIGH,...,2.1,Missing_Data,3.9,2.9,[CWE-203]
1,CVE-2021-0002,2021-08-11T13:15Z,Improper conditions check in some Intel(R) Eth...,LOCAL,LOW,...,3.6,Missing_Data,3.9,4.9,[CWE-754]
2,CVE-2021-0003,2021-08-11T13:15Z,Improper conditions check in some Intel(R) Eth...,LOCAL,LOW,...,2.1,Missing_Data,3.9,2.9,[CWE-755]
3,CVE-2021-0004,2021-08-11T13:15Z,Improper buffer restrictions in the firmware o...,LOCAL,LOW,...,2.1,Missing_Data,3.9,2.9,[CWE-119]
4,CVE-2021-0005,2021-08-11T13:15Z,Uncaught exception in firmware for Intel(R) Et...,LOCAL,LOW,...,2.1,Missing_Data,3.9,2.9,[CWE-755]
...,...,...,...,...,...,...,...,...,...,...,...
221684,CVE-2022-4958,2024-01-11T12:15Z,A vulnerability classified as problematic has ...,Missing_Data,Missing_Data,...,0.0,Missing_Data,0.0,0.0,[CWE-79]
221685,CVE-2022-4959,2024-01-11T21:15Z,A vulnerability classified as problematic was ...,Missing_Data,Missing_Data,...,0.0,Missing_Data,0.0,0.0,[CWE-79]
221686,CVE-2022-4960,2024-01-12T03:15Z,"A vulnerability, which was classified as probl...",Missing_Data,Missing_Data,...,0.0,Missing_Data,0.0,0.0,[CWE-79]
221687,CVE-2022-4961,2024-01-12T05:15Z,A vulnerability was found in Weitong Mall 1.0....,Missing_Data,Missing_Data,...,0.0,Missing_Data,0.0,0.0,[CWE-89]


In [7]:
nvd['Published'] = pd.to_datetime(nvd['Published'])
nvd = nvd.sort_values(by=['Published'])
nvd = nvd.reset_index(drop=True)
nvd['Published'] = pd.to_datetime(nvd['Published']).apply(lambda x: x.date())

## Export to CSV

In [10]:
#quoting=csv.QUOTE_NONNUMERIC to ensure that fields with special characters are enclosed in double-quotes.
#escapechar='\\' to escape any double-quote characters within the data with a backslash (\).
nvd.to_csv('../../../data_out/CVSSData.csv.gz', index=False, quoting=csv.QUOTE_NONNUMERIC, escapechar='\\', compression='gzip')
