# Overview
1. Add CVE Descriptions to Top 25 removing newlines, tabs,... and escaping as required
2. Remove entries where CWE is not a CWE e.g. NVD-CWE-Insufficient-Info, UNSURE

In [2]:
from IPython.core.magic import register_cell_magic
from IPython.display import Markdown
import datetime
from datetime import date
import glob
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly
import warnings
import csv



In [3]:
df_cve = pd.read_csv('./data_out/CVSSData.csv.gz', quoting=csv.QUOTE_ALL, escapechar='\\', compression='gzip')
df_cve

Unnamed: 0,CVE,Published,Description,AttackVector CVSS3,AttackComplexity CVSS3,PrivilegesRequired CVSS3,UserInteraction CVSS3,Scope CVSS3,ConfidentialityImpact CVSS3,IntegrityImpact CVSS3,...,AccessComplexity CVSS2,Authentication CVSS2,ConfidentialityImpact CVSS2,IntegrityImpact CVSS2,AvailabilityImpact CVSS2,BaseScore CVSS2,BaseSeverity CVSS2,ExploitabilityScore CVSS2,ImpactScore CVSS2,CWEs
0,CVE-2021-3002,2021-01-01,Seo Panel 4.8.0 allows reflected XSS via the s...,NETWORK,LOW,NONE,REQUIRED,CHANGED,LOW,LOW,...,MEDIUM,NONE,NONE,PARTIAL,NONE,4.3,Missing_Data,8.6,2.9,['CWE-79']
1,CVE-2021-3005,2021-01-03,MK-AUTH through 19.01 K4.9 allows remote attac...,NETWORK,LOW,LOW,NONE,UNCHANGED,LOW,NONE,...,LOW,SINGLE,PARTIAL,NONE,NONE,4.0,Missing_Data,8.0,2.9,['NVD-CWE-noinfo']
2,CVE-2021-3004,2021-01-03,The _deposit function in the smart contract im...,NETWORK,LOW,NONE,NONE,UNCHANGED,NONE,HIGH,...,LOW,NONE,NONE,PARTIAL,NONE,5.0,Missing_Data,10.0,2.9,['CWE-682']
3,CVE-2021-3006,2021-01-03,The breed function in the smart contract imple...,NETWORK,LOW,NONE,NONE,UNCHANGED,NONE,HIGH,...,LOW,NONE,NONE,PARTIAL,NONE,5.0,Missing_Data,10.0,2.9,['NVD-CWE-Other']
4,CVE-2021-3007,2021-01-04,"Laminas Project laminas-http before 2.14.2, an...",NETWORK,LOW,NONE,NONE,UNCHANGED,HIGH,HIGH,...,LOW,NONE,PARTIAL,PARTIAL,PARTIAL,7.5,Missing_Data,10.0,6.4,['CWE-502']
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46311,CVE-2022-48942,2024-08-22,"In the Linux kernel, the following vulnerabili...",LOCAL,LOW,LOW,NONE,UNCHANGED,NONE,NONE,...,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,0.0,Missing_Data,0.0,0.0,['CWE-476']
46312,CVE-2022-48943,2024-08-22,"In the Linux kernel, the following vulnerabili...",LOCAL,LOW,LOW,NONE,UNCHANGED,HIGH,HIGH,...,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,0.0,Missing_Data,0.0,0.0,['NVD-CWE-noinfo']
46313,CVE-2022-48926,2024-08-22,"In the Linux kernel, the following vulnerabili...",LOCAL,LOW,LOW,NONE,UNCHANGED,HIGH,HIGH,...,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,0.0,Missing_Data,0.0,0.0,['NVD-CWE-noinfo']
46314,CVE-2022-48936,2024-08-22,"In the Linux kernel, the following vulnerabili...",LOCAL,LOW,LOW,NONE,UNCHANGED,NONE,NONE,...,Missing_Data,Missing_Data,Missing_Data,Missing_Data,Missing_Data,0.0,Missing_Data,0.0,0.0,['NVD-CWE-noinfo']


In [4]:
df = pd.read_csv('./data_in/top25-mitre-mapping-analysis-2023-public.csv')

df

Unnamed: 0,CVE,New CWE,Reasoning
0,CVE-2021-0674,CWE-20,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds..."
1,CVE-2021-0674,CWE-125,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds..."
2,CVE-2021-0676,CWE-20,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds..."
3,CVE-2021-0676,CWE-125,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds..."
4,CVE-2021-0677,CWE-190,"(Chains: CWE-190->CWE-125) Desc: ""out of bound..."
...,...,...,...
9707,CVE-2022-29897,CWE-20,"""due to an improper input validation"". ref men..."
9708,CVE-2022-29922,CWE-20,"""Improper Input Validation"" in desc. NVD vendo..."
9709,CVE-2022-3001,CWE-20,"""improper input handling"" and NVD Reference ma..."
9710,CVE-2022-30232,CWE-20,"desc: ""CWE-20: Improper Input Validation"" and ..."


In [5]:
df = df.merge(df_cve[['CVE', 'Description']], on='CVE', how='left')
df

Unnamed: 0,CVE,New CWE,Reasoning,Description
0,CVE-2021-0674,CWE-20,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds...","In alac decoder, there is a possible out of bo..."
1,CVE-2021-0674,CWE-125,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds...","In alac decoder, there is a possible out of bo..."
2,CVE-2021-0676,CWE-20,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds...","In geniezone driver, there is a possible out o..."
3,CVE-2021-0676,CWE-125,"(Chains: CWE-20->CWE-125) Desc: ""out of bounds...","In geniezone driver, there is a possible out o..."
4,CVE-2021-0677,CWE-190,"(Chains: CWE-190->CWE-125) Desc: ""out of bound...","In ccu driver, there is a possible out of boun..."
...,...,...,...,...
9707,CVE-2022-29897,CWE-20,"""due to an improper input validation"". ref men...",On various RAD-ISM-900-EN-* devices by PHOENIX...
9708,CVE-2022-29922,CWE-20,"""Improper Input Validation"" in desc. NVD vendo...",Improper Input Validation vulnerability in the...
9709,CVE-2022-3001,CWE-20,"""improper input handling"" and NVD Reference ma...",This vulnerability exists in Milesight Video M...
9710,CVE-2022-30232,CWE-20,"desc: ""CWE-20: Improper Input Validation"" and ...",A CWE-20: Improper Input Validation vulnerabil...


## Check for CVE Descriptions that are null

* 1 row has null description - CVE-2021-44228 - this is because the CVE was rejected.
* But we can still add the description from the CVE https://nvd.nist.gov/vuln/detail/CVE-2021-42248#VulnChangeHistorySection
* "GJSON <= 1.9.2 allows attackers to cause a redos via crafted JSON input."


In [6]:
df['Description'].isnull().sum()

1

In [7]:
null_description_rows = df[df['Description'].isnull()]
null_description_rows

Unnamed: 0,CVE,New CWE,Reasoning,Description
2849,CVE-2021-42248,CWE-1333,"""redos"" in desc",


In [8]:
df.loc[2849, 'Description'] = "GJSON <= 1.9.2 allows attackers to cause a redos via crafted JSON input."
df['Description'].isnull().sum()

0

In [9]:
df.drop('Reasoning', axis=1, inplace=True)
df

Unnamed: 0,CVE,New CWE,Description
0,CVE-2021-0674,CWE-20,"In alac decoder, there is a possible out of bo..."
1,CVE-2021-0674,CWE-125,"In alac decoder, there is a possible out of bo..."
2,CVE-2021-0676,CWE-20,"In geniezone driver, there is a possible out o..."
3,CVE-2021-0676,CWE-125,"In geniezone driver, there is a possible out o..."
4,CVE-2021-0677,CWE-190,"In ccu driver, there is a possible out of boun..."
...,...,...,...
9707,CVE-2022-29897,CWE-20,On various RAD-ISM-900-EN-* devices by PHOENIX...
9708,CVE-2022-29922,CWE-20,Improper Input Validation vulnerability in the...
9709,CVE-2022-3001,CWE-20,This vulnerability exists in Milesight Video M...
9710,CVE-2022-30232,CWE-20,A CWE-20: Improper Input Validation vulnerabil...


In [12]:
df = df[df['New CWE'].str.startswith('CWE-')]
df

Unnamed: 0,CVE,New CWE,Description
0,CVE-2021-0674,CWE-20,"In alac decoder, there is a possible out of bo..."
1,CVE-2021-0674,CWE-125,"In alac decoder, there is a possible out of bo..."
2,CVE-2021-0676,CWE-20,"In geniezone driver, there is a possible out o..."
3,CVE-2021-0676,CWE-125,"In geniezone driver, there is a possible out o..."
4,CVE-2021-0677,CWE-190,"In ccu driver, there is a possible out of boun..."
...,...,...,...
9707,CVE-2022-29897,CWE-20,On various RAD-ISM-900-EN-* devices by PHOENIX...
9708,CVE-2022-29922,CWE-20,Improper Input Validation vulnerability in the...
9709,CVE-2022-3001,CWE-20,This vulnerability exists in Milesight Video M...
9710,CVE-2022-30232,CWE-20,A CWE-20: Improper Input Validation vulnerabil...


In [13]:
# replace any sequence of whitespace characters (including newlines, tabs, and multiple spaces) with a single space.
df['Description'] = df['Description'].str.replace('\s+', ' ', regex=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Description'] = df['Description'].str.replace('\s+', ' ', regex=True)


In [14]:
df.to_csv("./data_out/top25-mitre-mapping-analysis-2023-public_with_cve_descriptions.csv", index=False, quoting=csv.QUOTE_ALL, escapechar='\\')