In [1]:
# Install if needed (run once in a Jupyter cell)
!pip install pandas



In [3]:
import pandas as pd
import re

# Regex pattern to detect CVE IDs like CVE-2023-12345 (case-insensitive).
# The sequence number after the year has at least four digits and no upper
# bound, so it is matched with {4,}. Capping the length would silently truncate
# a longer ID into a different, shorter (and wrong) CVE.
CVE_PATTERN = re.compile(r"CVE-\d{4}-\d{4,}", re.IGNORECASE)

def extract_cves(text):
    """
    Return the sorted list of unique CVE IDs found in ``text``.

    Parameters
    ----------
    text : object
        Scalar value to scan. Missing values (as judged by ``pd.isna``) yield
        an empty list; anything else is converted with ``str()`` first.

    Returns
    -------
    list of str
        Upper-cased CVE IDs, de-duplicated and sorted lexicographically.
        Empty when nothing matches.
    """
    if pd.isna(text):
        return []
    found = CVE_PATTERN.findall(str(text))
    # Upper-case before de-duplicating so "cve-..." and "CVE-..." collapse into one.
    return sorted({cve.upper() for cve in found})


In [5]:
# === 1) LOAD TENABLE DATA ===
# Path to the Tenable plugin CSV; change it if your downloaded file has a different name.
tenable_path = "clean_tenable_plugins_213445_272165.csv"
tenable = pd.read_csv(tenable_path)

# Quick sanity check
print("Tenable columns:", tenable.columns.tolist())
print("Tenable rows:", len(tenable))

# === 2) COMBINE TEXT FIELDS TO SEARCH FOR CVE ===
text_cols = ["Title", "Synopsis", "Description", "Solution"]

# Create any absent text column as empty strings so the selection below cannot fail.
for col in text_cols:
    if col not in tenable.columns:
        tenable[col] = ""

# Blank out missing values before joining: astype(str) alone would turn them
# into the literal text "nan" inside combined_text.
tenable["combined_text"] = (
    tenable[text_cols]
    .fillna("")
    .astype(str)
    .agg(" ".join, axis=1)
)

# === 3) EXTRACT CVE LIST PER ROW ===
tenable["CVE_list"] = tenable["combined_text"].apply(extract_cves)

# How many rows actually have at least one CVE?
tenable_with_cve = tenable[tenable["CVE_list"].str.len() > 0]
print("Tenable rows with CVE:", len(tenable_with_cve))

# === 4) EXPLODE SO EACH ROW = 1 CVE ===
# explode() keeps the original row label on every produced row, so the index
# repeats for plugins that reference several CVEs.
tenable_exploded = tenable_with_cve.explode("CVE_list").rename(columns={"CVE_list": "CVE"})

# Keep only important columns for now
tenable_exploded = tenable_exploded[[
    "CVE",
    "Plugin ID",
    "Title",
    "Severity",
    "CVSS Score",
    "Family",
    "Published",
    "Exploit",
    "Source URL",
]]

# Optional: rename columns to simpler, space-free names
tenable_exploded = tenable_exploded.rename(columns={
    "Plugin ID": "Plugin_ID",
    "CVSS Score": "CVSS_Score",
    "Source URL": "Source_URL"
})

# Constant column recording which dataset these rows came from.
tenable_exploded["Source"] = "Tenable"

tenable_exploded.head()

Tenable columns: ['Plugin ID', 'Title', 'Severity', 'Synopsis', 'Description', 'Solution', 'CVSS Score', 'Family', 'Published', 'Exploit', 'Source URL']
Tenable rows: 57853
Tenable rows with CVE: 55098


Unnamed: 0,CVE,Plugin_ID,Title,Severity,CVSS_Score,Family,Published,Exploit,Source_URL,Source
0,CVE-2023-28370,213445,Debian dla-4007 : python-tornado-doc - securit...,Medium,6.4,Debian Local Security Checks,1/1/2025,No known exploits are available,https://www.tenable.com/plugins/nessus/213445,Tenable
0,CVE-2024-52804,213445,Debian dla-4007 : python-tornado-doc - securit...,Medium,6.4,Debian Local Security Checks,1/1/2025,No known exploits are available,https://www.tenable.com/plugins/nessus/213445,Tenable
1,CVE-2024-40896,213446,Fedora 40 : libxml2 (2024-9f3765a04b),Critical,7.5,Fedora Local Security Checks,1/1/2025,No known exploits are available,https://www.tenable.com/plugins/nessus/213446,Tenable
8,CVE-2024-53899,213453,RHEL 8 : python36:3.6 (RHSA-2025:0002),High,6.8,Red Hat Local Security Checks,1/1/2025,Exploits are available,https://www.tenable.com/plugins/nessus/213453,Tenable
13,CVE-2024-56378,213458,SUSE SLED15 / SLES15 / openSUSE 15 Security Up...,Medium,3.2,SuSE Local Security Checks,1/2/2025,Exploits are available,https://www.tenable.com/plugins/nessus/213458,Tenable


In [10]:
# Write the one-row-per-CVE table to disk. index=False leaves the row labels
# out of the file. The path is defined once so the log message cannot drift
# from the file that is actually written.
exploded_csv_path = "tenable_with_cve_exploded.csv"
tenable_exploded.to_csv(exploded_csv_path, index=False)
print("Saved:", exploded_csv_path)

Saved: tenable_with_cve_exploded.csv
