Step 1: Broze Layer - To update the json records from github to the delta lake to table.

In [0]:
# Simple setup - download CVE data and prepare bronze layer
import os
import json
import zipfile
import urllib.request
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

print("🚀 Setting up CVE data for Graph Analytics demo...")
print("📊 Target: 2000 CVEs from 2024 (Bronze)")

# Temporary working directory for downloads/extractions
TMP_DIR = "/tmp/cve_demo"
os.makedirs(TMP_DIR, exist_ok=True)
print(f"📁 Temporary workspace: {TMP_DIR}")


🚀 Setting up CVE data for Graph Analytics demo...
📊 Target: 2000 CVEs from 2024 (Bronze)
📁 Temporary workspace: /tmp/cve_demo


In [0]:
print("📥 Downloading CVE repository for 2024 Bronze load...")

# Temp locations on the driver (local to the workspace runtime)
zip_dest    = "/tmp/cve_graph_demo/cvelistV5.zip"
extract_dir = "/tmp/cve_graph_demo/cvelistV5-main"

# Create working directory
os.makedirs("/tmp/cve_graph_demo", exist_ok=True)

# Download the repository ZIP (full CVE list V5)
zip_url = "https://github.com/CVEProject/cvelistV5/archive/refs/heads/main.zip"
print(f"📥 Downloading from: {zip_url}")

with urllib.request.urlopen(zip_url) as response:
    data = response.read()

with open(zip_dest, "wb") as f:
    f.write(data)

print(f"✅ Downloaded {len(data):,} bytes to {zip_dest}")

# Extract the ZIP so we can traverse cves/2024/*
print("📦 Extracting ZIP archive...")
with zipfile.ZipFile(zip_dest) as z:
    z.extractall("/tmp/cve_graph_demo/")

print(f"✅ Extraction complete at: {extract_dir}")
print("✅ Bronze 2024 Delta target (managed table): bronze_db.cve_bronze_2024")


📥 Downloading CVE repository for 2024 Bronze load...
📥 Downloading from: https://github.com/CVEProject/cvelistV5/archive/refs/heads/main.zip
✅ Downloaded 524,611,867 bytes to /tmp/cve_graph_demo/cvelistV5.zip
📦 Extracting ZIP archive...
✅ Extraction complete at: /tmp/cve_graph_demo/cvelistV5-main
✅ Bronze 2024 Delta target (managed table): bronze_db.cve_bronze_2024


In [0]:
# Process 2024 CVEs (no limit if max_files=None)
print("🎯 Processing 2024 CVEs...")

def process_year_cves(year, max_files=None):
    """Process CVEs for a specific year. If max_files is None, process all files."""
    cve_year_dir = f"{extract_dir}/cves/{year}"
    json_files = []
    print(f"📂 Scanning directory: {cve_year_dir}")

    if os.path.exists(cve_year_dir):
        file_count = 0
        for root, dirs, files in os.walk(cve_year_dir):  # recurse through subfolders
            # stop the outer loop too if we've hit the cap
            if max_files is not None and file_count >= max_files:
                break

            for file in files:
                if file.endswith(".json") and f"CVE-{year}-" in file:
                    if max_files is not None and file_count >= max_files:
                        break

                    file_path = os.path.join(root, file)
                    try:
                        with open(file_path, "r", encoding="utf-8") as f:
                            cve_data = json.load(f)   # load directly from file
                            json_files.append(cve_data)

                        file_count += 1
                        if file_count % 500 == 0:
                            print(f"   📊 Processed {file_count} CVE-{year} files...")

                    except Exception as e:
                        print(f"⚠️ Skipped {file}: {e}")
                        continue

        print(f"✅ Collected {len(json_files)} CVEs from {year}")
        return json_files
    else:
        print(f"❌ Directory not found: {cve_year_dir}")
        return []

# ALL 2024 files (no cap)
cves_2024 = process_year_cves(2024)   # pass a number to cap if needed later
print(f"📊 Total 2024 CVEs collected: {len(cves_2024)}")


🎯 Processing 2024 CVEs...
📂 Scanning directory: /tmp/cve_graph_demo/cvelistV5-main/cves/2024
   📊 Processed 500 CVE-2024 files...
   📊 Processed 1000 CVE-2024 files...
   📊 Processed 1500 CVE-2024 files...
   📊 Processed 2000 CVE-2024 files...
   📊 Processed 2500 CVE-2024 files...
   📊 Processed 3000 CVE-2024 files...
   📊 Processed 3500 CVE-2024 files...
   📊 Processed 4000 CVE-2024 files...
   📊 Processed 4500 CVE-2024 files...
   📊 Processed 5000 CVE-2024 files...
   📊 Processed 5500 CVE-2024 files...
   📊 Processed 6000 CVE-2024 files...
   📊 Processed 6500 CVE-2024 files...
   📊 Processed 7000 CVE-2024 files...
   📊 Processed 7500 CVE-2024 files...
   📊 Processed 8000 CVE-2024 files...
   📊 Processed 8500 CVE-2024 files...
   📊 Processed 9000 CVE-2024 files...
   📊 Processed 9500 CVE-2024 files...
   📊 Processed 10000 CVE-2024 files...
   📊 Processed 10500 CVE-2024 files...
   📊 Processed 11000 CVE-2024 files...
   📊 Processed 11500 CVE-2024 files...
   📊 Processed 12000 CVE-2024 

In [0]:
from pyspark.sql.functions import col


In [0]:
# --- Serverless-safe Bronze writer for ALL 2024 CVEs (no pandas, no sparkContext) ---

from pyspark.sql.functions import (
    current_timestamp,
    current_date,
    lit,
    monotonically_increasing_id,
    col,
)
import json

print("💾 Converting 2024 CVEs to Spark DataFrame and saving as a managed Delta table...")
print("🚀 No pandas, no sparkContext, no Arrow config changes — serverless-friendly.")

# 1) Make sure the DB exists (managed tables => no filesystem paths needed)
spark.sql("CREATE DATABASE IF NOT EXISTS bronze_db")

table_name = "bronze_db.cve_bronze_2024"

def save_cves_to_delta_managed(cves_list, year: int, table_name: str):
    """
    Save CVEs (list[dict]) to a MANAGED Delta table.

    Works on serverless because:
      * no pandas
      * no sparkContext
      * no spark.conf tweaks
    """
    if not cves_list:
        print(f"⚠️ No CVEs to save for {year}")
        return None

    print(f"📊 Normalizing and converting {len(cves_list)} CVEs to Spark DataFrame...")

    # --- KEY FIX: normalize the 'containers' field so Spark can infer a single type ---
    normalized = []
    for rec in cves_list:
        r = dict(rec)  # shallow copy so we don't mutate original list

        cont = r.get("containers", None)
        if cont is None:
            # keep it explicitly as None
            r["containers"] = None
        else:
            # always store as JSON string -> consistent StringType for all rows
            try:
                r["containers"] = json.dumps(cont)
            except Exception:
                # if something is really weird, just stringify it
                r["containers"] = json.dumps(str(cont))

        normalized.append(r)

    # list[dict] -> Spark DF (Spark infers nested schema for other fields; 'containers' is now string)
    df_raw = spark.createDataFrame(normalized)

    # Bronze metadata columns (same as before)
    df_bronze = (
        df_raw
        .withColumn("_ingestion_timestamp", current_timestamp())
        .withColumn("_ingestion_date", current_date())
        .withColumn("_year", lit(year))
        .withColumn("_record_id", monotonically_increasing_id())
    )

    record_count = df_bronze.count()
    print(f"📈 {year} CVE records going into table: {record_count:,}")

    # Write as MANAGED Delta table in the metastore
    (
        df_bronze.write
        .format("delta")
        .mode("overwrite")          # replace if you re-run the notebook
        .option("mergeSchema", "true")
        .option("overwriteSchema", "true")
        .option("delta.columnMapping.mode", "name")
        .saveAsTable(table_name)
    )

    print(f"✅ {year} Bronze managed table created/updated: {table_name}")
    return df_bronze

# 2) Save ALL 2024 CVEs to the managed Delta table
df_2024 = save_cves_to_delta_managed(cves_2024, 2024, table_name)

# 3) Verify the table
print("🔍 Verifying managed table read...")
bronze_2024 = spark.table(table_name)
print(f"✅ 2024 data loaded: {bronze_2024.count():,} records\n")

print("🔎 Sample 2024 (key fields):")
(
    bronze_2024
    .select(
        col("cveMetadata.cveId").alias("cve_id"),
        col("cveMetadata.datePublished").alias("date_published"),
    )
    .show(10, truncate=False)
)

print("\n📄 Table storage details:")
spark.sql(f"DESCRIBE DETAIL {table_name}").show(truncate=False)

print("\n🕘 Delta history:")
spark.sql(f"DESCRIBE HISTORY {table_name}").show(truncate=False)


💾 Converting 2024 CVEs to Spark DataFrame and saving as a managed Delta table...
🚀 No pandas, no sparkContext, no Arrow config changes — serverless-friendly.
📊 Normalizing and converting 38753 CVEs to Spark DataFrame...
📈 2024 CVE records going into table: 38,753
✅ 2024 Bronze managed table created/updated: bronze_db.cve_bronze_2024
🔍 Verifying managed table read...
✅ 2024 data loaded: 38,753 records

🔎 Sample 2024 (key fields):
+--------------+------------------------+
|cve_id        |date_published          |
+--------------+------------------------+
|CVE-2024-39339|2024-09-18T00:00:00     |
|CVE-2024-39881|2024-07-09T21:23:31.171Z|
|CVE-2024-39580|2024-09-10T08:54:54.258Z|
|CVE-2024-39864|2024-07-05T13:40:37.937Z|
|CVE-2024-39522|2024-07-11T15:58:26.205Z|
|CVE-2024-39591|2024-08-13T05:00:42.912Z|
|CVE-2024-39211|2024-07-04T00:00:00     |
|CVE-2024-39914|2024-07-12T14:46:44.696Z|
|CVE-2024-39520|2024-07-11T15:56:53.855Z|
|CVE-2024-39735|2024-07-15T02:05:09.065Z|
+--------------+-----

In [0]:
# Verify the data is ready for graph analytics (2024 Bronze only, managed table)
print("🔍 Verifying data for graph analytics...")

from pyspark.sql.functions import col  # <-- add this import

try:
    table_name = "bronze_db.cve_bronze_2024"
    bronze_2024 = spark.table(table_name)

    # Count
    cnt = bronze_2024.count()
    print(f"✅ 2024 data loaded: {cnt} records")

    # Sample key fields
    print("\n🔍 Sample 2024 data (key fields for graph):")
    bronze_2024.select(
        col("cveMetadata.cveId").alias("cve_id"),
        col("cveMetadata.datePublished").alias("date_published")
    ).show(5, truncate=False)

    print("\n🚀 SUCCESS! Data is ready for Graph Analytics demo!")
    print(f"📚 Bronze table: {table_name}")

    # (Optional) storage details
    spark.sql(f"DESCRIBE DETAIL {table_name}").show(truncate=False)

except Exception as e:
    print(f"❌ Error verifying data: {e}")
    print("Check that the table bronze_db.cve_bronze_2024 exists, then re-run.")


🔍 Verifying data for graph analytics...
✅ 2024 data loaded: 38753 records

🔍 Sample 2024 data (key fields for graph):
+--------------+------------------------+
|cve_id        |date_published          |
+--------------+------------------------+
|CVE-2024-39339|2024-09-18T00:00:00     |
|CVE-2024-39881|2024-07-09T21:23:31.171Z|
|CVE-2024-39580|2024-09-10T08:54:54.258Z|
|CVE-2024-39864|2024-07-05T13:40:37.937Z|
|CVE-2024-39522|2024-07-11T15:58:26.205Z|
+--------------+------------------------+
only showing top 5 rows

🚀 SUCCESS! Data is ready for Graph Analytics demo!
📚 Bronze table: bronze_db.cve_bronze_2024
+------+------------------------------------+-----------------------------------+-----------+--------+-----------------------+-------------------+----------------+-----------------+--------+-----------+---------------------------------------------------------------------------------------------------------------+----------------+----------------+--------------------------------------

In [0]:
%sql
-- Sanity check of the table created
DESCRIBE DETAIL bronze_db.cve_bronze_2024;

format,id,name,description,location,createdAt,lastModified,partitionColumns,clusteringColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics,clusterByAuto
delta,fe30b537-6264-40d4-9f8e-41f263e1d618,workspace.bronze_db.cve_bronze_2024,,,2025-11-12T20:47:08.393Z,2025-11-17T02:27:31.000Z,List(),List(),8,17598645,"Map(delta.columnMapping.mode -> name, delta.enableDeletionVectors -> true, delta.columnMapping.maxColumnId -> 269)",3,7,"List(appendOnly, columnMapping, deletionVectors, invariants)","Map(numRowsDeletedByDeletionVectors -> 0, numDeletionVectors -> 0)",False


In [0]:
%sql
-- count of the table created
SELECT count(*) FROM bronze_db.cve_bronze_2024 ;

count(*)
38753


In [0]:
%sql
select * from bronze_db.cve_bronze_2024
limit 5

containers,cveMetadata,dataType,dataVersion,_ingestion_timestamp,_ingestion_date,_year,_record_id
"{""cna"": {""providerMetadata"": {""orgId"": ""8254265b-2729-46b6-b9e3-3dfca2d5bfca"", ""shortName"": ""mitre"", ""dateUpdated"": ""2024-09-18T19:31:26.885930""}, ""descriptions"": [{""lang"": ""en"", ""value"": ""A vulnerability has been discovered in all versions of Smartplay headunits, which are widely used in Suzuki and Toyota cars. This misconfiguration can lead to information disclosure, leaking sensitive details such as diagnostic log traces, system logs, headunit passwords, and personally identifiable information (PII). The exposure of such information may have serious implications for user privacy and system integrity.""}], ""affected"": [{""vendor"": ""n/a"", ""product"": ""n/a"", ""versions"": [{""version"": ""n/a"", ""status"": ""affected""}]}], ""references"": [{""url"": ""https://docs.google.com/document/d/1S-d8zyZreYYGSIr4zGww6F2iBfD63v10Z3YVbGnp2es/edit?usp=sharing""}, {""url"": ""https://mohammedshine.github.io/CVE-2024-39339.html""}], ""problemTypes"": [{""descriptions"": [{""type"": ""text"", ""lang"": ""en"", ""description"": ""n/a""}]}]}, ""adp"": [{""problemTypes"": [{""descriptions"": [{""type"": ""CWE"", ""cweId"": ""CWE-922"", ""lang"": ""en"", ""description"": ""CWE-922 Insecure Storage of Sensitive Information""}]}], ""affected"": [{""vendor"": ""globalsuzuki"", ""product"": ""smartplay_headunit_firmware"", ""cpes"": [""cpe:2.3:o:globalsuzuki:smartplay_headunit_firmware:*:*:*:*:*:*:*:*""], ""defaultStatus"": ""unknown"", ""versions"": [{""version"": ""0"", ""status"": ""affected"", ""lessThan"": ""*"", ""versionType"": ""custom""}]}], ""metrics"": [{""cvssV3_1"": {""scope"": ""UNCHANGED"", ""version"": ""3.1"", ""baseScore"": 7.5, ""attackVector"": ""NETWORK"", ""baseSeverity"": ""HIGH"", ""vectorString"": ""CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N"", ""integrityImpact"": ""NONE"", ""userInteraction"": ""NONE"", ""attackComplexity"": ""LOW"", ""availabilityImpact"": ""NONE"", ""privilegesRequired"": ""NONE"", ""confidentialityImpact"": ""HIGH""}}, {""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-11-05T19:10:53.031018Z"", ""id"": ""CVE-2024-39339"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""yes""}, {""Technical Impact"": ""partial""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2024-11-05T19:28:34.301Z""}}]}","Map(datePublished -> 2024-09-18T00:00:00, cveId -> CVE-2024-39339, assignerOrgId -> 8254265b-2729-46b6-b9e3-3dfca2d5bfca, state -> PUBLISHED, assignerShortName -> mitre, dateReserved -> 2024-06-24T00:00:00, dateUpdated -> 2024-11-05T19:28:34.301Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33908
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""CNCSoft-G2"", ""vendor"": ""Delta Electronics"", ""versions"": [{""status"": ""affected"", ""version"": ""2.0.0.5""}]}], ""credits"": [{""lang"": ""en"", ""type"": ""finder"", ""value"": ""Bobby Gould and Fritz Sands of Trend Micro Zero Day Initiative reported these vulnerabilities to CISA.""}], ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""\n\nDelta Electronics CNCSoft-G2 lacks proper validation of user-supplied data, which can result in a memory corruption condition. If a target visits a malicious page or opens a malicious file an attacker can leverage this vulnerability to execute code in the context of the current process.\n\n""}], ""value"": ""Delta Electronics CNCSoft-G2 lacks proper validation of user-supplied data, which can result in a memory corruption condition. If a target visits a malicious page or opens a malicious file an attacker can leverage this vulnerability to execute code in the context of the current process.""}], ""metrics"": [{""cvssV4_0"": {""Automatable"": ""NOT_DEFINED"", ""Recovery"": ""NOT_DEFINED"", ""Safety"": ""NOT_DEFINED"", ""attackComplexity"": ""LOW"", ""attackRequirements"": ""NONE"", ""attackVector"": ""LOCAL"", ""baseScore"": 8.4, ""baseSeverity"": ""HIGH"", ""privilegesRequired"": ""NONE"", ""providerUrgency"": ""NOT_DEFINED"", ""subAvailabilityImpact"": ""NONE"", ""subConfidentialityImpact"": ""NONE"", ""subIntegrityImpact"": ""NONE"", ""userInteraction"": ""ACTIVE"", ""valueDensity"": ""NOT_DEFINED"", ""vectorString"": ""CVSS:4.0/AV:L/AC:L/AT:N/PR:N/UI:A/VC:H/VI:H/VA:H/SC:N/SI:N/SA:N"", ""version"": ""4.0"", ""vulnAvailabilityImpact"": ""HIGH"", ""vulnConfidentialityImpact"": ""HIGH"", ""vulnIntegrityImpact"": ""HIGH"", ""vulnerabilityResponseEffort"": ""NOT_DEFINED""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-787"", ""description"": ""CWE-787 Out-of-bounds Write"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""7d14cffa-0d7d-4270-9dc0-52cabd5a23a6"", ""shortName"": ""icscert"", ""dateUpdated"": ""2024-07-09T21:23:31.171Z""}, ""references"": [{""tags"": [""government-resource""], ""url"": ""https://www.cisa.gov/news-events/ics-advisories/icsa-24-191-01""}], ""solutions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""Delta Electronics recommends users update to CNCSoft-G2 V2.1.0.10 or later.\n\n ""}], ""value"": ""Delta Electronics recommends users update to CNCSoft-G2 V2.1.0.10 https://downloadcenter.deltaww.com/en-US/DownloadCenter \u00a0or later.""}], ""source"": {""discovery"": ""UNKNOWN""}, ""title"": ""Out-of-bounds Write in Delta Electronics CNCSoft-G2"", ""x_generator"": {""engine"": ""Vulnogram 0.2.0""}}, ""adp"": [{""metrics"": [{""other"": {""type"": ""ssvc"", ""content"": {""id"": ""CVE-2024-39881"", ""role"": ""CISA Coordinator"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""no""}, {""Technical Impact"": ""total""}], ""version"": ""2.0.3"", ""timestamp"": ""2024-07-10T15:22:25.578143Z""}}}], ""affected"": [{""cpes"": [""cpe:2.3:a:delta_electronics:cncsoft-g2:2.0.0.5:*:*:*:*:*:*:*""], ""vendor"": ""delta_electronics"", ""product"": ""cncsoft-g2"", ""versions"": [{""status"": ""affected"", ""version"": ""2.0.0.5""}], ""defaultStatus"": ""unaffected""}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2025-08-27T20:42:58.039Z""}}, {""providerMetadata"": {""orgId"": ""af854a3a-2127-422b-91ae-364da2661108"", ""shortName"": ""CVE"", ""dateUpdated"": ""2024-08-02T04:33:11.245Z""}, ""title"": ""CVE Program Container"", ""references"": [{""tags"": [""government-resource"", ""x_transferred""], ""url"": ""https://www.cisa.gov/news-events/ics-advisories/icsa-24-191-01""}]}]}","Map(datePublished -> 2024-07-09T21:23:31.171Z, cveId -> CVE-2024-39881, assignerOrgId -> 7d14cffa-0d7d-4270-9dc0-52cabd5a23a6, state -> PUBLISHED, assignerShortName -> icscert, dateReserved -> 2024-07-01T18:13:23.097Z, dateUpdated -> 2025-08-27T20:42:58.039Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33909
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""PowerScale InsightIQ"", ""vendor"": ""Dell"", ""versions"": [{""lessThanOrEqual"": ""5.1"", ""status"": ""affected"", ""version"": ""5.0"", ""versionType"": ""semver""}]}], ""datePublic"": ""2024-09-09T06:30:00.000Z"", ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""Dell PowerScale InsightIQ, versions 5.0 through 5.1, contains an Improper Access Control vulnerability. A high privileged attacker with local access could potentially exploit this vulnerability, leading to Elevation of privileges.""}], ""value"": ""Dell PowerScale InsightIQ, versions 5.0 through 5.1, contains an Improper Access Control vulnerability. A high privileged attacker with local access could potentially exploit this vulnerability, leading to Elevation of privileges.""}], ""metrics"": [{""cvssV3_1"": {""attackComplexity"": ""LOW"", ""attackVector"": ""LOCAL"", ""availabilityImpact"": ""HIGH"", ""baseScore"": 6.7, ""baseSeverity"": ""MEDIUM"", ""confidentialityImpact"": ""HIGH"", ""integrityImpact"": ""HIGH"", ""privilegesRequired"": ""HIGH"", ""scope"": ""UNCHANGED"", ""userInteraction"": ""NONE"", ""vectorString"": ""CVSS:3.1/AV:L/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:H"", ""version"": ""3.1""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-284"", ""description"": ""CWE-284: Improper Access Control"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""c550e75a-17ff-4988-97f0-544cde3820fe"", ""shortName"": ""dell"", ""dateUpdated"": ""2024-09-10T08:54:54.258Z""}, ""references"": [{""tags"": [""vendor-advisory""], ""url"": ""https://www.dell.com/support/kbdoc/en-us/000228412/dsa-2024-360-security-update-for-dell-powerscale-insightiq-for-multiple-security-vulnerabilities""}], ""source"": {""discovery"": ""UNKNOWN""}, ""x_generator"": {""engine"": ""Vulnogram 0.2.0""}}, ""adp"": [{""affected"": [{""vendor"": ""dell"", ""product"": ""powerscale_insightiq"", ""cpes"": [""cpe:2.3:a:dell:powerscale_insightiq:*:*:*:*:*:*:*:*""], ""defaultStatus"": ""unknown"", ""versions"": [{""version"": ""5.0"", ""status"": ""affected"", ""lessThanOrEqual"": ""5.1"", ""versionType"": ""semver""}]}], ""metrics"": [{""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-09-10T13:10:51.964288Z"", ""id"": ""CVE-2024-39580"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""no""}, {""Technical Impact"": ""total""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2024-09-10T13:16:32.221Z""}}]}","Map(datePublished -> 2024-09-10T08:54:54.258Z, cveId -> CVE-2024-39580, assignerOrgId -> c550e75a-17ff-4988-97f0-544cde3820fe, state -> PUBLISHED, assignerShortName -> dell, dateReserved -> 2024-06-26T02:14:30.867Z, dateUpdated -> 2024-09-10T13:16:32.221Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33910
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""Apache CloudStack"", ""vendor"": ""Apache Software Foundation"", ""versions"": [{""lessThanOrEqual"": ""4.18.2.0"", ""status"": ""affected"", ""version"": ""4.0.0"", ""versionType"": ""semver""}, {""lessThanOrEqual"": ""4.19.0.1"", ""status"": ""affected"", ""version"": ""4.19.0.0"", ""versionType"": ""semver""}]}], ""credits"": [{""lang"": ""en"", ""type"": ""finder"", ""value"": ""Adam Pond of Apple Services Engineering Security""}, {""lang"": ""en"", ""type"": ""finder"", ""value"": ""Terry Thibault of Apple Services Engineering Security""}, {""lang"": ""en"", ""type"": ""finder"", ""value"": ""Damon Smith of Apple Services Engineering Security""}], ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""The CloudStack integration API service allows running its unauthenticated API server (usually on port 8096 when configured and enabled via integration.api.port global setting) for internal portal integrations and for testing purposes. By default, the integration API service port is disabled and is considered disabled when integration.api.port is set to 0 or negative. Due to an improper initialisation logic, the integration API service would listen on a random port when its port value is set to 0 (default value). An attacker that can access the CloudStack management network could scan and find the randomised integration API service port and exploit it to perform unauthorised administrative actions and perform remote code execution on CloudStack managed hosts and result in complete compromise of the confidentiality, integrity, and availability of CloudStack managed infrastructure. Users are recommended to restrict the network access on the CloudStack management server hosts to only essential ports. Users are recommended to upgrade to version 4.18.2.1, 4.19.0.2 or later, which addresses this issue.""}], ""value"": ""The CloudStack integration API service allows running its unauthenticated API server (usually on port 8096 when configured and enabled via integration.api.port global setting) for internal portal integrations and for testing purposes. By default, the integration API service port is disabled and is considered disabled when integration.api.port is set to 0 or negative. Due to an improper initialisation logic, the integration API service would listen on a random port when its port value is set to 0 (default value).\u00a0An attacker that can access the CloudStack management network could scan and find the randomised integration API service port and exploit it to perform unauthorised administrative actions and perform remote code execution on CloudStack managed hosts and result in complete\u00a0compromise of the confidentiality, integrity, and availability of CloudStack managed infrastructure.\n\nUsers are recommended to restrict the network access on the CloudStack management server hosts to only essential ports. Users are recommended to upgrade to version 4.18.2.1, 4.19.0.2 or later, which addresses this issue.""}], ""metrics"": [{""other"": {""content"": {""text"": ""important""}, ""type"": ""Textual description of severity""}}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-665"", ""description"": ""CWE-665 Improper Initialization"", ""lang"": ""en"", ""type"": ""CWE""}]}, {""descriptions"": [{""cweId"": ""CWE-94"", ""description"": ""CWE-94 Improper Control of Generation of Code ('Code Injection')"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""f0158376-9dc2-43b6-827c-5f631a4d8d09"", ""shortName"": ""apache"", ""dateUpdated"": ""2024-07-05T13:45:07.813Z""}, ""references"": [{""tags"": [""vendor-advisory"", ""mailing-list""], ""url"": ""https://lists.apache.org/thread/6l51r00csrct61plkyd3qg3fj99215d1""}, {""tags"": [""vendor-advisory"", ""patch""], ""url"": ""https://cloudstack.apache.org/blog/security-release-advisory-4.19.0.2-4.18.2.1""}, {""tags"": [""third-party-advisory""], ""url"": ""https://www.shapeblue.com/shapeblue-security-advisory-apache-cloudstack-security-releases-4-18-2-1-and-4-19-0-2/""}, {""url"": ""http://www.openwall.com/lists/oss-security/2024/07/05/1""}], ""source"": {""discovery"": ""UNKNOWN""}, ""title"": ""Apache CloudStack: Integration API service uses dynamic port when disabled"", ""x_generator"": {""engine"": ""Vulnogram 0.1.0-dev""}}, ""adp"": [{""affected"": [{""vendor"": ""apache_software_foundation"", ""product"": ""apache_cloudstack"", ""cpes"": [""cpe:2.3:a:apache_software_foundation:apache_cloudstack:*:*:*:*:*:*:*:*""], ""defaultStatus"": ""unknown"", ""versions"": [{""version"": ""4.0.0"", ""status"": ""affected"", ""lessThanOrEqual"": ""4.18.2.0"", ""versionType"": ""custom""}, {""version"": ""4.19.0.0"", ""status"": ""affected"", ""lessThanOrEqual"": ""4.19.0.1"", ""versionType"": ""custom""}]}], ""metrics"": [{""cvssV3_1"": {""scope"": ""UNCHANGED"", ""version"": ""3.1"", ""baseScore"": 9.8, ""attackVector"": ""NETWORK"", ""baseSeverity"": ""CRITICAL"", ""vectorString"": ""CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"", ""integrityImpact"": ""HIGH"", ""userInteraction"": ""NONE"", ""attackComplexity"": ""LOW"", ""availabilityImpact"": ""HIGH"", ""privilegesRequired"": ""NONE"", ""confidentialityImpact"": ""HIGH""}}, {""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-07-08T13:38:45.994090Z"", ""id"": ""CVE-2024-39864"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""yes""}, {""Technical Impact"": ""total""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2025-03-19T14:44:07.745Z""}}, {""providerMetadata"": {""orgId"": ""af854a3a-2127-422b-91ae-364da2661108"", ""shortName"": ""CVE"", ""dateUpdated"": ""2024-08-02T04:33:10.752Z""}, ""title"": ""CVE Program Container"", ""references"": [{""tags"": [""vendor-advisory"", ""mailing-list"", ""x_transferred""], ""url"": ""https://lists.apache.org/thread/6l51r00csrct61plkyd3qg3fj99215d1""}, {""tags"": [""vendor-advisory"", ""patch"", ""x_transferred""], ""url"": ""https://cloudstack.apache.org/blog/security-release-advisory-4.19.0.2-4.18.2.1""}, {""tags"": [""third-party-advisory"", ""x_transferred""], ""url"": ""https://www.shapeblue.com/shapeblue-security-advisory-apache-cloudstack-security-releases-4-18-2-1-and-4-19-0-2/""}, {""url"": ""http://www.openwall.com/lists/oss-security/2024/07/05/1"", ""tags"": [""x_transferred""]}]}]}","Map(datePublished -> 2024-07-05T13:40:37.937Z, cveId -> CVE-2024-39864, assignerOrgId -> f0158376-9dc2-43b6-827c-5f631a4d8d09, state -> PUBLISHED, assignerShortName -> apache, dateReserved -> 2024-07-01T10:59:29.245Z, dateUpdated -> 2025-03-19T14:44:07.745Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33911
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""Junos OS Evolved"", ""vendor"": ""Juniper Networks"", ""versions"": [{""lessThan"": ""22.3R2-EVO"", ""status"": ""affected"", ""version"": ""22.3-EVO"", ""versionType"": ""semver""}, {""lessThan"": ""22.4R1-S1-EVO, 22.4R2-EVO"", ""status"": ""affected"", ""version"": ""22.4-EVO"", ""versionType"": ""semver""}]}], ""datePublic"": ""2024-07-10T16:00:00.000Z"", ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""An Improper Neutralization of Special Elements vulnerability in Juniper Networks Junos OS Evolved commands allows a local, authenticated attacker with low privileges to escalate their privileges to 'root' leading to a full compromise of the system. The Junos OS Evolved CLI doesn't properly handle command options in some cases, allowing users which execute specific CLI commands with a crafted set of parameters to escalate their privileges to root on shell level. \n\nThis issue affects Junos OS Evolved:22.3-EVO versions before 22.3R2-EVO,22.4-EVO versions before 22.4R1-S1-EVO, 22.4R2-EVO.""}], ""value"": ""An Improper Neutralization of Special Elements vulnerability in Juniper Networks Junos OS Evolved commands allows a local, authenticated attacker with low privileges to escalate their privileges to 'root' leading to a full compromise of the system.\n\nThe Junos OS Evolved CLI doesn't properly handle command options in some cases, allowing users which execute specific CLI commands with a crafted set of parameters to escalate their privileges to root on shell level.\n\n\nThis issue affects Junos OS Evolved:\n\n\n\n * 22.3-EVO versions before 22.3R2-EVO,\n * 22.4-EVO versions before 22.4R1-S1-EVO, 22.4R2-EVO.""}], ""exploits"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""Juniper SIRT is not aware of any malicious exploitation of this vulnerability.""}], ""value"": ""Juniper SIRT is not aware of any malicious exploitation of this vulnerability.""}], ""metrics"": [{""cvssV3_1"": {""attackComplexity"": ""LOW"", ""attackVector"": ""LOCAL"", ""availabilityImpact"": ""HIGH"", ""baseScore"": 7.8, ""baseSeverity"": ""HIGH"", ""confidentialityImpact"": ""HIGH"", ""integrityImpact"": ""HIGH"", ""privilegesRequired"": ""LOW"", ""scope"": ""UNCHANGED"", ""userInteraction"": ""NONE"", ""vectorString"": ""CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H"", ""version"": ""3.1""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}, {""cvssV4_0"": {""Automatable"": ""NOT_DEFINED"", ""Recovery"": ""NOT_DEFINED"", ""Safety"": ""NOT_DEFINED"", ""attackComplexity"": ""LOW"", ""attackRequirements"": ""NONE"", ""attackVector"": ""LOCAL"", ""baseScore"": 8.5, ""baseSeverity"": ""HIGH"", ""privilegesRequired"": ""LOW"", ""providerUrgency"": ""NOT_DEFINED"", ""subAvailabilityImpact"": ""NONE"", ""subConfidentialityImpact"": ""NONE"", ""subIntegrityImpact"": ""NONE"", ""userInteraction"": ""NONE"", ""valueDensity"": ""NOT_DEFINED"", ""vectorString"": ""CVSS:4.0/AV:L/AC:L/AT:N/PR:L/UI:N/VC:H/VI:H/VA:H/SC:N/SI:N/SA:N"", ""version"": ""4.0"", ""vulnAvailabilityImpact"": ""HIGH"", ""vulnConfidentialityImpact"": ""HIGH"", ""vulnIntegrityImpact"": ""HIGH"", ""vulnerabilityResponseEffort"": ""NOT_DEFINED""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-78"", ""description"": ""CWE-78 Improper Neutralization of Special Elements used in an OS Command ('OS Command Injection')"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""8cbe9d5a-a066-4c94-8978-4b15efeae968"", ""shortName"": ""juniper"", ""dateUpdated"": ""2024-07-11T15:58:26.205Z""}, ""references"": [{""tags"": [""vendor-advisory""], ""url"": ""https://supportportal.juniper.net/JSA82975""}], ""solutions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""The following software releases have been updated to resolve this specific issue: 22.3R2-EVO, 22.4R1-S1-EVO, 22.4R2-EVO, 23.2R1-EVO, and all subsequent releases.""}], ""value"": ""The following software releases have been updated to resolve this specific issue: 22.3R2-EVO, 22.4R1-S1-EVO, 22.4R2-EVO, 23.2R1-EVO, and all subsequent releases.""}], ""source"": {""advisory"": ""JSA82975"", ""defect"": [""1696784""], ""discovery"": ""INTERNAL""}, ""title"": ""Junos OS Evolved: CLI parameter processing issue allows privilege escalation"", ""workarounds"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""There are no known workarounds for this issue.""}], ""value"": ""There are no known workarounds for this issue.""}], ""x_generator"": {""engine"": ""Vulnogram 0.1.0-dev""}}, ""adp"": [{""affected"": [{""vendor"": ""juniper"", ""product"": ""junos_os_evolved"", ""cpes"": [""cpe:2.3:a:juniper:junos_os_evolved:22.3:*:*:*:*:*:*:*""], ""defaultStatus"": ""unaffected"", ""versions"": [{""version"": ""22.3"", ""status"": ""affected"", ""lessThan"": ""22.3r2"", ""versionType"": ""semver""}]}, {""vendor"": ""juniper"", ""product"": ""junos_os_evolved"", ""cpes"": [""cpe:2.3:o:juniper:junos_os_evolved:22.4:*:*:*:*:*:*:*""], ""defaultStatus"": ""unaffected"", ""versions"": [{""version"": ""22.4"", ""status"": ""affected"", ""lessThan"": ""22.4r2"", ""versionType"": ""semver""}, {""version"": ""22.4"", ""status"": ""affected"", ""lessThan"": ""22.4r1-s2"", ""versionType"": ""semver""}]}], ""metrics"": [{""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-07-13T03:55:24.372855Z"", ""id"": ""CVE-2024-39522"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""no""}, {""Technical Impact"": ""total""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2024-07-13T11:40:43.932Z""}}, {""providerMetadata"": {""orgId"": ""af854a3a-2127-422b-91ae-364da2661108"", ""shortName"": ""CVE"", ""dateUpdated"": ""2024-08-02T04:26:15.539Z""}, ""title"": ""CVE Program Container"", ""references"": [{""tags"": [""vendor-advisory"", ""x_transferred""], ""url"": ""https://supportportal.juniper.net/JSA82975""}]}]}","Map(datePublished -> 2024-07-11T15:58:26.205Z, cveId -> CVE-2024-39522, assignerOrgId -> 8cbe9d5a-a066-4c94-8978-4b15efeae968, state -> PUBLISHED, assignerShortName -> juniper, dateReserved -> 2024-06-25T15:12:53.239Z, dateUpdated -> 2024-08-02T04:26:15.539Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33912


In [0]:
%sql
select  count( distinct cveMetadata.cveId) from bronze_db.cve_bronze_2024

count(DISTINCTcveMetadata.cveId)
38753


In [0]:
%sql

SELECT COUNT(*) 
FROM bronze_db.cve_bronze_2024 
WHERE cveMetadata.cveId IS NULL;


COUNT(*)
0


In [0]:
%sql
-- Changing the name of the table created to cve_broze.records as per the project requirement given.
CREATE DATABASE IF NOT EXISTS cve_bronze;


In [0]:
%sql
CREATE OR REPLACE VIEW cve_bronze.records AS
SELECT * FROM bronze_db.cve_bronze_2024;


In [0]:
%sql
select * from cve_bronze.records
limit 5

containers,cveMetadata,dataType,dataVersion,_ingestion_timestamp,_ingestion_date,_year,_record_id
"{""cna"": {""providerMetadata"": {""orgId"": ""8254265b-2729-46b6-b9e3-3dfca2d5bfca"", ""shortName"": ""mitre"", ""dateUpdated"": ""2024-09-18T19:31:26.885930""}, ""descriptions"": [{""lang"": ""en"", ""value"": ""A vulnerability has been discovered in all versions of Smartplay headunits, which are widely used in Suzuki and Toyota cars. This misconfiguration can lead to information disclosure, leaking sensitive details such as diagnostic log traces, system logs, headunit passwords, and personally identifiable information (PII). The exposure of such information may have serious implications for user privacy and system integrity.""}], ""affected"": [{""vendor"": ""n/a"", ""product"": ""n/a"", ""versions"": [{""version"": ""n/a"", ""status"": ""affected""}]}], ""references"": [{""url"": ""https://docs.google.com/document/d/1S-d8zyZreYYGSIr4zGww6F2iBfD63v10Z3YVbGnp2es/edit?usp=sharing""}, {""url"": ""https://mohammedshine.github.io/CVE-2024-39339.html""}], ""problemTypes"": [{""descriptions"": [{""type"": ""text"", ""lang"": ""en"", ""description"": ""n/a""}]}]}, ""adp"": [{""problemTypes"": [{""descriptions"": [{""type"": ""CWE"", ""cweId"": ""CWE-922"", ""lang"": ""en"", ""description"": ""CWE-922 Insecure Storage of Sensitive Information""}]}], ""affected"": [{""vendor"": ""globalsuzuki"", ""product"": ""smartplay_headunit_firmware"", ""cpes"": [""cpe:2.3:o:globalsuzuki:smartplay_headunit_firmware:*:*:*:*:*:*:*:*""], ""defaultStatus"": ""unknown"", ""versions"": [{""version"": ""0"", ""status"": ""affected"", ""lessThan"": ""*"", ""versionType"": ""custom""}]}], ""metrics"": [{""cvssV3_1"": {""scope"": ""UNCHANGED"", ""version"": ""3.1"", ""baseScore"": 7.5, ""attackVector"": ""NETWORK"", ""baseSeverity"": ""HIGH"", ""vectorString"": ""CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N"", ""integrityImpact"": ""NONE"", ""userInteraction"": ""NONE"", ""attackComplexity"": ""LOW"", ""availabilityImpact"": ""NONE"", ""privilegesRequired"": ""NONE"", ""confidentialityImpact"": ""HIGH""}}, {""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-11-05T19:10:53.031018Z"", ""id"": ""CVE-2024-39339"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""yes""}, {""Technical Impact"": ""partial""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2024-11-05T19:28:34.301Z""}}]}","Map(datePublished -> 2024-09-18T00:00:00, cveId -> CVE-2024-39339, assignerOrgId -> 8254265b-2729-46b6-b9e3-3dfca2d5bfca, state -> PUBLISHED, assignerShortName -> mitre, dateReserved -> 2024-06-24T00:00:00, dateUpdated -> 2024-11-05T19:28:34.301Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33908
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""CNCSoft-G2"", ""vendor"": ""Delta Electronics"", ""versions"": [{""status"": ""affected"", ""version"": ""2.0.0.5""}]}], ""credits"": [{""lang"": ""en"", ""type"": ""finder"", ""value"": ""Bobby Gould and Fritz Sands of Trend Micro Zero Day Initiative reported these vulnerabilities to CISA.""}], ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""\n\nDelta Electronics CNCSoft-G2 lacks proper validation of user-supplied data, which can result in a memory corruption condition. If a target visits a malicious page or opens a malicious file an attacker can leverage this vulnerability to execute code in the context of the current process.\n\n""}], ""value"": ""Delta Electronics CNCSoft-G2 lacks proper validation of user-supplied data, which can result in a memory corruption condition. If a target visits a malicious page or opens a malicious file an attacker can leverage this vulnerability to execute code in the context of the current process.""}], ""metrics"": [{""cvssV4_0"": {""Automatable"": ""NOT_DEFINED"", ""Recovery"": ""NOT_DEFINED"", ""Safety"": ""NOT_DEFINED"", ""attackComplexity"": ""LOW"", ""attackRequirements"": ""NONE"", ""attackVector"": ""LOCAL"", ""baseScore"": 8.4, ""baseSeverity"": ""HIGH"", ""privilegesRequired"": ""NONE"", ""providerUrgency"": ""NOT_DEFINED"", ""subAvailabilityImpact"": ""NONE"", ""subConfidentialityImpact"": ""NONE"", ""subIntegrityImpact"": ""NONE"", ""userInteraction"": ""ACTIVE"", ""valueDensity"": ""NOT_DEFINED"", ""vectorString"": ""CVSS:4.0/AV:L/AC:L/AT:N/PR:N/UI:A/VC:H/VI:H/VA:H/SC:N/SI:N/SA:N"", ""version"": ""4.0"", ""vulnAvailabilityImpact"": ""HIGH"", ""vulnConfidentialityImpact"": ""HIGH"", ""vulnIntegrityImpact"": ""HIGH"", ""vulnerabilityResponseEffort"": ""NOT_DEFINED""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-787"", ""description"": ""CWE-787 Out-of-bounds Write"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""7d14cffa-0d7d-4270-9dc0-52cabd5a23a6"", ""shortName"": ""icscert"", ""dateUpdated"": ""2024-07-09T21:23:31.171Z""}, ""references"": [{""tags"": [""government-resource""], ""url"": ""https://www.cisa.gov/news-events/ics-advisories/icsa-24-191-01""}], ""solutions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""Delta Electronics recommends users update to CNCSoft-G2 V2.1.0.10 or later.\n\n ""}], ""value"": ""Delta Electronics recommends users update to CNCSoft-G2 V2.1.0.10 https://downloadcenter.deltaww.com/en-US/DownloadCenter \u00a0or later.""}], ""source"": {""discovery"": ""UNKNOWN""}, ""title"": ""Out-of-bounds Write in Delta Electronics CNCSoft-G2"", ""x_generator"": {""engine"": ""Vulnogram 0.2.0""}}, ""adp"": [{""metrics"": [{""other"": {""type"": ""ssvc"", ""content"": {""id"": ""CVE-2024-39881"", ""role"": ""CISA Coordinator"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""no""}, {""Technical Impact"": ""total""}], ""version"": ""2.0.3"", ""timestamp"": ""2024-07-10T15:22:25.578143Z""}}}], ""affected"": [{""cpes"": [""cpe:2.3:a:delta_electronics:cncsoft-g2:2.0.0.5:*:*:*:*:*:*:*""], ""vendor"": ""delta_electronics"", ""product"": ""cncsoft-g2"", ""versions"": [{""status"": ""affected"", ""version"": ""2.0.0.5""}], ""defaultStatus"": ""unaffected""}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2025-08-27T20:42:58.039Z""}}, {""providerMetadata"": {""orgId"": ""af854a3a-2127-422b-91ae-364da2661108"", ""shortName"": ""CVE"", ""dateUpdated"": ""2024-08-02T04:33:11.245Z""}, ""title"": ""CVE Program Container"", ""references"": [{""tags"": [""government-resource"", ""x_transferred""], ""url"": ""https://www.cisa.gov/news-events/ics-advisories/icsa-24-191-01""}]}]}","Map(datePublished -> 2024-07-09T21:23:31.171Z, cveId -> CVE-2024-39881, assignerOrgId -> 7d14cffa-0d7d-4270-9dc0-52cabd5a23a6, state -> PUBLISHED, assignerShortName -> icscert, dateReserved -> 2024-07-01T18:13:23.097Z, dateUpdated -> 2025-08-27T20:42:58.039Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33909
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""PowerScale InsightIQ"", ""vendor"": ""Dell"", ""versions"": [{""lessThanOrEqual"": ""5.1"", ""status"": ""affected"", ""version"": ""5.0"", ""versionType"": ""semver""}]}], ""datePublic"": ""2024-09-09T06:30:00.000Z"", ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""Dell PowerScale InsightIQ, versions 5.0 through 5.1, contains an Improper Access Control vulnerability. A high privileged attacker with local access could potentially exploit this vulnerability, leading to Elevation of privileges.""}], ""value"": ""Dell PowerScale InsightIQ, versions 5.0 through 5.1, contains an Improper Access Control vulnerability. A high privileged attacker with local access could potentially exploit this vulnerability, leading to Elevation of privileges.""}], ""metrics"": [{""cvssV3_1"": {""attackComplexity"": ""LOW"", ""attackVector"": ""LOCAL"", ""availabilityImpact"": ""HIGH"", ""baseScore"": 6.7, ""baseSeverity"": ""MEDIUM"", ""confidentialityImpact"": ""HIGH"", ""integrityImpact"": ""HIGH"", ""privilegesRequired"": ""HIGH"", ""scope"": ""UNCHANGED"", ""userInteraction"": ""NONE"", ""vectorString"": ""CVSS:3.1/AV:L/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:H"", ""version"": ""3.1""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-284"", ""description"": ""CWE-284: Improper Access Control"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""c550e75a-17ff-4988-97f0-544cde3820fe"", ""shortName"": ""dell"", ""dateUpdated"": ""2024-09-10T08:54:54.258Z""}, ""references"": [{""tags"": [""vendor-advisory""], ""url"": ""https://www.dell.com/support/kbdoc/en-us/000228412/dsa-2024-360-security-update-for-dell-powerscale-insightiq-for-multiple-security-vulnerabilities""}], ""source"": {""discovery"": ""UNKNOWN""}, ""x_generator"": {""engine"": ""Vulnogram 0.2.0""}}, ""adp"": [{""affected"": [{""vendor"": ""dell"", ""product"": ""powerscale_insightiq"", ""cpes"": [""cpe:2.3:a:dell:powerscale_insightiq:*:*:*:*:*:*:*:*""], ""defaultStatus"": ""unknown"", ""versions"": [{""version"": ""5.0"", ""status"": ""affected"", ""lessThanOrEqual"": ""5.1"", ""versionType"": ""semver""}]}], ""metrics"": [{""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-09-10T13:10:51.964288Z"", ""id"": ""CVE-2024-39580"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""no""}, {""Technical Impact"": ""total""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2024-09-10T13:16:32.221Z""}}]}","Map(datePublished -> 2024-09-10T08:54:54.258Z, cveId -> CVE-2024-39580, assignerOrgId -> c550e75a-17ff-4988-97f0-544cde3820fe, state -> PUBLISHED, assignerShortName -> dell, dateReserved -> 2024-06-26T02:14:30.867Z, dateUpdated -> 2024-09-10T13:16:32.221Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33910
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""Apache CloudStack"", ""vendor"": ""Apache Software Foundation"", ""versions"": [{""lessThanOrEqual"": ""4.18.2.0"", ""status"": ""affected"", ""version"": ""4.0.0"", ""versionType"": ""semver""}, {""lessThanOrEqual"": ""4.19.0.1"", ""status"": ""affected"", ""version"": ""4.19.0.0"", ""versionType"": ""semver""}]}], ""credits"": [{""lang"": ""en"", ""type"": ""finder"", ""value"": ""Adam Pond of Apple Services Engineering Security""}, {""lang"": ""en"", ""type"": ""finder"", ""value"": ""Terry Thibault of Apple Services Engineering Security""}, {""lang"": ""en"", ""type"": ""finder"", ""value"": ""Damon Smith of Apple Services Engineering Security""}], ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""The CloudStack integration API service allows running its unauthenticated API server (usually on port 8096 when configured and enabled via integration.api.port global setting) for internal portal integrations and for testing purposes. By default, the integration API service port is disabled and is considered disabled when integration.api.port is set to 0 or negative. Due to an improper initialisation logic, the integration API service would listen on a random port when its port value is set to 0 (default value). An attacker that can access the CloudStack management network could scan and find the randomised integration API service port and exploit it to perform unauthorised administrative actions and perform remote code execution on CloudStack managed hosts and result in complete compromise of the confidentiality, integrity, and availability of CloudStack managed infrastructure. Users are recommended to restrict the network access on the CloudStack management server hosts to only essential ports. Users are recommended to upgrade to version 4.18.2.1, 4.19.0.2 or later, which addresses this issue.""}], ""value"": ""The CloudStack integration API service allows running its unauthenticated API server (usually on port 8096 when configured and enabled via integration.api.port global setting) for internal portal integrations and for testing purposes. By default, the integration API service port is disabled and is considered disabled when integration.api.port is set to 0 or negative. Due to an improper initialisation logic, the integration API service would listen on a random port when its port value is set to 0 (default value).\u00a0An attacker that can access the CloudStack management network could scan and find the randomised integration API service port and exploit it to perform unauthorised administrative actions and perform remote code execution on CloudStack managed hosts and result in complete\u00a0compromise of the confidentiality, integrity, and availability of CloudStack managed infrastructure.\n\nUsers are recommended to restrict the network access on the CloudStack management server hosts to only essential ports. Users are recommended to upgrade to version 4.18.2.1, 4.19.0.2 or later, which addresses this issue.""}], ""metrics"": [{""other"": {""content"": {""text"": ""important""}, ""type"": ""Textual description of severity""}}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-665"", ""description"": ""CWE-665 Improper Initialization"", ""lang"": ""en"", ""type"": ""CWE""}]}, {""descriptions"": [{""cweId"": ""CWE-94"", ""description"": ""CWE-94 Improper Control of Generation of Code ('Code Injection')"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""f0158376-9dc2-43b6-827c-5f631a4d8d09"", ""shortName"": ""apache"", ""dateUpdated"": ""2024-07-05T13:45:07.813Z""}, ""references"": [{""tags"": [""vendor-advisory"", ""mailing-list""], ""url"": ""https://lists.apache.org/thread/6l51r00csrct61plkyd3qg3fj99215d1""}, {""tags"": [""vendor-advisory"", ""patch""], ""url"": ""https://cloudstack.apache.org/blog/security-release-advisory-4.19.0.2-4.18.2.1""}, {""tags"": [""third-party-advisory""], ""url"": ""https://www.shapeblue.com/shapeblue-security-advisory-apache-cloudstack-security-releases-4-18-2-1-and-4-19-0-2/""}, {""url"": ""http://www.openwall.com/lists/oss-security/2024/07/05/1""}], ""source"": {""discovery"": ""UNKNOWN""}, ""title"": ""Apache CloudStack: Integration API service uses dynamic port when disabled"", ""x_generator"": {""engine"": ""Vulnogram 0.1.0-dev""}}, ""adp"": [{""affected"": [{""vendor"": ""apache_software_foundation"", ""product"": ""apache_cloudstack"", ""cpes"": [""cpe:2.3:a:apache_software_foundation:apache_cloudstack:*:*:*:*:*:*:*:*""], ""defaultStatus"": ""unknown"", ""versions"": [{""version"": ""4.0.0"", ""status"": ""affected"", ""lessThanOrEqual"": ""4.18.2.0"", ""versionType"": ""custom""}, {""version"": ""4.19.0.0"", ""status"": ""affected"", ""lessThanOrEqual"": ""4.19.0.1"", ""versionType"": ""custom""}]}], ""metrics"": [{""cvssV3_1"": {""scope"": ""UNCHANGED"", ""version"": ""3.1"", ""baseScore"": 9.8, ""attackVector"": ""NETWORK"", ""baseSeverity"": ""CRITICAL"", ""vectorString"": ""CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H"", ""integrityImpact"": ""HIGH"", ""userInteraction"": ""NONE"", ""attackComplexity"": ""LOW"", ""availabilityImpact"": ""HIGH"", ""privilegesRequired"": ""NONE"", ""confidentialityImpact"": ""HIGH""}}, {""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-07-08T13:38:45.994090Z"", ""id"": ""CVE-2024-39864"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""yes""}, {""Technical Impact"": ""total""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2025-03-19T14:44:07.745Z""}}, {""providerMetadata"": {""orgId"": ""af854a3a-2127-422b-91ae-364da2661108"", ""shortName"": ""CVE"", ""dateUpdated"": ""2024-08-02T04:33:10.752Z""}, ""title"": ""CVE Program Container"", ""references"": [{""tags"": [""vendor-advisory"", ""mailing-list"", ""x_transferred""], ""url"": ""https://lists.apache.org/thread/6l51r00csrct61plkyd3qg3fj99215d1""}, {""tags"": [""vendor-advisory"", ""patch"", ""x_transferred""], ""url"": ""https://cloudstack.apache.org/blog/security-release-advisory-4.19.0.2-4.18.2.1""}, {""tags"": [""third-party-advisory"", ""x_transferred""], ""url"": ""https://www.shapeblue.com/shapeblue-security-advisory-apache-cloudstack-security-releases-4-18-2-1-and-4-19-0-2/""}, {""url"": ""http://www.openwall.com/lists/oss-security/2024/07/05/1"", ""tags"": [""x_transferred""]}]}]}","Map(datePublished -> 2024-07-05T13:40:37.937Z, cveId -> CVE-2024-39864, assignerOrgId -> f0158376-9dc2-43b6-827c-5f631a4d8d09, state -> PUBLISHED, assignerShortName -> apache, dateReserved -> 2024-07-01T10:59:29.245Z, dateUpdated -> 2025-03-19T14:44:07.745Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33911
"{""cna"": {""affected"": [{""defaultStatus"": ""unaffected"", ""product"": ""Junos OS Evolved"", ""vendor"": ""Juniper Networks"", ""versions"": [{""lessThan"": ""22.3R2-EVO"", ""status"": ""affected"", ""version"": ""22.3-EVO"", ""versionType"": ""semver""}, {""lessThan"": ""22.4R1-S1-EVO, 22.4R2-EVO"", ""status"": ""affected"", ""version"": ""22.4-EVO"", ""versionType"": ""semver""}]}], ""datePublic"": ""2024-07-10T16:00:00.000Z"", ""descriptions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""An Improper Neutralization of Special Elements vulnerability in Juniper Networks Junos OS Evolved commands allows a local, authenticated attacker with low privileges to escalate their privileges to 'root' leading to a full compromise of the system. The Junos OS Evolved CLI doesn't properly handle command options in some cases, allowing users which execute specific CLI commands with a crafted set of parameters to escalate their privileges to root on shell level. \n\nThis issue affects Junos OS Evolved:22.3-EVO versions before 22.3R2-EVO,22.4-EVO versions before 22.4R1-S1-EVO, 22.4R2-EVO.""}], ""value"": ""An Improper Neutralization of Special Elements vulnerability in Juniper Networks Junos OS Evolved commands allows a local, authenticated attacker with low privileges to escalate their privileges to 'root' leading to a full compromise of the system.\n\nThe Junos OS Evolved CLI doesn't properly handle command options in some cases, allowing users which execute specific CLI commands with a crafted set of parameters to escalate their privileges to root on shell level.\n\n\nThis issue affects Junos OS Evolved:\n\n\n\n * 22.3-EVO versions before 22.3R2-EVO,\n * 22.4-EVO versions before 22.4R1-S1-EVO, 22.4R2-EVO.""}], ""exploits"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""Juniper SIRT is not aware of any malicious exploitation of this vulnerability.""}], ""value"": ""Juniper SIRT is not aware of any malicious exploitation of this vulnerability.""}], ""metrics"": [{""cvssV3_1"": {""attackComplexity"": ""LOW"", ""attackVector"": ""LOCAL"", ""availabilityImpact"": ""HIGH"", ""baseScore"": 7.8, ""baseSeverity"": ""HIGH"", ""confidentialityImpact"": ""HIGH"", ""integrityImpact"": ""HIGH"", ""privilegesRequired"": ""LOW"", ""scope"": ""UNCHANGED"", ""userInteraction"": ""NONE"", ""vectorString"": ""CVSS:3.1/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H"", ""version"": ""3.1""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}, {""cvssV4_0"": {""Automatable"": ""NOT_DEFINED"", ""Recovery"": ""NOT_DEFINED"", ""Safety"": ""NOT_DEFINED"", ""attackComplexity"": ""LOW"", ""attackRequirements"": ""NONE"", ""attackVector"": ""LOCAL"", ""baseScore"": 8.5, ""baseSeverity"": ""HIGH"", ""privilegesRequired"": ""LOW"", ""providerUrgency"": ""NOT_DEFINED"", ""subAvailabilityImpact"": ""NONE"", ""subConfidentialityImpact"": ""NONE"", ""subIntegrityImpact"": ""NONE"", ""userInteraction"": ""NONE"", ""valueDensity"": ""NOT_DEFINED"", ""vectorString"": ""CVSS:4.0/AV:L/AC:L/AT:N/PR:L/UI:N/VC:H/VI:H/VA:H/SC:N/SI:N/SA:N"", ""version"": ""4.0"", ""vulnAvailabilityImpact"": ""HIGH"", ""vulnConfidentialityImpact"": ""HIGH"", ""vulnIntegrityImpact"": ""HIGH"", ""vulnerabilityResponseEffort"": ""NOT_DEFINED""}, ""format"": ""CVSS"", ""scenarios"": [{""lang"": ""en"", ""value"": ""GENERAL""}]}], ""problemTypes"": [{""descriptions"": [{""cweId"": ""CWE-78"", ""description"": ""CWE-78 Improper Neutralization of Special Elements used in an OS Command ('OS Command Injection')"", ""lang"": ""en"", ""type"": ""CWE""}]}], ""providerMetadata"": {""orgId"": ""8cbe9d5a-a066-4c94-8978-4b15efeae968"", ""shortName"": ""juniper"", ""dateUpdated"": ""2024-07-11T15:58:26.205Z""}, ""references"": [{""tags"": [""vendor-advisory""], ""url"": ""https://supportportal.juniper.net/JSA82975""}], ""solutions"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""The following software releases have been updated to resolve this specific issue: 22.3R2-EVO, 22.4R1-S1-EVO, 22.4R2-EVO, 23.2R1-EVO, and all subsequent releases.""}], ""value"": ""The following software releases have been updated to resolve this specific issue: 22.3R2-EVO, 22.4R1-S1-EVO, 22.4R2-EVO, 23.2R1-EVO, and all subsequent releases.""}], ""source"": {""advisory"": ""JSA82975"", ""defect"": [""1696784""], ""discovery"": ""INTERNAL""}, ""title"": ""Junos OS Evolved: CLI parameter processing issue allows privilege escalation"", ""workarounds"": [{""lang"": ""en"", ""supportingMedia"": [{""base64"": false, ""type"": ""text/html"", ""value"": ""There are no known workarounds for this issue.""}], ""value"": ""There are no known workarounds for this issue.""}], ""x_generator"": {""engine"": ""Vulnogram 0.1.0-dev""}}, ""adp"": [{""affected"": [{""vendor"": ""juniper"", ""product"": ""junos_os_evolved"", ""cpes"": [""cpe:2.3:a:juniper:junos_os_evolved:22.3:*:*:*:*:*:*:*""], ""defaultStatus"": ""unaffected"", ""versions"": [{""version"": ""22.3"", ""status"": ""affected"", ""lessThan"": ""22.3r2"", ""versionType"": ""semver""}]}, {""vendor"": ""juniper"", ""product"": ""junos_os_evolved"", ""cpes"": [""cpe:2.3:o:juniper:junos_os_evolved:22.4:*:*:*:*:*:*:*""], ""defaultStatus"": ""unaffected"", ""versions"": [{""version"": ""22.4"", ""status"": ""affected"", ""lessThan"": ""22.4r2"", ""versionType"": ""semver""}, {""version"": ""22.4"", ""status"": ""affected"", ""lessThan"": ""22.4r1-s2"", ""versionType"": ""semver""}]}], ""metrics"": [{""other"": {""type"": ""ssvc"", ""content"": {""timestamp"": ""2024-07-13T03:55:24.372855Z"", ""id"": ""CVE-2024-39522"", ""options"": [{""Exploitation"": ""none""}, {""Automatable"": ""no""}, {""Technical Impact"": ""total""}], ""role"": ""CISA Coordinator"", ""version"": ""2.0.3""}}}], ""title"": ""CISA ADP Vulnrichment"", ""providerMetadata"": {""orgId"": ""134c704f-9b21-4f2e-91b3-4a467353bcc0"", ""shortName"": ""CISA-ADP"", ""dateUpdated"": ""2024-07-13T11:40:43.932Z""}}, {""providerMetadata"": {""orgId"": ""af854a3a-2127-422b-91ae-364da2661108"", ""shortName"": ""CVE"", ""dateUpdated"": ""2024-08-02T04:26:15.539Z""}, ""title"": ""CVE Program Container"", ""references"": [{""tags"": [""vendor-advisory"", ""x_transferred""], ""url"": ""https://supportportal.juniper.net/JSA82975""}]}]}","Map(datePublished -> 2024-07-11T15:58:26.205Z, cveId -> CVE-2024-39522, assignerOrgId -> 8cbe9d5a-a066-4c94-8978-4b15efeae968, state -> PUBLISHED, assignerShortName -> juniper, dateReserved -> 2024-06-25T15:12:53.239Z, dateUpdated -> 2024-08-02T04:26:15.539Z)",CVE_RECORD,5.1,2025-11-17T02:27:25.277Z,2025-11-17,2024,33912


Step 2: Silver Layer: To create useful relational tables from the table of arrays which is the output from the broze layer.

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import *
from pyspark.sql.types import *

print("📖 Starting Silver layer...")

bronze_table = "bronze_db.cve_bronze_2024"
bronze_df = spark.table(bronze_table)

print("Bronze rows:", bronze_df.count())
bronze_df.printSchema()


📖 Starting Silver layer...
Bronze rows: 38753
root
 |-- containers: string (nullable = true)
 |-- cveMetadata: map (nullable = true)
 |    |-- key: string
 |    |-- value: string (valueContainsNull = true)
 |-- dataType: string (nullable = true)
 |-- dataVersion: string (nullable = true)
 |-- _ingestion_timestamp: timestamp (nullable = true)
 |-- _ingestion_date: date (nullable = true)
 |-- _year: integer (nullable = true)
 |-- _record_id: long (nullable = true)



In [0]:
# descriptions
description_schema = StructType([
    StructField("lang", StringType(), True),
    StructField("value", StringType(), True)
])

# versions
version_schema = StructType([
    StructField("version", StringType(), True),
    StructField("status", StringType(), True)
])

# affected products
affected_schema = StructType([
    StructField("vendor",   StringType(), True),
    StructField("product",  StringType(), True),
    StructField("versions", ArrayType(version_schema), True)
])

# CVSS 3.x
cvss_v3_schema = StructType([
    StructField("baseScore",    StringType(), True),
    StructField("baseSeverity", StringType(), True),
    StructField("vectorString", StringType(), True)
])

metrics_entry_schema = StructType([
    StructField("cvssV3_1", cvss_v3_schema, True),
    StructField("cvssV3_0", cvss_v3_schema, True)
])

# Full containers
containers_schema = StructType([
    StructField(
        "cna",
        StructType([
            StructField("descriptions", ArrayType(description_schema), True),
            StructField("affected", ArrayType(affected_schema), True),
            StructField("metrics", ArrayType(metrics_entry_schema), True)
        ]),
        True
    )
])

print("Schema for parsing ready.")


Schema for parsing ready.


In [0]:
bronze_parsed = bronze_df.withColumn(
    "containers_parsed",
    from_json(col("containers"), containers_schema)
)

print("Parsed 'containers' successfully.")
bronze_parsed.select("cveMetadata", "containers_parsed").show(3, truncate=False)


Parsed 'containers' successfully.
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|cveMetadata                                                                                                                                                                                                                              

In [0]:
from pyspark.sql.functions import col, to_timestamp, coalesce, element_at

core_df = (
    bronze_parsed
    .withColumn(
        "description_primary",
        element_at(col("containers_parsed.cna.descriptions"), 1).getField("value")
    )
    .withColumn(
        "metric_primary",
        element_at(col("containers_parsed.cna.metrics"), 1)
    )
    .withColumn("cvss_v31", col("metric_primary.cvssV3_1"))
    .withColumn("cvss_v30", col("metric_primary.cvssV3_0"))
    .select(
        col("cveMetadata")["cveId"].alias("cve_id"),

        # 🔹 NEW COLUMN: reserved timestamp
        to_timestamp(col("cveMetadata")["dateReserved"]).alias("date_reserved_ts"),

        # existing timestamps
        to_timestamp(col("cveMetadata")["datePublished"]).alias("date_published_ts"),
        to_timestamp(col("cveMetadata")["dateUpdated"]).alias("date_updated_ts"),
        col("cveMetadata")["state"].alias("state"),

        col("description_primary").alias("description_en"),

        coalesce(col("cvss_v31.baseScore"), col("cvss_v30.baseScore")).alias("cvss_base_score"),
        coalesce(col("cvss_v31.baseSeverity"), col("cvss_v30.baseSeverity")).alias("cvss_base_severity"),
        coalesce(col("cvss_v31.vectorString"), col("cvss_v30.vectorString")).alias("cvss_vector"),

        col("_ingestion_timestamp"),
        col("_ingestion_date"),
        col("_year"),
        col("_record_id"),
    )
)


print("Core Silver rows:", core_df.count())
core_df.show(5, truncate=False)


Core Silver rows: 38753
+-------------+-----------------------+-----------------------+-----------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+------------------+--------------------------------------------+--------------------------+---------------+-----+----------+
|cve_id       |date_reserved_ts       |date_published_ts      |date_updated_ts        |state    |description_en                                                                                                                         

In [0]:
affected_df = (
    bronze_parsed
    .select(
        col("cveMetadata")["cveId"].alias("cve_id"),
        explode_outer(col("containers_parsed.cna.affected")).alias("aff")
    )
    .select(
        "cve_id",
        col("aff.vendor").alias("vendor"),
        col("aff.product").alias("product"),
        explode_outer(col("aff.versions")).alias("ver")
    )
    .select(
        "cve_id",
        "vendor",
        "product",
        col("ver.version").alias("version"),
        col("ver.status").alias("version_status")
    )
)

print("Affected Silver rows:", affected_df.count())
affected_df.show(5, truncate=False)


Affected Silver rows: 179498
+-------------+---------+---------------------------------------------------------------------------------------------------------+-------+--------------+
|cve_id       |vendor   |product                                                                                                  |version|version_status|
+-------------+---------+---------------------------------------------------------------------------------------------------------+-------+--------------+
|CVE-2024-4856|Unknown  |FS Product Inquiry                                                                                       |0      |affected      |
|CVE-2024-4716|Campcodes|Complete Web-Based School Management System                                                              |1.0    |affected      |
|CVE-2024-4446|pt-guy   |Content Views – Post Grid & Filter, Recent Posts, Category Posts, & More (Gutenberg Blocks and Shortcode)|*      |affected      |
|CVE-2024-4758|Unknown  |Muslim Prayer Ti

In [0]:
spark.sql("CREATE DATABASE IF NOT EXISTS silver_db")

core_table_name = "silver_db.cve_core_2024"
affected_table_name = "silver_db.cve_affected_2024"

(core_df.write
 .format("delta")
 .mode("overwrite")
 .option("mergeSchema", "true")
 .saveAsTable(core_table_name))

(affected_df.write
 .format("delta")
 .mode("overwrite")
 .option("mergeSchema", "true")
 .saveAsTable(affected_table_name))

print("Silver tables written successfully.")


Silver tables written successfully.


In [0]:
#sanity check by quireying the silver table.
spark.sql(f"SELECT COUNT(*) FROM silver_db.cve_core_2024").show()



+--------+
|COUNT(*)|
+--------+
|   38753|
+--------+



In [0]:
spark.sql(f"SELECT COUNT(*) FROM silver_db.cve_affected_2024").show()



+--------+
|COUNT(*)|
+--------+
|  179498|
+--------+



In [0]:
spark.sql("SELECT * FROM silver_db.cve_core_2024 LIMIT 5").show(truncate=False)


+--------------+-----------------------+-----------------------+---------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+---------------+------------------+--------------------------------------------+--------------------------+---------------+-----+----------+-----------------------+
|cve_id        |date_published_ts      |date_updated_ts        |state    |description_en                                                                                                                                                                                                                                                                                            |cvss_base_score|cvss_base_severity|cvss_vector                                 |_ingestio

In [0]:
spark.sql("SELECT * FROM silver_db.cve_affected_2024 LIMIT 5").show(truncate=False)

+--------------+--------------+-----------------------------+--------+--------------+
|cve_id        |vendor        |product                      |version |version_status|
+--------------+--------------+-----------------------------+--------+--------------+
|CVE-2024-33637|Solid Plugins |Solid Affiliate              |n/a     |affected      |
|CVE-2024-33695|ThemeNcode    |Fan Page Widget by ThemeNcode|n/a     |affected      |
|CVE-2024-33546|AA-Team       |WZone                        |n/a     |affected      |
|CVE-2024-33901|n/a           |n/a                          |n/a     |affected      |
|CVE-2024-33039|Qualcomm, Inc.|Snapdragon                   |QAM8255P|affected      |
+--------------+--------------+-----------------------------+--------+--------------+



In [0]:
%sql
-- Sanity check by querying the silver affected table

SELECT vendor,
       COUNT(version_status) AS count
FROM silver_db.cve_affected_2024
GROUP BY vendor
ORDER BY count DESC;


vendor,count
Linux,36048
Cisco,32445
"Qualcomm, Inc.",25316
Microsoft,13161
,7339
"Brother Industries, Ltd",4427
Autodesk,2966
Siemens,2545
Red Hat,2291
Apple,1692


EDA to show the insights after cleaning the raw data we have to an relational tables.

1.Temeporal Analysis

In [0]:
%sql
-- 1.1 Monthly count of CVE's.
SELECT DATE_FORMAT(date_published_ts, 'yyyy-MM') AS month,
       COUNT(*) AS count
FROM silver_db.cve_core_2024
GROUP BY month
ORDER BY month;



month,count
,433
2024-01,1134
2024-02,1769
2024-03,2616
2024-04,3218
2024-05,3348
2024-06,2707
2024-07,2877
2024-08,2692
2024-09,2408


In [0]:
%sql
-- 1.2 Distribution of publication latency 
SELECT
  cve_id,
  DATEDIFF(date_published_ts, date_reserved_ts) AS latency_days
FROM silver_db.cve_core_2024
WHERE date_published_ts IS NOT NULL
  AND date_reserved_ts IS NOT NULL;





cve_id,latency_days
CVE-2024-33637,4
CVE-2024-33695,0
CVE-2024-33546,5
CVE-2024-33901,22
CVE-2024-33039,223
CVE-2024-33607,423
CVE-2024-33540,5
CVE-2024-33911,3
CVE-2024-33853,118
CVE-2024-33972,99


In [0]:
%sql
-- 1.3 Seasonal patterns by month name
SELECT
  DATE_FORMAT(date_published_ts, 'MMMM') AS month_name,
  COUNT(*) AS vuln_count
FROM silver_db.cve_core_2024
WHERE date_published_ts IS NOT NULL
GROUP BY month_name
ORDER BY vuln_count DESC;


month_name,vuln_count
November,3793
May,3781
March,3573
April,3484
October,3410
January,3320
December,3022
July,2991
June,2835
February,2834


2.Risk Distribution Analysis

In [0]:
%sql
-- 2.1 Severity buckets count based on CVSS base.
SELECT cvss_base_severity, COUNT(*)
FROM silver_db.cve_core_2024
GROUP BY cvss_base_severity;



cvss_base_severity,COUNT(*)
HIGH,7588
MEDIUM,11795
CRITICAL,1788
,16555
LOW,1015
NONE,12


In [0]:
%sql
-- 2.2 Monthly trend of severity buckets
SELECT DATE_FORMAT(date_published_ts, 'yyyy-MM') AS month,
       cvss_base_severity,
       COUNT(*)
FROM silver_db.cve_core_2024
GROUP BY month, cvss_base_severity
ORDER BY month;



month,cvss_base_severity,COUNT(*)
,,433
2024-01,NONE,1
2024-01,,294
2024-01,MEDIUM,429
2024-01,LOW,89
2024-01,HIGH,273
2024-01,CRITICAL,48
2024-02,MEDIUM,640
2024-02,CRITICAL,100
2024-02,,589


In [0]:
%sql
-- 2.3 Unknown severity Count.
SELECT COUNT(*)
FROM silver_db.cve_core_2024
WHERE cvss_base_score IS NULL;



COUNT(*)
16555


3. Vendor Intelligence

In [0]:
%sql
-- 3.1 Top 25 vendors by vulnerability count.
SELECT vendor, COUNT(DISTINCT cve_id) AS vulns
FROM silver_db.cve_affected_2024
GROUP BY vendor
ORDER BY vulns DESC
LIMIT 25;



vendor,vulns
,6537
Linux,3083
Microsoft,1108
,1092
Unknown,975
Adobe,751
Google,630
SourceCodester,557
Apple,530
IBM,504


In [0]:
%sql
-- 3.2
WITH vendor_counts AS (
    -- How many distinct CVEs per vendor
    SELECT 
        vendor,
        COUNT(DISTINCT cve_id) AS vulns
    FROM silver_db.cve_affected_2024
    GROUP BY vendor
),

total AS (
    -- Total vulnerabilities across all vendors
    SELECT SUM(vulns) AS total_vulns
    FROM vendor_counts
),

top10_vendors AS (
    -- Pick the top 10 vendors by vuln count
    SELECT 
        vendor,
        vulns
    FROM vendor_counts
    ORDER BY vulns DESC
    LIMIT 10
),

top10 AS (
    -- Sum their vulnerabilities
    SELECT SUM(vulns) AS top10_vulns
    FROM top10_vendors
)

-- Final result: how much share do top 10 vendors account for?
SELECT
    t.total_vulns,
    k.top10_vulns,
    ROUND(100.0 * k.top10_vulns / t.total_vulns, 2) AS top10_share_percent
FROM total t
CROSS JOIN top10 k;


total_vulns,top10_vulns,top10_share_percent
39200,15767,40.22


In [0]:
%sql
-- 3.3 Vendor-specific risk profile

SELECT vendor, cvss_base_severity, COUNT(*)
FROM silver_db.cve_affected_2024 a
JOIN silver_db.cve_core_2024 c
  ON a.cve_id = c.cve_id
GROUP BY vendor, cvss_base_severity;

vendor,cvss_base_severity,COUNT(*)
Genetec Inc.,HIGH,12
Mediavine,MEDIUM,3
CodePassenger,CRITICAL,1
netweblogic,MEDIUM,5
hedgedoc,MEDIUM,1
EDM115,,1
wpdiscover,HIGH,1
guruteam,HIGH,1
virgial,MEDIUM,1
Benjamin Rojas,HIGH,1
