# Working with Known IoT-Related CVEs: A New Direction


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# List the sheets in the MITRE workbook.
# The workbook is opened through a context manager so its file handle is
# released as soon as the sheet names have been read.
with pd.ExcelFile('../data/MITRE/MITRE_2024_IoT_CVEs.xlsx') as workbook:
    sheet_names = workbook.sheet_names
print(f'The MITRE IoT CVEs Excel spreadsheet has the following sheets: {sheet_names}')

The MITRE IoT CVEs Excel spreadsheet has the following sheets: ['2024 IoT CVEs', '2020-2024 CVEs', '2019-2024 CVEs']


In [14]:
# Load the '2019-2024 CVEs' sheet.
# NOTE: with read_excel's default header handling the sheet's first row becomes
# the column labels; on this sheet that row is a CVE record, not a real header.
df2019_2024 = pd.read_excel(
    io='../data/MITRE/MITRE_2024_IoT_CVEs.xlsx',
    sheet_name='2019-2024 CVEs',
)

In [11]:
# Load the '2020-2024 CVEs' sheet.
# NOTE: with read_excel's default header handling the sheet's first row becomes
# the column labels; on this sheet that row is a CVE record, not a real header.
df2020_2024 = pd.read_excel(
    io='../data/MITRE/MITRE_2024_IoT_CVEs.xlsx',
    sheet_name='2020-2024 CVEs',
)

In [12]:
# Load the '2024 IoT CVEs' sheet.
# NOTE: this sheet starts with a title row ('2024: MITRE - IoT CVEs'), which
# read_excel's default header handling uses as the column labels.
df2024 = pd.read_excel(
    io='../data/MITRE/MITRE_2024_IoT_CVEs.xlsx',
    sheet_name='2024 IoT CVEs',
)

In [16]:
def shape_of(name: str, df: pd.DataFrame) -> None:
    """Print the number of rows and columns of `df`, labelled with `name`.

    Parameters
    ----------
    name : label shown in quotes at the start of the printed line.
    df : frame whose dimensions are reported.
    """
    rows, cols = df.shape  # (row count, column count)
    print(f'"{name}" has {rows} rows and {cols} columns.')

# Report the dimensions of each loaded sheet, largest date range first.
for sheet_label, sheet_frame in (
    ('2019-2024 CVES', df2019_2024),
    ('2020-2024 CVES', df2020_2024),
    ('2024 IoT CVES', df2024),
):
    shape_of(sheet_label, sheet_frame)

"2019-2024 CVES" has 1088 rows and 2 columns.
"2020-2024 CVES" has 714 rows and 2 columns.
"2024 IoT CVES" has 24 rows and 2 columns.


In [17]:
# Preview the first three rows of the 2019-2024 frame as loaded.
df2019_2024.head(n=3)

Unnamed: 0,CVE-2024-38089,Microsoft Defender for IoT Elevation of Privilege Vulnerability
0,CVE-2024-29195,The azure-c-shared-utility is a C library for ...
1,CVE-2024-29055,Microsoft Defender for IoT Elevation of Privil...
2,CVE-2024-29054,Microsoft Defender for IoT Elevation of Privil...


In [18]:
# Preview the first three rows of the 2020-2024 frame as loaded.
df2020_2024.head(n=3)

Unnamed: 0,CVE-2024-38089,Microsoft Defender for IoT Elevation of Privilege Vulnerability
0,CVE-2024-29195,The azure-c-shared-utility is a C library for ...
1,CVE-2024-29055,Microsoft Defender for IoT Elevation of Privil...
2,CVE-2024-29054,Microsoft Defender for IoT Elevation of Privil...


In [19]:
# Preview the first three rows of the 2024 frame as loaded.
df2024.head(n=3)

Unnamed: 0,2024: MITRE - IoT CVEs,Unnamed: 1
0,CVE-2024-38089,Microsoft Defender for IoT Elevation of Privil...
1,CVE-2024-29195,The azure-c-shared-utility is a C library for ...
2,CVE-2024-29055,Microsoft Defender for IoT Elevation of Privil...


The column names of these datasets are themselves observations. They'll have to be pushed down into the dataset and replaced with accurate column names.

In [34]:
def add_cols_as_obs(df):
    """Push the current column labels of `df` down into the data as its first row.

    Used when a sheet was read without a real header, so the column labels are
    themselves an observation. The frame is modified in place: the labels are
    added as a row, the index is shifted by one so that row is labelled 0, and
    the frame is re-sorted so the row sits at position 0. The column labels
    themselves are left unchanged; rename them afterwards.

    Parameters
    ----------
    df : pandas.DataFrame with an integer index (it is incremented by 1).

    Returns
    -------
    The same `df` object, returned for convenience.
    """
    current_col_names = df.columns.tolist()  # Grab current column names
    df.loc[-1] = current_col_names  # Add the labels as a new row labelled -1
    df.index = df.index + 1  # Shift the index so the new row is labelled 0
    # Sort in place: assigning `df = df.sort_index()` would only rebind the local
    # name and leave the caller's frame with the new row stuck at the end.
    df.sort_index(inplace=True)
    return df

#add_cols_as_obs(df2019_2024)
#add_cols_as_obs(df2020_2024)

In [60]:
# Rename columns names
df2019_2024 = df2019_2024.rename(columns={
    'CVE-2024-38089': 'cve_id',
    'Microsoft Defender for IoT Elevation of Privilege Vulnerability': 'description'
})

df2020_2024 = df2020_2024.rename(columns={
    'CVE-2024-38089': 'cve_id',
    'Microsoft Defender for IoT Elevation of Privilege Vulnerability': 'description'
})

df2024 = df2024.rename(columns={
    '2024: MITRE - IoT CVEs': 'cve_id',
    'Unnamed: 1': 'description'
})

In [43]:
# Preview the first three rows of the 2019-2024 frame after the column rename.
df2019_2024.head(n=3)

Unnamed: 0,cve_id,description
0,CVE-2024-38089,Microsoft Defender for IoT Elevation of Privil...
1,CVE-2024-29195,The azure-c-shared-utility is a C library for ...
2,CVE-2024-29055,Microsoft Defender for IoT Elevation of Privil...


In [45]:
# Preview the first three rows of the 2020-2024 frame after the column rename.
df2020_2024.head(n=3)

Unnamed: 0,cve_id,description
0,CVE-2024-38089,Microsoft Defender for IoT Elevation of Privil...
1,CVE-2024-29195,The azure-c-shared-utility is a C library for ...
2,CVE-2024-29055,Microsoft Defender for IoT Elevation of Privil...


In [62]:
# Preview the first three rows of the 2024 frame after the column rename.
df2024.head(n=3)

Unnamed: 0,cve_id,description
0,CVE-2024-38089,Microsoft Defender for IoT Elevation of Privil...
1,CVE-2024-29195,The azure-c-shared-utility is a C library for ...
2,CVE-2024-29055,Microsoft Defender for IoT Elevation of Privil...


<class 'pandas.core.frame.DataFrame'>
Index: 1089 entries, 0 to 1088
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   cve_id       1089 non-null   object
 1   description  1089 non-null   object
dtypes: object(2)
memory usage: 25.5+ KB
None
<class 'pandas.core.frame.DataFrame'>
Index: 715 entries, 0 to 714
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   cve_id       715 non-null    object
 1   description  715 non-null    object
dtypes: object(2)
memory usage: 16.8+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24 entries, 0 to 23
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   cwe_id       24 non-null     object
 1   description  24 non-null     object
dtypes: object(2)
memory usage: 512.0+ bytes
None


In [56]:
# Count fully duplicated rows in each frame.
for frame_label, frame in (
    ('df2019-2024', df2019_2024),
    ('df2020-2024', df2020_2024),
    ('df2024', df2024),
):
    print(f'"{frame_label}" has {frame.duplicated().sum()} duplicate observations.')

"df2019-2024" has 0 duplicate observations.
"df2020-2024" has 0 duplicate observations.
"df2024" has 0 duplicate observations.


In [59]:
# Report null counts for each frame; the printed list has one entry per column.
for frame_label, frame in (
    ('df2019-2024', df2019_2024),
    ('df2020-2024', df2020_2024),
    ('df2024', df2024),
):
    print(f'"{frame_label}" has {frame.isna().sum().tolist()} null values.')

"df2019-2024" has [0, 0] null values.
"df2020-2024" has [0, 0] null values.
"df2024" has [0, 0] null values.


In [63]:
# Check whether the smaller datasets exist in the larger datasets
df2019_2024_set = set([tuple(row) for row in df2019_2024.values])
df2020_2024_set = set([tuple(row) for row in df2020_2024.values])
df2024_set = set([tuple(row) for row in df2024.values])

print(f'All observations in "df2020_2024" appear in df2019_2024: {df2020_2024_set.issubset(df2019_2024_set)}')
print(f'All observations in "df2024" appear in df2020_2024: {df2024_set.issubset(df2020_2024_set)}')

All observations in "df2020_2024" appear in df2019_2024: True
All observations in "df2024" appear in df2020_2024: True


All of the observations in `df2024` exist in `df2020_2024` and all of those observations exist in `df2019_2024`, so we'll use the largest dataset. Next, I'll create a dataset from the nation-state attack information we've consolidated. We can then concatenate these two dataframes together.

In [None]:
# Create nation-state attack dataframe
df_nsa = {
    'attack': [
        'Mirai Botnet',
        'VPNFilter',
        'Triton/Trisis',
        'Iranian Cyberattacks on Water Systems',
        'Iranian APT Exploits on Fortinet Vulnerabilities',
        'Operation Shadowhammer',
        'Ripple20 Vulnerabilities',
        'Dragonfly/Energetic Bear Campaign 1',
        'Dragonfly/Energetic Bear Campaign 2'，
        'Stuxnet',
        'Heartbleed Exploits',
    ],
    'year_start': [
        2016,
        2018,
        2017,
        2020,
        2021,
        2018,
        2020,
        2013,
        2017,
        2018,
        2014
    ],
    'year_end': [
        2016,
        2018,
        2017,
        2020,
        2021,
        2019,
        2020,
        2014,
        2017,
        2018,
        2014
    ],
    'attribution_group': [
        pd.NA,
        'Fancy Bear (APT28)',
        pd.NA,
        pd.NA,
        pd.NA,
        'APT41',
        pd.NA,
        'Dragonfly (Energetic Bear)',
        'Dragonfly (Energetic Bear)',
        pd.NA,
        pd.NA,
    ],
    'attribution_state': [
        pd.NA,
        'Russia',
        'Russia',
        'Iran',
        'Iran',
        'China',
        pd.NA,
        'Russia',
        'Russia',
        ['US', 'Israel'],
        'China'
    ],
    'cve_id': [
        [
            pd.NA,
        ],
        [
            'CVE-2018-14847',
            'CVE-2017-12074',
            'CVE-2018-10561',
            'CVE-2018-10562',
            'CVE-2017-8418',
        ],
        [
            'CVE-2017-7905',
            'CVE-2017-7921'
        ],
        [
            pd.NA
        ],
        [
            'CVE-2018-13379'
        ],
        [
            'CVE-2019-19781'
        ],
        [
            'CVE-2020-11896',
            'CVE-2020-11898',
            'CVE-2020-11899',
            'CVE-2020-11901',
        ],
        [
            pd.NA
        ],
        [
            pd.NA
        ],
        [
            pd.NA
        ],
        [
            'CVE-2014-0160'
        ]
    ],
    'description': [
        [
            pd.NA
        ],
        [
            'MikroTik RouterOS through 6.42 allows unauthenticated remote attackers to read arbitrary files and remote authenticated attackers to write arbitrary files due to a directory traversal vulnerability in the WinBox interface.',
            'Directory traversal vulnerability in the SYNO.DNSServer.Zone.MasterZoneConf in Synology DNS Server before 2.2.1-3042 allows remote authenticated attackers to write arbitrary files via the domain_name parameter.',
            'An issue was discovered on Dasan GPON home routers. It is possible to bypass authentication simply by appending "?images" to any URL of the device that requires authentication, as demonstrated by the /menu.html?images/ or /GponForm/diag_FORM?images/ URI. One can then manage the device.',
            "An issue was discovered on Dasan GPON home routers. Command Injection can occur via the dest_host parameter in a diag_action=ping request to a GponForm/diag_Form URI. Because the router saves ping results in /tmp and transmits them to the user when the user revisits /diag.html, it's quite simple to execute commands and retrieve their output.",
            'RuboCop 0.48.1 and earlier does not use /tmp in safe way, allowing local users to exploit this to tamper with cache files belonging to other users.'
        ],
        [
            'A Weak Cryptography for Passwords issue was discovered in General Electric (GE) Multilin SR 750 Feeder Protection Relay, firmware versions prior to Version 7.47; SR 760 Feeder Protection Relay, firmware versions prior to Version 7.47; SR 469 Motor Protection Relay, firmware versions prior to Version 5.23; SR 489 Generator Protection Relay, firmware versions prior to Version 4.06; SR 745 Transformer Protection Relay, firmware versions prior to Version 5.23; SR 369 Motor Protection Relay, all firmware versions; Multilin Universal Relay, firmware Version 6.0 and prior versions; and Multilin URplus (D90, C90, B95), all versions. Ciphertext versions of user passwords were created with a non-random initialization vector leaving them susceptible to dictionary attacks. Ciphertext of user passwords can be obtained from the front LCD panel of affected products and through issued Modbus commands.',
            'An Improper Authentication issue was discovered in Hikvision DS-2CD2xx2F-I Series V5.2.0 build 140721 to V5.4.0 build 160530, DS-2CD2xx0F-I Series V5.2.0 build 140721 to V5.4.0 Build 160401, DS-2CD2xx2FWD Series V5.3.1 build 150410 to V5.4.4 Build 161125, DS-2CD4x2xFWD Series V5.2.0 build 140721 to V5.4.0 Build 160414, DS-2CD4xx5 Series V5.2.0 build 140721 to V5.4.0 Build 160421, DS-2DFx Series V5.2.0 build 140805 to V5.4.5 Build 160928, and DS-2CD63xx Series V5.0.9 build 140305 to V5.3.5 Build 160106 devices. The improper authentication vulnerability occurs when an application does not adequately or correctly authenticate users. This may allow a malicious user to escalate his or her privileges on the system and gain access to sensitive information.'
        ],
        [
            pd.NA
        ],
        [
            'An Improper Limitation of a Pathname to a Restricted Directory ("Path Traversal") in Fortinet FortiOS 6.0.0 to 6.0.4, 5.6.3 to 5.6.7 and 5.4.6 to 5.4.12 and FortiProxy 2.0.0, 1.2.0 to 1.2.8, 1.1.0 to 1.1.6, 1.0.0 to 1.0.7 under SSL VPN web portal allows an unauthenticated attacker to download system files via special crafted HTTP resource requests.'
        ],
        [
            'An issue was discovered in Citrix Application Delivery Controller (ADC) and Gateway 10.5, 11.1, 12.0, 12.1, and 13.0. They allow Directory Traversal.'
        ],
        [
            'The Treck TCP/IP stack before 6.0.1.66 allows Remote Code Execution, related to IPv4 tunneling.',
            'The Treck TCP/IP stack before 6.0.1.66 improperly handles an IPv4/ICMPv4 Length Parameter Inconsistency, which might allow remote attackers to trigger an information leak.',
            'The Treck TCP/IP stack before 6.0.1.66 has an IPv6 Out-of-bounds Read.',
            'The Treck TCP/IP stack before 6.0.1.66 allows Remote Code execution via a single invalid DNS response.',
        ],
        [
            pd.NA
        ],
        [
            pd.NA
        ],
        [
            pd.NA
        ],
        [
            'The (1) TLS and (2) DTLS implementations in OpenSSL 1.0.1 before 1.0.1g do not properly handle Heartbeat Extension packets, which allows remote attackers to obtain sensitive information from process memory via crafted packets that trigger a buffer over-read, as demonstrated by reading private keys, related to d1_both.c and t1_lib.c, aka the Heartbleed bug'
        ],
    ]
}

