In [30]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [31]:
# Scraping is wrapped in a function so it can be called once per catalog page.
def scrape_page(url):
    """Scrape one page of the CISA Known Exploited Vulnerabilities catalog.

    Args:
        url: Full URL of the catalog page to download.

    Returns:
        dict: Six parallel lists keyed by "cve", "desc", "vendor_prod",
        "Ransomware", "Due Date" and "Action", holding one element per
        ``<article class="c-teaser c-teaser--horizontal">`` found on the page.
        A title, description or vendor element that is missing from an
        article is recorded as None so that all lists keep the same length.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    # The timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() stops an HTTP error page from being parsed as if it
    # were a catalog page with no entries.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Output column -> (tag name, CSS class) of the element holding its text.
    direct_fields = {
        "cve": ("h3", "c-teaser__title"),
        "desc": ("div", "c-teaser__vuln-name"),
        "vendor_prod": ("div", "c-teaser__eyebrow"),
    }

    # Output column -> (label text used as anchor, fallback when not found).
    # The stored value is the text of the first <li> that find_next() reaches
    # AFTER the label's <span>, not the text sitting next to the label itself,
    # so each column is anchored on a different label than the one it is named
    # after. In the notebook's recorded output the stored strings include
    # their own label, e.g. "Due Date: 2024-02-29" in the "Due Date" column.
    # The pairings and fallback strings are deliberately left unchanged so the
    # produced values stay the same.
    # NOTE(review): this depends on the order of the list items in the page
    # markup -- confirm against the live HTML before changing any pairing.
    labelled_fields = {
        "Ransomware": ("Action:", "Action not specified"),
        "Due Date": ("Date Added:", "Date not specified"),
        "Action": ("Due Date:", "Due date not specified"),
    }

    # Key order (and therefore DataFrame column order) follows the two tables.
    cve_data = {column: [] for column in (*direct_fields, *labelled_fields)}

    for article in soup.find_all("article", class_="c-teaser c-teaser--horizontal"):
        for column, (tag_name, css_class) in direct_fields.items():
            elem = article.find(tag_name, class_=css_class)
            cve_data[column].append(elem.text.strip() if elem is not None else None)

        for column, (label, fallback) in labelled_fields.items():
            # `string=` is the current name of the deprecated `text=` filter.
            label_elem = article.find("span", class_="c-teaser__kev-label", string=label)
            value_elem = label_elem.find_next("li") if label_elem is not None else None
            cve_data[column].append(
                value_elem.text.strip() if value_elem is not None else fallback
            )

    return cve_data

# Collect catalog data from a run of consecutive pages (?page=1..5 by default).
def scrape_vulnerability_data(first_page=1, last_page=5):
    """Scrape consecutive catalog pages and merge their columns.

    Args:
        first_page: Value of the ``page`` query parameter for the first
            request. Defaults to 1.
        last_page: Value of the ``page`` query parameter for the last request
            (inclusive). Defaults to 5. If it is smaller than ``first_page``
            no page is requested and every column comes back empty.

    Returns:
        dict: Lists keyed by "cve", "desc", "vendor_prod", "Ransomware",
        "Due Date" and "Action", each holding the values returned by
        ``scrape_page`` for every requested page, concatenated in page order.
    """
    url = "https://www.cisa.gov/known-exploited-vulnerabilities-catalog"
    all_cve_data = {
        "cve": [],
        "desc": [],
        "vendor_prod": [],
        "Ransomware": [],
        "Due Date": [],
        "Action": []
    }

    # NOTE(review): the defaults request ?page=1 through ?page=5. If the
    # site's pager turns out to be zero-based, ?page=0 would be the first
    # page and the defaults would skip it -- confirm against the live site,
    # then pass first_page=0 if needed.
    for page_num in range(first_page, last_page + 1):
        page_url = f"{url}?page={page_num}"
        cve_data = scrape_page(page_url)
        for key in all_cve_data:
            all_cve_data[key].extend(cve_data[key])

    return all_cve_data


# Run the scraper. This downloads the catalog pages over the network, so the
# result depends on the site's content at the time the cell is executed.
cve_data = scrape_vulnerability_data()



In [32]:
# Build a DataFrame from the scraped column lists (one column per dict key,
# one row per scraped catalog entry) and display it as the cell output.
df = pd.DataFrame(cve_data)
df

Unnamed: 0,cve,desc,vendor_prod,Ransomware,Due Date,Action
0,CVE-2024-1709,ConnectWise ScreenConnect Authentication Bypas...,ConnectWise | ScreenConnect,Known To Be Used in Ransomware Campaigns?: Known,Due Date: 2024-02-29,Action: Apply mitigations per vendor instructi...
1,CVE-2024-21410,Microsoft Exchange Server Privilege Escalation...,Microsoft | Exchange Server,Known To Be Used in Ransomware Campaigns?: Un...,Due Date: 2024-03-07,Action: Apply mitigations per vendor instructi...
2,CVE-2020-3259,Cisco ASA and FTD Information Disclosure Vulne...,Cisco | Adaptive Security Appliance (ASA) and ...,Known To Be Used in Ransomware Campaigns?: Known,Due Date: 2024-03-07,Action: Apply mitigations per vendor instructi...
3,CVE-2024-21351,Microsoft Windows SmartScreen Security Feature...,Microsoft | Windows,Known To Be Used in Ransomware Campaigns?: Un...,Due Date: 2024-03-05,Action: Apply mitigations per vendor instructi...
4,CVE-2024-21412,Microsoft Windows Internet Shortcut Files Secu...,Microsoft | Windows,Known To Be Used in Ransomware Campaigns?: Un...,Due Date: 2024-03-05,Action: Apply mitigations per vendor instructi...
...,...,...,...,...,...,...
95,CVE-2023-38831,RARLAB WinRAR Code Execution Vulnerability,RARLAB | WinRAR,Known To Be Used in Ransomware Campaigns?: Known,Due Date: 2023-09-14,Action: Apply mitigations per vendor instructi...
96,CVE-2023-32315,Ignite Realtime Openfire Path Traversal Vulner...,Ignite Realtime | Openfire,Known To Be Used in Ransomware Campaigns?: Un...,Due Date: 2023-09-14,Action: Apply mitigations per vendor instructi...
97,CVE-2023-38035,Ivanti Sentry Authentication Bypass Vulnerability,Ivanti | Sentry,Known To Be Used in Ransomware Campaigns?: Un...,Due Date: 2023-09-12,Action: Apply mitigations per vendor instructi...
98,CVE-2023-27532,Veeam Backup & Replication Cloud Connect Missi...,Veeam | Backup & Replication,Known To Be Used in Ransomware Campaigns?: Known,Due Date: 2023-09-12,Action: Apply mitigations per vendor instructi...


In [34]:
# Write the DataFrame to cve_data.csv (relative path, so it lands in the
# current working directory); index=False leaves the row index out of the file.
df.to_csv('cve_data.csv', index = False)