Import

In [16]:
import requests
import pandas as pd
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup

# Running count of CVRF documents handled so far. The download loop bumps it
# before parsing a document, and the parsers stamp its current value onto the
# rows they create (vulnerability "DocumentIDFK", document "DocumentID").
document_info_index = 0

# Every table below is a column-oriented accumulator: column name -> list of
# cell values. Each column gets its own list object so appends stay independent.

# One row per <Vulnerability>, plus its child tables keyed by "VulnerabilityFK".
vulnerability_db = {
    column: []
    for column in ("VulnerabilityID", "DocumentIDFK", "Ordinal", "CVE", "Title")
}
vulnerability_db_status = {
    column: [] for column in ("VulnerabilityFK", "StatusType", "ProductID")
}
vulnerability_db_notes = {
    column: [] for column in ("VulnerabilityFK", "Title", "Type", "Ordinal", "Note")
}
vulnerability_db_threats = {
    column: [] for column in ("VulnerabilityFK", "Type", "Description", "ProductID")
}
vulnerability_db_score_set = {
    column: []
    for column in ("VulnerabilityFK", "BaseScore", "TemporalScore", "Vector", "ProductID")
}
vulnerability_db_acknowledgment = {
    column: [] for column in ("VulnerabilityFK", "Name", "URL")
}
vulnerability_db_revision = {
    column: [] for column in ("VulnerabilityFK", "Number", "Date", "Description")
}

# Products listed in the document's product tree.
productdb = {column: [] for column in ("ProductID", "ProductName", "Type", "Name")}

# Document-level notes (attributes only).
notes_db = {column: [] for column in ("NoteID", "Title", "Audience", "Type", "Ordinal")}

# Document header: tracking data, publisher data, title/type and the xmlns:*
# declarations of the root element.
# NOTE: the 'Pubishertype' spelling is kept on purpose, because Doc_Info writes
# to that exact key.
document_info_db = {
    column: []
    for column in (
        "DocumentID", "ID", "Alias", "Status", "Version",
        "RevisionHistoryNumber", "RevisionHistoryDate", "RevisionHistoryDescription",
        "InitialReleaseDate", "CurrentReleaseDate",
        "Pubishertype", "ContactDetails", "IssuingAuthority",
        "DocumentTitle", "DocumentType",
        "vuln", "dc", "cvrf-common", "prod", "scap-core", "cvssv2", "cpe-lang", "sch", "cvrf",
    )
}


#ProductTree

In [17]:
def product_tree(soup, productdb):
    """Append one row to ``productdb`` for every product under <ProductTree>.

    A product is any tag that carries a ``ProductID`` attribute. For each one,
    exactly one value is appended to each of the four columns, so the columns
    always stay the same length:

    * ``ProductID``   - the tag's ``ProductID`` attribute
    * ``ProductName`` - the tag's text
    * ``Type``/``Name`` - the ``Type``/``Name`` attributes of the direct parent
      when the parent has a ``Type`` attribute, otherwise ``None``

    Tags without a ``ProductID`` contribute nothing; they are only traversed.

    Args:
        soup: parsed document exposing ``find(name)``; the script passes a
            BeautifulSoup object.
        productdb: dict of column name -> list, mutated in place.

    Returns:
        None. Returns without touching ``productdb`` when the document has no
        ``ProductTree`` element.
    """
    library = soup.find('ProductTree')
    if library is None:
        # No product tree in this document: nothing to record.
        return

    def process_node(node, productdb):
        # Nodes without a tag name (e.g. text nodes) have no attributes to read.
        if node.name is None:
            return

        if 'ProductID' in node.attrs:
            parent = node.parent
            parent_attrs = parent.attrs if parent is not None else {}

            productdb['ProductID'].append(node.attrs['ProductID'])
            productdb['ProductName'].append(node.text)
            # Type/Name are written together with ProductID/ProductName so a
            # stray element without ProductID can never misalign the columns.
            if 'Type' in parent_attrs:
                productdb['Type'].append(parent_attrs['Type'])
                productdb['Name'].append(parent_attrs.get('Name'))
            else:
                productdb['Type'].append(None)
                productdb['Name'].append(None)

        for child in node.children:
            process_node(child, productdb)

    process_node(library, productdb)



#Vulnerability

In [18]:

def Get_Vulnerability(soup, vulnerability_db, vulnerability_db_status, vulnerability_db_notes, vulnerability_db_threats, vulnerability_db_score_set,vulnerability_db_acknowledgment, vulnerability_db_revision):
    """Flatten every top-level <Vulnerability> of a CVRF document into tables.

    Each <Vulnerability> that has an ``Ordinal`` attribute adds one row to
    ``vulnerability_db``. Its nested elements add rows to the child tables,
    each tagged with ``VulnerabilityFK`` equal to the parent's
    ``VulnerabilityID``:

    * Status/ProductID           -> ``vulnerability_db_status``
    * Notes/Note                 -> ``vulnerability_db_notes``
    * Threats/Threat             -> ``vulnerability_db_threats``
    * CVSSScoreSets/ScoreSet     -> ``vulnerability_db_score_set``
    * Acknowledgments/Acknowledgment -> ``vulnerability_db_acknowledgment``
    * RevisionHistory/Revision   -> ``vulnerability_db_revision``

    ``VulnerabilityID`` is the zero-based position of the row in
    ``vulnerability_db`` and keeps counting across documents. A child element
    that is absent is stored as ``None`` instead of raising, so a row is never
    left half-written.

    Args:
        soup: parsed document exposing ``find(name)``; the script passes a
            BeautifulSoup object.
        vulnerability_db, vulnerability_db_*: dicts of column name -> list,
            mutated in place.

    Returns:
        None.

    Reads the module-level ``document_info_index`` to fill ``DocumentIDFK``.
    """

    def child_text(node, tag, empty_as_none=False):
        # Text of the first ``tag`` element below ``node``; None when there is
        # no such element (or, optionally, when its text is empty).
        found = node.find(tag)
        if found is None:
            return None
        if empty_as_none and not found.text:
            return None
        return found.text

    def process_vulnerability(node, vulnerability_db, vulnerability_index):
        # DocumentIDFK is written here, together with the other columns, so it
        # cannot drift when a <Vulnerability> without Ordinal is skipped.
        vulnerability_db['DocumentIDFK'].append(document_info_index)
        vulnerability_db['Ordinal'].append(node.get('Ordinal'))
        vulnerability_db['Title'].append(child_text(node, 'Title', empty_as_none=True))
        vulnerability_db['CVE'].append(child_text(node, 'CVE'))
        # vulnerability_index was taken before this row existed, hence the +1.
        vulnerability_db['VulnerabilityID'].append(vulnerability_index + 1)

    def process_status(node, vulnerability_db_status, vulnerability_index):
        vulnerability_db_status['ProductID'].append(node.text)
        vulnerability_db_status['StatusType'].append(node.parent.get('Type'))
        vulnerability_db_status['VulnerabilityFK'].append(vulnerability_index)

    def process_notes(node, vulnerability_db_notes, vulnerability_index):
        vulnerability_db_notes['VulnerabilityFK'].append(vulnerability_index)
        vulnerability_db_notes['Title'].append(node.get('Title'))
        vulnerability_db_notes['Type'].append(node.get('Type'))
        vulnerability_db_notes['Ordinal'].append(node.get('Ordinal'))
        vulnerability_db_notes['Note'].append(node.text)

    def process_threats(node, vulnerability_db_threats, vulnerability_index):
        vulnerability_db_threats['VulnerabilityFK'].append(vulnerability_index)
        vulnerability_db_threats['Type'].append(node.get('Type'))
        vulnerability_db_threats['ProductID'].append(child_text(node, 'ProductID'))
        vulnerability_db_threats['Description'].append(child_text(node, 'Description'))

    def process_score_set(node, vulnerability_db_score_set, vulnerability_index):
        vulnerability_db_score_set['VulnerabilityFK'].append(vulnerability_index)
        for column in ('BaseScore', 'TemporalScore', 'Vector', 'ProductID'):
            vulnerability_db_score_set[column].append(child_text(node, column))

    def process_acknowledgment(node, vulnerability_db_acknowledgment, vulnerability_index):
        vulnerability_db_acknowledgment['VulnerabilityFK'].append(vulnerability_index)
        vulnerability_db_acknowledgment['Name'].append(child_text(node, 'Name', empty_as_none=True))
        vulnerability_db_acknowledgment['URL'].append(child_text(node, 'URL', empty_as_none=True))

    def process_revision(node, vulnerability_db_revision, vulnerability_index):
        vulnerability_db_revision['VulnerabilityFK'].append(vulnerability_index)
        for column in ('Number', 'Date', 'Description'):
            vulnerability_db_revision[column].append(child_text(node, column))

    def vulnerability(node, vulnerability_db):
        # Nodes without a tag name (e.g. text nodes) are not elements.
        if node.name is None:
            return

        # Index of the most recently stored vulnerability row. For the
        # <Vulnerability> tag itself this is the previous row; for everything
        # below it, it is the row that tag has just created.
        vulnerability_index = len(vulnerability_db['Ordinal']) - 1

        if node.name == 'Vulnerability' and 'Ordinal' in node.attrs:
            process_vulnerability(node, vulnerability_db, vulnerability_index)
        elif node.name == 'ProductID' and node.parent.name == "Status":
            process_status(node, vulnerability_db_status, vulnerability_index)
        elif node.name == 'Note' and node.parent.name == "Notes":
            process_notes(node, vulnerability_db_notes, vulnerability_index)
        elif node.name == 'Threat' and node.parent.name == "Threats":
            process_threats(node, vulnerability_db_threats, vulnerability_index)
        elif node.name == 'ScoreSet' and node.parent.name == "CVSSScoreSets":
            process_score_set(node, vulnerability_db_score_set, vulnerability_index)
        elif node.name == 'Acknowledgment' and node.parent.name == 'Acknowledgments':
            process_acknowledgment(node, vulnerability_db_acknowledgment, vulnerability_index)
        elif node.name == 'Revision' and node.parent.name == 'RevisionHistory':
            process_revision(node, vulnerability_db_revision, vulnerability_index)

        for child in node.children:
            vulnerability(child, vulnerability_db)

    # Only direct children of the document root are treated as vulnerabilities.
    for child in soup.find('cvrfdoc').children:
        if child.name == "Vulnerability":
            vulnerability(child, vulnerability_db)





#DocumentNotes

In [19]:

def Get_DocumentNotes(soup, notes_db):
    """Record the attributes of every document-level note in ``notes_db``.

    For each ``Note`` tag whose direct parent is ``DocumentNotes``, the
    ``Title``, ``Audience``, ``Type`` and ``Ordinal`` attributes are appended
    (``None`` when an attribute is absent), together with a ``NoteID`` equal to
    the number of notes stored before it. The note text itself is not stored.

    Args:
        soup: parsed document exposing ``find(name)``; the script passes a
            BeautifulSoup object.
        notes_db: dict of column name -> list, mutated in place.

    Returns:
        None.
    """
    # Attribute columns, in the order they are filled for each note.
    attribute_columns = ("Title", "Audience", "Type", "Ordinal")

    def document_notes(root, notes_db):
        # Depth-first, document-order walk using an explicit stack.
        pending = [root]
        while pending:
            current = pending.pop()
            if current.name is None:
                # Nodes without a tag name (e.g. text nodes) are skipped.
                continue

            if current.name == 'Note' and current.parent.name == 'DocumentNotes':
                note_id = len(notes_db['Audience'])
                for column in attribute_columns:
                    notes_db[column].append(current.get(column))
                notes_db["NoteID"].append(note_id)

            # Push children reversed so they are popped in document order.
            pending.extend(reversed(list(current.children)))

    for section in soup.find('cvrfdoc').children:
        if section.name == "DocumentNotes":
            document_notes(section, notes_db)


#DocumentTracking + DocumentPublisher + DocumentType + DocumentTitle + attrs

In [20]:
def Doc_Info(soup, document_info_db):
    """Append exactly one row describing the CVRF document to ``document_info_db``.

    Collected values:

    * ``DocumentID`` - current value of the module-level ``document_info_index``
    * from <DocumentTracking>: ``ID``, ``Alias``, ``Status``, ``Version``,
      ``InitialReleaseDate``, ``CurrentReleaseDate`` and, per <Revision>,
      ``RevisionHistoryNumber`` / ``RevisionHistoryDate`` /
      ``RevisionHistoryDescription``
    * from <DocumentPublisher>: ``Pubishertype`` (its ``Type`` attribute; the
      key spelling matches the table), ``ContactDetails``, ``IssuingAuthority``
    * ``DocumentTitle`` and ``DocumentType``
    * the root element's ``xmlns:*`` attributes (``vuln``, ``dc``, ...)

    Every column of ``document_info_db`` receives one cell per call, so the
    columns always stay the same length: ``None`` when nothing was found, the
    value itself when found once, and the values joined with ``"; "`` in
    document order when found several times (e.g. several revisions). Empty
    element text is stored as ``None``.

    Args:
        soup: parsed document exposing ``find(name)``; the script passes a
            BeautifulSoup object.
        document_info_db: dict of column name -> list, mutated in place.

    Returns:
        None.
    """
    # Values found in THIS document, per column. They are only copied into
    # document_info_db at the end, as a single aligned row.
    row = {column: [] for column in document_info_db}

    def text_or_none(node):
        return node.text if node.text else None

    # (tag name, parent tag name) -> destination column.
    tracking_columns = {
        ("ID", "Identification"): "ID",
        ("Alias", "Identification"): "Alias",
        ("Status", "DocumentTracking"): "Status",
        ("Version", "DocumentTracking"): "Version",
        ("Number", "Revision"): "RevisionHistoryNumber",
        ("Date", "Revision"): "RevisionHistoryDate",
        ("Description", "Revision"): "RevisionHistoryDescription",
        ("InitialReleaseDate", "DocumentTracking"): "InitialReleaseDate",
        ("CurrentReleaseDate", "DocumentTracking"): "CurrentReleaseDate",
    }

    def documen_tracking(node, row):
        # Nodes without a tag name (e.g. text nodes) are not elements.
        if node.name is None:
            return

        column = tracking_columns.get((node.name, node.parent.name))
        if column is not None:
            row[column].append(text_or_none(node))

        for child in node.children:
            documen_tracking(child, row)

    def documen_publisher(node, row):
        if node.name is None:
            return

        if node.name == 'DocumentPublisher' and node.parent.name == 'cvrfdoc':
            row['Pubishertype'].append(node.get('Type'))
        if node.name == 'ContactDetails' and node.parent.name == 'DocumentPublisher':
            row['ContactDetails'].append(text_or_none(node))
        if node.name == 'IssuingAuthority' and node.parent.name == 'DocumentPublisher':
            row['IssuingAuthority'].append(text_or_none(node))

        for child in node.children:
            documen_publisher(child, row)

    lib = soup.find('cvrfdoc')

    # The document id no longer depends on an <ID> element being present.
    row['DocumentID'].append(document_info_index)

    # Namespace declarations carried by the root element.
    for prefix in ('vuln', 'dc', 'cvrf-common', 'scap-core', 'prod',
                   'cvssv2', 'cpe-lang', 'sch', 'cvrf'):
        row[prefix].append(lib.get('xmlns:' + prefix))

    for child in lib.children:
        if child.name == "DocumentTracking":
            documen_tracking(child, row)
        if child.name == "DocumentPublisher":
            documen_publisher(child, row)
        if child.name == 'DocumentTitle':
            row['DocumentTitle'].append(text_or_none(child))
        if child.name == 'DocumentType':
            row['DocumentType'].append(text_or_none(child))

    # Collapse the findings into one cell per column and commit the row.
    for column, values in row.items():
        if not values:
            cell = None
        elif len(values) == 1:
            cell = values[0]
        else:
            cell = "; ".join("" if value is None else str(value) for value in values)
        document_info_db[column].append(cell)



In [21]:
# doc_month_array = ["Aug","Sep",'Oct', "Nov", "Dec", "Jan", 'Feb', "Mar", "Apr", "May", "Jun", "Jul"]
# doc_year_array = ["2022", '2023', '2024']

# Download each requested monthly CVRF document and feed it to the parsers.
# i is the year and j the month abbreviation used in the document id.
for i in ["2022"]:
    for j in ["Aug"]:
        # Reset before each download (presumably so a failed request never
        # leaves the previous document's soup bound to this name).
        soup = ""
        url = f'https://api.msrc.microsoft.com/cvrf/v3.0/cvrf/{i}-{j}'

        # Fetch the document. The (connect, read) timeout makes a stalled
        # connection raise instead of blocking the run forever.
        response = requests.get(url, timeout=(10, 60))

        # Parse only when the request succeeded.
        if response.status_code == 200:
            print(f"{i}-{j}")
            # Incremented before parsing so the parsers stamp this document's
            # rows with its number.
            document_info_index +=1
            soup = BeautifulSoup(response.content, "xml")
            product_tree(soup, productdb)
            Get_Vulnerability(soup, vulnerability_db, vulnerability_db_status, vulnerability_db_notes, vulnerability_db_threats, vulnerability_db_score_set,vulnerability_db_acknowledgment, vulnerability_db_revision)
            Get_DocumentNotes(soup, notes_db)
            Doc_Info(soup, document_info_db)
        else:
            print("Не удалось получить данные, статус код:", response.status_code)


# Turn the column-oriented accumulators into DataFrames. The names are rebound,
# so from here on they refer to DataFrames, not to the original dicts.
productdb = pd.DataFrame(productdb)
vulnerability_db = pd.DataFrame(vulnerability_db)
vulnerability_db_status = pd.DataFrame(vulnerability_db_status)
vulnerability_db_notes = pd.DataFrame(vulnerability_db_notes)
vulnerability_db_threats = pd.DataFrame(vulnerability_db_threats)
vulnerability_db_score_set = pd.DataFrame(vulnerability_db_score_set)
vulnerability_db_acknowledgment = pd.DataFrame(vulnerability_db_acknowledgment)
vulnerability_db_revision = pd.DataFrame(vulnerability_db_revision)
notes_db = pd.DataFrame(notes_db)
document_info_db = pd.DataFrame(document_info_db)

print(vulnerability_db)
            

2022-Aug
     VulnerabilityID  DocumentIDFK Ordinal               CVE  \
0                  0             1       0  CVE-2017-1000232   
1                  1             1       2    CVE-2021-33643   
2                  2             1       1    CVE-2021-28861   
3                  3             1       4    CVE-2021-33645   
4                  4             1      11     CVE-2021-3764   
..               ...           ...     ...               ...   
224              224             1      67     CVE-2022-2856   
225              225             1      68     CVE-2022-2857   
226              226             1      69     CVE-2022-2858   
227              227             1      70     CVE-2022-2860   
228              228             1      71     CVE-2022-2861   

                                                 Title  
0                                                 None  
1                                                 None  
2                                                 N

In [22]:


# Keep only a small preview: the first ten products and vulnerabilities.
productdb = productdb.head(10)
vulnerability_db = vulnerability_db.head(10)

# Attach the parent vulnerability's columns to each child table
# (VulnerabilityID == VulnerabilityFK, inner join), then drop the foreign key,
# which duplicates VulnerabilityID after the join.
vulnerability_db_status = vulnerability_db.merge(
    vulnerability_db_status,
    left_on="VulnerabilityID",
    right_on='VulnerabilityFK',
    how='inner',
).drop(columns='VulnerabilityFK')

vulnerability_db_score_set = vulnerability_db.merge(
    vulnerability_db_score_set,
    left_on="VulnerabilityID",
    right_on='VulnerabilityFK',
    how='inner',
).drop(columns='VulnerabilityFK')

vulnerability_db_acknowledgment = vulnerability_db.merge(
    vulnerability_db_acknowledgment,
    left_on="VulnerabilityID",
    right_on='VulnerabilityFK',
    how='inner',
).drop(columns='VulnerabilityFK')

vulnerability_db_notes = vulnerability_db.merge(
    vulnerability_db_notes,
    left_on="VulnerabilityID",
    right_on='VulnerabilityFK',
    how='inner',
).drop(columns='VulnerabilityFK')

vulnerability_db_threats = vulnerability_db.merge(
    vulnerability_db_threats,
    left_on="VulnerabilityID",
    right_on='VulnerabilityFK',
    how='inner',
).drop(columns='VulnerabilityFK')

vulnerability_db_revision = vulnerability_db.merge(
    vulnerability_db_revision,
    left_on="VulnerabilityID",
    right_on='VulnerabilityFK',
    how='inner',
).drop(columns='VulnerabilityFK')

# Pair every status row with every score-set row of the same vulnerability.
# Columns present on both sides get the _x / _y suffixes.
result = vulnerability_db_status.merge(
    vulnerability_db_score_set, on='VulnerabilityID', how='inner'
)

print(result)


     VulnerabilityID  DocumentIDFK_x Ordinal_x             CVE_x Title_x  \
0                  0               1         0  CVE-2017-1000232    None   
1                  0               1         0  CVE-2017-1000232    None   
2                  0               1         0  CVE-2017-1000232    None   
3                  0               1         0  CVE-2017-1000232    None   
4                  1               1         2    CVE-2021-33643    None   
..               ...             ...       ...               ...     ...   
167                9               1         6    CVE-2021-33655    None   
168                9               1         6    CVE-2021-33655    None   
169                9               1         6    CVE-2021-33655    None   
170                9               1         6    CVE-2021-33655    None   
171                9               1         6    CVE-2021-33655    None   

         StatusType ProductID_x  DocumentIDFK_y Ordinal_y             CVE_y  \
0    Kno