## Imports and constants

In [1]:
import os
import re
import subprocess
import json
import requests
import time
import pymongo
from datetime import datetime

In [2]:
with open('config.json', 'r') as file:
    config = json.load(file)

api_key_nist = config['api_key_nist']
api_key_cvedetails = config['api_key_cvedetails']
mongo_uri = config['mongo_uri']
mongo_vulns_key = config['mongo_vulns_key']
mongo_vulns_url = config['mongo_vulns_url']

In [3]:
client = pymongo.MongoClient(mongo_uri)
    
# Select the database
db = client["FinalProject"]

# Collection for store software data
softwares_clt = db["Softwares"]

## Functions

In [4]:
def restructure_cvss_metrics(infoVulnJSON, metricVersion):
    """
    Store in a JSON (cvssMetricInfo) the organized data about cvss metrics
    
    Args: 
        infoVulnJSON: JSON with data still unorganized 
        metricVersion: especifies the cvss version which was calculated the metrics

    Returns:
        JSON: organized data about cvss metrics which contains:
            baseSeverity - baseScore - vertorString and its details - exploitabilityScore - impactScore - type - version cvss Metrics
    """
    cvssMetricInfo = {}
    cvssMetricInfo['baseScore'] = infoVulnJSON[0]['cvssData']['baseScore']
    # Depends cvss version used for metrics the baseSeverity is in diferent JSON's place
    if metricVersion == 31 or metricVersion == 30:
        cvssMetricInfo['baseSeverity'] = infoVulnJSON[0]['cvssData']['baseSeverity']
    else:
        cvssMetricInfo['baseSeverity'] = infoVulnJSON[0]['baseSeverity']
    # Remaining cvss metrics is equally located in any version
    cvssMetricInfo['exploitabilityScore'] = infoVulnJSON[0]['exploitabilityScore']
    cvssMetricInfo['impactScore'] = infoVulnJSON[0]['impactScore']
    cvssMetricInfo['type'] = infoVulnJSON[0]['type']
    cvssMetricInfo['cvssDetailedData'] = infoVulnJSON[0]['cvssData']
    # Deleting repeated info
    del cvssMetricInfo['cvssDetailedData']['baseScore']
    if metricVersion == 31 or metricVersion == 30:
        del cvssMetricInfo['cvssDetailedData']['baseSeverity']
    # Return final restructured JSON
    return cvssMetricInfo

In [5]:
def get_vulnerability_info(vulnId):
    """
    Obtain the info provided by services.nvd.nist.gov about the vulnerability, then store the relevant info in a JSON
    
    Args: 
        vulnId: That's the vulnerability identifier for the endpoint request.

    Returns:
        vulnInfo: JSON that contains relevant info about the vulnerability.
    """
    print('VulnName a pasar = '+ vulnId) 
    endPoint = f'https://services.nvd.nist.gov/rest/json/cves/2.0?cveId={vulnId}'
    headers = {
        'apiKey': api_key_nist
    }
    infoVuln = requests.get(endPoint,headers = headers)
    try:
        infoVulnJSON = infoVuln.json()
    except ValueError:
        print('Error, respuesta recibida fue: ')
        print(infoVuln.json())
        return
    infoVulnJSON = infoVulnJSON['vulnerabilities'][0]['cve']
    cvssMetricInfo = {}
    if 'cvssMetricV31' in infoVulnJSON['metrics']:
        cvssMetricInfo = restructure_cvss_metrics(infoVulnJSON['metrics']['cvssMetricV31'],31)
    elif 'cvssMetricV30' in infoVulnJSON['metrics']:
        cvssMetricInfo = restructure_cvss_metrics(infoVulnJSON['metrics']['cvssMetricV30'],30)
    elif 'cvssMetricV2' in infoVulnJSON['metrics']:
        cvssMetricInfo = restructure_cvss_metrics(infoVulnJSON['metrics']['cvssMetricV2'],2)
    
    vulnInfo = {
        "CVE_ID": vulnId,
        "description": infoVulnJSON['descriptions'][0]['value'],
        "vulnStatus": infoVulnJSON['vulnStatus'],
        "metrics": cvssMetricInfo,
    }
    return vulnInfo

In [6]:
#Finish
def get_cve_per_software(productName):
    url = f"https://www.cvedetails.com/api/v1/vulnerability/search?outputFormat=json&productName={productName}&pageNumber=1&resultsPerPage=20"
    
    headers = {
      'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',
      'Authorization': f'Bearer {api_key_cvedetails}'
    }
    tryRequest = True
    while(tryRequest):   
        session = requests.Session()
        response = session.get(url, headers=headers)
        responseJSON = response.json()
        if "errors" in responseJSON:
            print("time exceded waiting 1 minute")
            time.sleep(60)
        else:
            print("cveList request of " + productName + " done")
            tryRequest = False
    cveList = []
    if len(responseJSON['results']) < 1:
        return "No vulnerabilities registered"
    for cve in responseJSON['results']:
        cveInfo = get_vulnerability_info(cve['cveId'])
        cveAttackDetails = {k:v for k, v in cve.items() if k.startswith('is')}
        cveInfo['AttackDetails'] = cveAttackDetails
        cveList.append(cveInfo)

    # Create the 
    cveIDList = [cve['CVE_ID'] for cve in cveList]

    return cveList

In [7]:
def filter_vulns_data(data):
    print(json.dumps(data,indent=4))

In [8]:
def get_data_filtered():
    """
    Comments
    """
    data = get_vulns_data()
    data_fiiltered = filter_vulns_data(data)
    #mac_address = get_mac_address(os)
    #os_version = get_software_version(os)
    #oftware_list = get_software_list(os)

    #return {
    #    "id": mac_address,
    #    "os": os_version['so'],
    #    "osVersion": os_version['so_version'],
    #    "scanDate": datetime.now().isoformat(),
    #    "softwares": software_list
    #}

In [9]:
def insert_software_with(software):
    """
    Insert one software in the Atlas MongoDB
    Args: software: JSONs with the info to insert
    """
    try:
        softwares_clt.insert_one(software)
        print("Software inserted in the database")
    except Exception as e:
        print(f"Error: {e}")

    

## Execution

In [10]:
def get_vulns_data():
    """
    Comments
    Try to request the data in MongoDB, if it doesn't respond it uses the temporary data file.
    """
    
    url = mongo_vulns_url
     
    payload = "{\r\n    \"collection\":\"softwareByIP\",\r\n    \"database\":\"vulnsData\",\r\n    \"dataSource\":\"Cluster0\",\r\n    \"filter\": {}\r\n}'"
    headers = {
      'Content-Type': 'application/ejson',
      'apiKey': mongo_vulns_key
    }
    response =  requests.request("POST", url, headers=headers, data=payload)
    if response.status_code == 200:
        return response.json()
    else:
        with open('data_temp.json', 'r') as file:
            return json.load(file)


In [11]:
def main():
    try:
        data_filtered = get_data_filtered()
        print(json.dumps(data_filtered, indent=4))
    except Exception as e:
        print(f"Failed to retrieve or process data: {e}")

In [12]:
#if __name__ == "__main__":
#    main()

In [13]:
data = get_vulns_data()

In [14]:
#print(json.dumps(data,indent=4))

In [15]:
def get_software_list(data):
    # Flatten the list of software names using a list comprehension.
    return [software['Name'] for document in data['documents'] for software in document['softwareData']]

software_list = get_software_list(data)

# Create a list of dictionaries, each representing a document with its details.
all_documents = [
    {
        'id': document['id'],
        'hostname': document['hostname'],
        'os': document['operativeSystem'],
        'osVersion': document['version'],
        'scanDate': datetime.now().isoformat(),
        'softwares': software_list
    } for document in data['documents']
]


In [16]:
print(json.dumps(all_documents,indent=4))

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)

