# This script retrieves Python-related vulnerabilities from the IBM X-Force Exchange API

Import required libraries 

In [1]:
import json
import requests
from requests.auth import HTTPBasicAuth
import pandas as pd

Set the authentication credentials and the request URL.
Note: the URL query parameter (q) is set to Python.

In [2]:
# Placeholder credentials: replace both values with your own before running.
# They are passed to HTTPBasicAuth inside get_data_remote.
API_KEY = "your_api_key"
API_PASSWORD = "your_api_password"
# Full-text vulnerability search endpoint with the query (q) fixed to "Python".
URL = "https://api.xforce.ibmcloud.com/vulnerabilities/fulltext?q=Python"

In [3]:
#function to get data from remote
def get_data_remote(URL):
    """Fetch the vulnerability search results from the remote API.

    The request is authenticated with HTTP basic auth using the module-level
    API_KEY and API_PASSWORD.

    Args:
        URL: Full request URL, including the query string.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(
        URL,
        auth=HTTPBasicAuth(API_KEY, API_PASSWORD),
        # Without a timeout, requests waits forever on an unresponsive server.
        timeout=30,
    )
    # Fail loudly on bad credentials or rate limiting instead of handing an
    # error payload to the extraction step as if it were data.
    response.raise_for_status()

    return json.loads(response.text)

In [5]:
# Display the parsed response.
# NOTE(review): in the cells shown here this name is only assigned inside
# get_data_remote; presumably an omitted cell (In [4]) ran something like
# `python_vulnerabilities_IBM_json = get_data_remote(URL)` -- confirm.
python_vulnerabilities_IBM_json

{'total_rows': 393,
 'bookmark': 'g1AAAAMIeJzLYWBg4MhgTmFQS0lKzi9KdUhJMjTRy0zK1a1Iyy9KTjUwMNRLzskvTUnMK9HLSy3JAapnSmRIkv___38WmJMLJESMDAxNdQ1MdQ0tQwwMrMAoKomBwZctC81sc0JmJykAySR7bMab6BoaoBgvpYluvClB4x1Axsdjd72RIYrxPPvRjTcmaHwCyPh6HIFjgWK8RjvYeFW48QTDPY8FSDI0ACmgBfMxbDDWNTJGsUFvA8nBD7FiAcSK_UREwW00TxBpwwGIDfexBRNqLHiIkhdMDyA2YESEoYGukTmqDS5ZWQAuFdVV',
 'rows': [{'type': 'vulnerability',
   'xfdbid': 195401,
   'updateid': 108060,
   'inserted': True,
   'variant': 'single',
   'title': 'Identity Python PySAML2 security bypass',
   'description': 'Identity Python PySAML2 could allow a remote attacker to bypass security restrictions, caused by not validating the SAML document against an XML schema. By persuading a victim to open a specially-crafted XML document, an attacker could exploit this vulnerability to process invalid SAML XML documents.',
   'risk_level': 6.5,
   'cvss': {'version': '3.0',
    'privilegesrequired': 'None',
    'userinteraction': 'Required',
    'scope': 'Unchanged

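The response is saved locally after the first data pull (for example, paste it into Notepad and save it with a `.json` extension) so that I do not need to repeat the HTTP request on subsequent runs while exploring the data. Note that the cell output displayed above is a Python dict repr (single quotes, `True`), not valid JSON; save the raw response text, or write the parsed data with `json.dump`, so that `json.load` can read it back.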
#### Import the required libraries in the first cell before continuing from here

In [2]:
# Constants: the file path the saved JSON response is loaded from, and the
# file path the extracted CSV is saved to.
# NOTE(review): both are absolute paths on one particular Windows machine --
# adjust them before running elsewhere.
LOAD_DATA_PATH = r"C:\Users\Semiu\Documents\python-codesecurity\data\PythonvulfromIBM.json"
SAVE_DATA_PATH = r"C:\Users\Semiu\Documents\python-codesecurity\data\extractedpyvulIBM.csv"

In [3]:
#function to load data from the local machine
def load_data(datapath):
    """Load a JSON document from a file on the local machine.

    Args:
        datapath: Path of the JSON file to read.

    Returns:
        The decoded JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8: the platform default (e.g. cp1252 on
    # Windows) would corrupt or reject non-ASCII text in the file.
    with open(datapath, encoding="utf-8") as vul_file:
        return json.load(vul_file)

In [7]:
#Function to extract data of interest from the loaded data
def extract_data(loaded_data):
    """Extract the fields of interest from the loaded data into columns.

    Every entry of ``loaded_data['rows']`` contributes exactly one value to
    each output column. A value that is missing from a row (absent key,
    absent or empty 'cvss' mapping, empty list) is recorded as ``None`` so
    that all columns always have the same length.

    Args:
        loaded_data: Mapping with a 'rows' key holding a list of row mappings.

    Returns:
        A dict mapping each column name to a list of values, one per row.
        For 'cve_id' and 'platform' only the first element of the row's
        'stdcode' / 'platforms_affected' list is kept.

    Raises:
        KeyError: If ``loaded_data`` has no 'rows' key.
    """
    # Column order is preserved because it determines the CSV column order.
    columns = (
        'title', 'description', 'exploitability', 'risk_level', 'cve_id',
        'platform', 'consequences', 'privilege', 'access_vector',
        'access_complexity', 'confidentiality_impact', 'integrity_impact',
        'availability_impact',
    )
    # Columns copied straight from the row under the same key.
    row_fields = ('title', 'description', 'exploitability', 'risk_level',
                  'consequences')
    # Output column -> list-valued row key of which only the first item is kept.
    first_item_fields = {'cve_id': 'stdcode', 'platform': 'platforms_affected'}
    # Output column -> key inside the row's nested 'cvss' mapping.
    cvss_fields = {
        'privilege': 'privilegesrequired',
        'access_vector': 'access_vector',
        'access_complexity': 'access_complexity',
        'confidentiality_impact': 'confidentiality_impact',
        'integrity_impact': 'integrity_impact',
        'availability_impact': 'availability_impact',
    }

    ibm_vuldata = {column: [] for column in columns}

    for row in loaded_data['rows']:
        for field in row_fields:
            ibm_vuldata[field].append(row.get(field))

        for column, key in first_item_fields.items():
            values = row.get(key)
            # An absent key and an empty list both mean "no value".
            ibm_vuldata[column].append(values[0] if values else None)

        # Rows without CVSS details still yield one (None) entry per column.
        cvss = row.get('cvss') or {}
        for column, key in cvss_fields.items():
            ibm_vuldata[column].append(cvss.get(key))

    return ibm_vuldata

In [5]:
#function to save the data in csv to local machine
def save_data_tocsv(ibm_vuldata):
    """Write the extracted data to the CSV file at SAVE_DATA_PATH.

    ``ibm_vuldata`` is passed to ``pandas.DataFrame`` as-is and the resulting
    frame is written out as UTF-8 encoded CSV.
    """
    pd.DataFrame(ibm_vuldata).to_csv(SAVE_DATA_PATH, encoding='utf-8')

In [8]:
# Run the whole pipeline as one nested call when working from the local copy:
# load the JSON file -> extract the columns of interest -> save them as CSV.
save_data_tocsv(extract_data (load_data(LOAD_DATA_PATH)))

In [None]:
# Same pipeline, but with the data fetched from the remote API (URL) instead of
# the local file; get_data_remote authenticates with API_KEY / API_PASSWORD.
save_data_tocsv(extract_data (get_data_remote(URL)))