# This script scrapes Java-related vulnerabilities from the IBM X-Force Exchange

Import required libraries 

In [18]:
import json
import os

import pandas as pd
import requests
from requests.auth import HTTPBasicAuth

Set the authentication credentials and the request URL.
Note: the URL's query parameter (`q`) is set to `Java`.

In [19]:
# Credentials are read from the environment so that real secrets never have to
# be typed into (and saved with) the notebook. The original placeholders remain
# the fallback when the variables are not set.
API_KEY = os.environ.get("XFORCE_API_KEY", "your_api_key")
API_PASSWORD = os.environ.get("XFORCE_API_PASSWORD", "your_api_password")
# Request URL; the query parameter (q) is set to "Java".
URL = "https://api.xforce.ibmcloud.com/vulnerabilities/fulltext?q=Java"

In [20]:
#Load data from remote
def load_data_from_remote (URL, timeout=30):
    """Fetch the vulnerability search result from the remote API.

    Sends an HTTP GET to ``URL`` using HTTP basic authentication built from the
    notebook-level ``API_KEY`` and ``API_PASSWORD``, and returns the response
    body decoded from JSON.

    Parameters
    ----------
    URL : str
        The full request URL, including the query string.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests`` waits indefinitely, which would hang the notebook.

    Returns
    -------
    The decoded JSON document (a dict for the response shown in this notebook).

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.exceptions.Timeout
        If the server does not answer within ``timeout`` seconds.
    json.JSONDecodeError
        If the response body is not valid JSON.

    NOTE(review): the response shown in the notebook carries 'total_rows' and a
    'bookmark' value, which looks like pagination; only the single response to
    this one request is returned - confirm whether more pages are needed.
    """
    response = requests.get(
        URL,
        auth=HTTPBasicAuth(API_KEY, API_PASSWORD),
        timeout=timeout,
    )
    # Fail loudly on an error status instead of parsing an error body as data.
    response.raise_for_status()

    return json.loads(response.text)

In [7]:
# Bind the response to a notebook-level name before displaying it; inside
# load_data_from_remote the name is local, so a bare reference would raise
# NameError on a fresh kernel.
java_vulnerabilities_IBM_json = load_data_from_remote(URL)
java_vulnerabilities_IBM_json

{'total_rows': 1961,
 'bookmark': 'g1AAAAMKeJzLYWBg4MhgTmFQS0lKzi9KdUhJMjTRy0zK1a1Iyy9KTjUwMNRLzskvTUnMK9HLSy3JAapnSmRIkv___38WmJMLJESMDAwtdQ3MdA0NQgwMrMAoKomBwWdjFprZ5oTMTlIAkkn22Iw30TUwRjHelQndeEOCxjuAjI8nzngR0o1PABlfj814c10DSxTjPczQjTciZHweC5BkaABSQBvmE_aByyp0K4yJs2IBxIr9RATSXbAVqnArzIiz4QDEhvtYbDA0CzE0Q9gQaIvuCYIJFGLFA4gV2GIa3Qq3rCwAtQXWXw',
 'rows': [{'type': 'vulnerability',
   'xfdbid': 196239,
   'updateid': 109671,
   'inserted': True,
   'variant': 'single',
   'title': 'JetBrains Kotlin information disclosure',
   'description': 'JetBrains Kotlin could allow a local authenticated attacker to obtain sensitive information, caused by an insecure permission flaw when creating temporary file and folder by the Java API. By gaining access to the temporary directory, an attacker could exploit this vulnerability to obtain sensitive information, and use this information to launch further attacks against the affected system.',
   'risk_level': 5.5,
   'cvss': {'version': '3.0',
    'pr

The response is saved locally after the first data pull (just copy and paste it into Notepad and save it with a `.json` extension), so that I do not need to make the HTTP request again while exploring the data.

In [21]:
# Absolute, machine-specific locations - adjust them before running elsewhere.
# Locally saved copy of the API response; passed to load_data.
LOAD_DATA_PATH = r"C:\Users\Semiu\Documents\java-codesecurity\JavaVulData\JavavulFromIBM.json"
# Destination of the CSV written by save_data_tocsv.
SAVE_DATA_PATH = r"C:\Users\Semiu\Documents\java-codesecurity\JavaVulData\extractedJavaVul.csv"

In [9]:
#function to load data from the local machine
def load_data (datapath):
    """Load a JSON document from a file on the local machine.

    Parameters
    ----------
    datapath : str or path-like
        Location of the JSON file.

    Returns
    -------
    The decoded JSON document (a dict for the data used in this notebook).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Decode explicitly instead of relying on the platform default encoding.
    # "utf-8-sig" reads plain UTF-8 and also skips a leading byte-order mark,
    # which json.load would otherwise reject.
    with open(datapath, encoding="utf-8-sig") as vulData:
        return json.load(vulData)

In [14]:
#Understanding the type of the object obtained when the data is read from the local machine.
#This is different from the type (buffer) obtained when the data is the response to an HTTP GET request.
#The file is opened here because the handle inside load_data is local to that function.
with open(LOAD_DATA_PATH) as vulData:
    vulData_type = type(vulData)
vulData_type

_io.TextIOWrapper

In [15]:
#Viewing data: load it into a notebook-level name first, since the name used
#inside load_data is local to that function
vulData_json = load_data(LOAD_DATA_PATH)
vulData_json

{'total_rows': 1961,
 'bookmark': 'g1AAAAMIeJzLYWBg4MhgTmFQS0lKzi9KdUhJMjTTy0zK1a1Iyy9KTjUwMNRLzskvTUnMK9HLSy3JAapnSmRIkv___38WmJMLJESMDAwtdQ3MdA0NQgwMrMAoKomBwWdjFshsVbjZloSMTlIAkkn22Ew30TUwRjHdlSkLzeWmBI13ABkfT5zxQujGGxI0PgFkfD024811DSxRjPcwQwsbE0Km57EASYYGIAW0YD5hD7gsR_eAMXFWLIBYsZ-IMLqF5gmCMQCx4QDEhvtE24DkCSLD6QHECmwRbWgWYmiGsCLQLSsLAJ221oI',
 'rows': [{'type': 'vulnerability',
   'xfdbid': 196239,
   'updateid': 109671,
   'inserted': True,
   'variant': 'single',
   'title': 'JetBrains Kotlin information disclosure',
   'description': 'JetBrains Kotlin could allow a local authenticated attacker to obtain sensitive information, caused by an insecure permission flaw when creating temporary file and folder by the Java API. By gaining access to the temporary directory, an attacker could exploit this vulnerability to obtain sensitive information, and use this information to launch further attacks against the affected system.',
   'risk_level': 5.5,
   'cvss': {'version': '3.0',
    'p

In [16]:
# Type of the parsed document; calling load_data directly keeps the cell
# independent of names left over from earlier kernel state.
type(load_data(LOAD_DATA_PATH))

dict

In [22]:
#Function to extract data of interest from the loaded data
def extract_data (loaded_data):
    """Extract the fields of interest from the loaded vulnerability data.

    Parameters
    ----------
    loaded_data : dict
        Parsed document with a 'rows' list holding one dict per vulnerability.

    Returns
    -------
    dict
        Maps each output column name to a list with one entry per row, in row
        order. Columns: title, description, exploitability, risk_level,
        cve_id, platform, consequences, privilege, access_vector,
        access_complexity, confidentiality_impact, integrity_impact,
        availability_impact.

    Raises
    ------
    KeyError
        If ``loaded_data`` has no 'rows' entry.

    Notes
    -----
    Any field missing from a row (including an absent 'cvss' section or an
    empty 'stdcode' / 'platforms_affected' list) is recorded as ``None``, so
    all columns keep the same length and one incomplete row cannot abort the
    whole extraction.
    """
    # Output column -> key inside the row's nested 'cvss' dict.
    cvss_columns = {
        'privilege': 'privilegesrequired',
        'access_vector': 'access_vector',
        'access_complexity': 'access_complexity',
        'confidentiality_impact': 'confidentiality_impact',
        'integrity_impact': 'integrity_impact',
        'availability_impact': 'availability_impact',
    }

    #initialize the data dictionary (key order fixes the column order downstream)
    ibm_vuldata = {'title': [], 'description': [], 'exploitability': [], 'risk_level': [], 'cve_id': [], 'platform':[], 'consequences': [], 'privilege':[], 'access_vector':[], 'access_complexity':[], 'confidentiality_impact': [], 'integrity_impact': [], 'availability_impact':[]}

    for row in loaded_data['rows']:
        ibm_vuldata['title'].append(row.get('title'))
        ibm_vuldata['description'].append(row.get('description'))
        ibm_vuldata['exploitability'].append(row.get('exploitability'))
        ibm_vuldata['risk_level'].append(row.get('risk_level'))

        # Only the first listed code / platform is kept; a missing key or an
        # empty list both yield None.
        stdcodes = row.get('stdcode')
        ibm_vuldata['cve_id'].append(stdcodes[0] if stdcodes else None)

        platforms = row.get('platforms_affected')
        ibm_vuldata['platform'].append(platforms[0] if platforms else None)

        ibm_vuldata['consequences'].append(row.get('consequences'))

        # Treat a missing (or null) 'cvss' section as an empty one.
        cvss = row.get('cvss') or {}
        for column, cvss_key in cvss_columns.items():
            ibm_vuldata[column].append(cvss.get(cvss_key))

    return ibm_vuldata
        
    


In [23]:
#function to save the data in csv to local machine
def save_data_tocsv (ibm_vuldata, save_path=None):
    """Write the extracted vulnerability data to a CSV file.

    Parameters
    ----------
    ibm_vuldata : dict
        Maps column names to equally long lists of values, as returned by
        extract_data.
    save_path : str or path-like, optional
        Destination file. Defaults to the notebook-level ``SAVE_DATA_PATH``,
        which keeps existing single-argument calls working unchanged.

    Returns
    -------
    None
        The function only writes the file. The DataFrame's row index is
        written as the first, unnamed column.
    """
    # Resolve the default at call time so a later change to SAVE_DATA_PATH is honoured.
    target_path = SAVE_DATA_PATH if save_path is None else save_path

    IBM_vuldata_frame = pd.DataFrame(ibm_vuldata)
    IBM_vuldata_frame.to_csv(target_path, encoding='utf-8')

In [14]:
#Local pipeline - the three functions composed in a single statement:
#read the locally saved JSON, extract the fields of interest, write the CSV
save_data_tocsv(
    extract_data(
        load_data(LOAD_DATA_PATH)
    )
)

In [24]:
#Remote pipeline: fetch the data over HTTP, extract the fields of interest, write the CSV
#(save_data_tocsv writes to SAVE_DATA_PATH, the same file the local pipeline writes)
save_data_tocsv(
    extract_data(
        load_data_from_remote(URL)
    )
)

In [1]:
#-----END----#