In [58]:
import requests
import pandas as pd
from datetime import datetime
import logging

class Status(object):
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def __str__(self):
        return str(self.__dict__)

class EPSS():

  def __init__(self,api_url='https://api.first.org/data/v1/'):
    self.api_url = api_url
    self.raw_url = "https://epss.cyentia.com/"

  def validate_date(self,date_text):
    try:
        datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        raise ValueError("Incorrect data format, should be YYYY-MM-DD")

  def get_all(self,date:str=None)-> pd.DataFrame:
    if date is None:
      day_url = self.raw_url + 'epss_scores-current.csv.gz'
      date = datetime.today().strftime('%Y-%m-%d')
    elif type(date) is str:
        self.validate_date(date)
        day_url = self.raw_url + 'epss_scores-{date}.csv.gz'
    else:
      raise Exception('Date is invalid')
      
    epss_df = pd.read_csv(day_url,compression='gzip',sep=',')
    if len(epss_df) > 0 :
      logging.info('Done with total rows = %d' % len(epss_df))
      header = epss_df.iloc[0]
      if len(header)==2:
        version = header.index[0].split(':')[1]
        score_date = ''.join(header.index[1].split(':')[1:])
        epss_df.columns = epss_df.iloc[0]
        num_df = epss_df.iloc[1:].copy()
        del epss_df
        num_df['epss']=num_df['epss'].astype('float')
        num_df['percentile']=num_df['percentile'].astype('float')
        num_df['date']=date
        #num_df.set_index('cve',inplace=True)
        status = Status(version=version,score_date=score_date)
        return num_df,status
      else:
        raise Exception('EPSS format is malformed')

  def get(self,cve=None,envelope:bool=True,pretty:bool=False,offset:int=None,limit:int=None,order:bool=True,sort_fields:list=None,
          date:str=None,scope:str='public',epss_gt:float=None,epss_lt:float=None,fields:list=None,
          percentile_gt:float=None,percentile_lt:float=None,q:str=None)-> pd.DataFrame:

    '''
    Refer to parameters here: https://api.first.org/#Global-parameters
    '''
    url = self.api_url + 'epss'

    params = {}
    if type(cve) is list:
        params['cve'] = ','.join(cve)
    if type(cve) is str:
        params['cve'] = cve
    if type(sort_fields) is list:
        params['sort'] = ','.join(sort_fields)
    if type(fields) is list:
        params['fields'] = ','.join(fields)
    if type(date) is str:
        self.validate_date(date)
        params['date'] = date
    
    params['envelope']=envelope
    params['pretty']=pretty
    params['offset']=offset
    params['limit']=limit
    params['scope']=scope
    params['epss-gt']=epss_gt
    params['epss-lt']=epss_lt
    params['percentile-gt']=percentile_gt
    params['percentile-lt']=percentile_lt
    params['q']=q
 
    if order == False: params['order']='!epss'
    
    r = requests.get(url,params)

    if r.status_code == 200:
        data_status = r.json()

        if params['scope']=='public':
          df = pd.json_normalize(data_status, 'data')
          df.set_index('cve',inplace=True)
        elif params['scope']=='time-series':
          # iterate through each vulnerability
          df = pd.json_normalize(data_status, 'data')
          tmp = []
          for idx,ts in df.iterrows():
            tmp_df = pd.DataFrame(ts['time-series'])
            tmp_df['cve']=ts['cve']
            tmp_df = tmp_df.append(ts.drop(labels=['time-series']), ignore_index=True)
            #tmp_df = pd.concat([tmp_df,ts.drop(labels=['time-series'])])
            tmp.append(tmp_df)
          df = pd.concat(tmp)
          df.set_index('cve',inplace=True)
        else:
          raise Exception('Scope not supported')

        del data_status['data']
        if envelope is True:
          status = Status(**data_status)
        else:
          status = None
        return df,status
    else:
      raise Exception(f'HTTP errror {r.status_code}')



In [59]:
epss = EPSS()
data_df,status = epss.get(limit=10)
assert len(data_df)==10
assert data_df.columns.tolist()== ['epss', 'percentile', 'date']
data_df.index

Index(['CVE-2022-30524', 'CVE-2022-30335', 'CVE-2022-30334', 'CVE-2022-30333',
       'CVE-2022-30330', 'CVE-2022-30295', 'CVE-2022-30294', 'CVE-2022-30293',
       'CVE-2022-30292', 'CVE-2022-30288'],
      dtype='object', name='cve')

In [60]:
data_df,status = epss.get(sort_fields=['-epss'],limit=10)
data_df

Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2022-30524,0.01055,0.48889,2022-05-10
CVE-2022-30335,0.0089,0.27066,2022-05-10
CVE-2022-30334,0.0095,0.29049,2022-05-10
CVE-2022-30333,0.00885,0.24688,2022-05-10
CVE-2022-30330,0.00885,0.24688,2022-05-10
CVE-2022-30295,0.00885,0.24688,2022-05-10
CVE-2022-30294,0.00885,0.24688,2022-05-10
CVE-2022-30293,0.00885,0.24688,2022-05-10
CVE-2022-30292,0.00885,0.24688,2022-05-10
CVE-2022-30288,0.0089,0.27066,2022-05-10


In [61]:
data_df,status = epss.get(sort_fields=['+epss'],limit=10)
data_df

Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2022-30524,0.01055,0.48889,2022-05-10
CVE-2022-30335,0.0089,0.27066,2022-05-10
CVE-2022-30334,0.0095,0.29049,2022-05-10
CVE-2022-30333,0.00885,0.24688,2022-05-10
CVE-2022-30330,0.00885,0.24688,2022-05-10
CVE-2022-30295,0.00885,0.24688,2022-05-10
CVE-2022-30294,0.00885,0.24688,2022-05-10
CVE-2022-30293,0.00885,0.24688,2022-05-10
CVE-2022-30292,0.00885,0.24688,2022-05-10
CVE-2022-30288,0.0089,0.27066,2022-05-10


In [62]:
data_df,status = epss.get(fields=['cve','epss'],limit=5)
data_df

Unnamed: 0_level_0,epss
cve,Unnamed: 1_level_1
CVE-2022-30524,0.01055
CVE-2022-30335,0.0089
CVE-2022-30334,0.0095
CVE-2022-30333,0.00885
CVE-2022-30330,0.00885


In [63]:
data_df,status = epss.get(order=False)
data_df.head(2)

Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2021-40438,0.97224,1.0,2022-05-10
CVE-2017-12635,0.97109,0.99999,2022-05-10


In [64]:
data_df,status = epss.get(epss_lt=0.0088)
data_df.shape

(1, 3)

In [65]:
data_df,status = epss.get(epss_gt=0.95)
data_df.shape

(70, 3)

In [66]:
data_df,status = epss.get(percentile_gt=0.98)
data_df.shape

(100, 3)

In [67]:
data_df,status = epss.get(percentile_lt=0.1)
data_df.shape

(1, 3)

In [68]:
data_df,status = epss.get(cve='CVE-2022-27225',envelope=False)
data_df

Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2022-27225,0.00885,0.24688,2022-05-10


In [69]:
status

In [70]:
cve_list = ['CVE-2022-27225','CVE-2022-27223','CVE-2022-27218']
data_df,status = epss.get(cve=cve_list)
data_df

Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2022-27225,0.00885,0.24688,2022-05-10
CVE-2022-27223,0.0089,0.27066,2022-05-10
CVE-2022-27218,0.00885,0.24688,2022-05-10


In [71]:
data_df,status = epss.get(date='2022-03-01')
data_df

Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2022-26332,0.008850000,0.111530000,2022-03-01
CVE-2022-26315,0.008850000,0.111530000,2022-03-01
CVE-2022-26181,0.008900000,0.127680000,2022-03-01
CVE-2022-26159,0.014120000,0.507330000,2022-03-01
CVE-2022-26158,0.008850000,0.111530000,2022-03-01
...,...,...,...
CVE-2022-25206,0.008850000,0.111530000,2022-03-01
CVE-2022-25205,0.008850000,0.111530000,2022-03-01
CVE-2022-25204,0.008850000,0.111530000,2022-03-01
CVE-2022-25203,0.008850000,0.111530000,2022-03-01


In [72]:
data_df,status = epss.get(cve=cve_list,scope='time-series')
data_df

Unnamed: 0_level_0,epss,percentile,date,0
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CVE-2022-27225,0.008850000,0.246670000,2022-05-09,
CVE-2022-27225,0.008850000,0.246670000,2022-05-08,
CVE-2022-27225,0.008850000,0.246640000,2022-05-07,
CVE-2022-27225,0.008850000,0.246590000,2022-05-06,
CVE-2022-27225,0.008850000,0.246240000,2022-05-05,
...,...,...,...,...
CVE-2022-27218,0.008850000,0.243270000,2022-04-10,
,,,,CVE-2022-27218
,,,,0.008850000
,,,,0.246880000


In [57]:
data_df,status = epss.get(cve='CVE-2022-27225',scope='time-series')
data_df.head(4)

  tmp_df = tmp_df.append(ts.drop(labels=['time-series']), ignore_index=True)


Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2022-27225,0.00885,0.24667,2022-05-09
CVE-2022-27225,0.00885,0.24667,2022-05-08
CVE-2022-27225,0.00885,0.24664,2022-05-07
CVE-2022-27225,0.00885,0.24659,2022-05-06


In [56]:
all_df,status = epss.get_all()
assert len(all_df)>1000
assert all_df.columns.tolist()== ['epss', 'percentile', 'date']
all_df.head(4)

cve,epss,percentile,date
CVE-2007-2447,0.79403,0.9943,2022-05-11
CVE-2022-23852,0.123,0.94975,2022-05-11
CVE-2022-1388,0.23579,0.96506,2022-05-11
CVE-2020-25540,0.73957,0.99229,2022-05-11
