In [252]:
import requests
import pandas as pd
from datetime import datetime
import logging

class Status:
    """Plain attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Write straight into the instance namespace so arbitrary keys are accepted.
        vars(self).update(kwargs)

    def __str__(self):
        """Render the stored attributes as the string form of their dict."""
        attributes = vars(self)
        return str(attributes)

class EPSS():
    """Small client for EPSS scores.

    Two sources are used:

    * ``api_url`` -- REST API root; :meth:`get` queries ``<api_url>epss``.
    * ``raw_url`` -- host of the gzip-compressed daily CSV dumps read by
      :meth:`get_all`.
    """

    # Values of ``scope`` that get() knows how to turn into a DataFrame.
    _SUPPORTED_SCOPES = ('public', 'time-series')

    def __init__(self, api_url='https://api.first.org/data/v1/',
                 raw_url='https://epss.cyentia.com/'):
        """Store the endpoints; no network access happens here.

        Args:
            api_url: Root of the REST API (must end with '/').
            raw_url: Root of the daily CSV dump host (must end with '/').
        """
        self.api_url = api_url
        self.raw_url = raw_url

    def validate_date(self, date_text):
        """Check that ``date_text`` parses with the ``%Y-%m-%d`` format.

        Raises:
            ValueError: If the text cannot be parsed as a date in that format.
        """
        try:
            datetime.strptime(date_text, '%Y-%m-%d')
        except ValueError:
            raise ValueError("Incorrect data format, should be YYYY-MM-DD")

    def get_all(self, date: str = None) -> tuple:
        """Download a full daily score dump.

        Args:
            date: Day to download as 'YYYY-MM-DD'. ``None`` downloads the
                'current' dump and labels the rows with today's date.

        Returns:
            ``(scores, status)``: ``scores`` is a DataFrame with float
            ``epss`` and ``percentile`` columns plus a ``date`` column;
            ``status`` is a ``Status`` carrying ``version`` and ``score_date``
            taken from the first line of the file.

        Raises:
            ValueError: If ``date`` is a string that is not YYYY-MM-DD.
            Exception: If ``date`` is neither ``None`` nor a string, or if the
                downloaded file is empty or does not have the expected layout.
        """
        if date is None:
            day_url = self.raw_url + 'epss_scores-current.csv.gz'
            date = datetime.today().strftime('%Y-%m-%d')
        elif isinstance(date, str):
            self.validate_date(date)
            # Must be an f-string: a plain literal would request the file
            # literally named 'epss_scores-{date}.csv.gz'.
            day_url = self.raw_url + f'epss_scores-{date}.csv.gz'
        else:
            raise Exception('Date is invalid')

        epss_df = pd.read_csv(day_url, compression='gzip', sep=',')
        if len(epss_df) == 0:
            # Fail loudly instead of returning None, which callers would try
            # to unpack into (frame, status).
            raise Exception('EPSS data is empty')
        logging.info('Done with total rows = %d', len(epss_df))

        # The first line of the file is parsed by pandas as the column labels
        # ("<key>:<version>", "<key>:<timestamp>"); the real column names
        # (epss, percentile) therefore arrive as the first data row.
        header = epss_df.iloc[0]
        labels = [str(label) for label in header.index]
        if len(labels) != 2 or any(':' not in label for label in labels):
            raise Exception('EPSS format is malformed')

        version = labels[0].split(':')[1]
        # Split only once: the timestamp itself contains ':' separators that
        # must be kept.
        score_date = labels[1].split(':', 1)[1]

        epss_df.columns = header
        num_df = epss_df.iloc[1:].copy()
        num_df['epss'] = num_df['epss'].astype('float')
        num_df['percentile'] = num_df['percentile'].astype('float')
        num_df['date'] = date
        status = Status(version=version, score_date=score_date)
        return num_df, status

    @staticmethod
    def _expand_time_series(df):
        """Turn one row per CVE (with a nested 'time-series' list) into one row per day."""
        frames = []
        for _, row in df.iterrows():
            history = pd.DataFrame(row['time-series'])
            history['cve'] = row['cve']
            # The top-level record (minus the nested list) is added as one
            # extra row after the history. DataFrame.append was removed in
            # pandas 2.0, so build a one-row frame and concat instead.
            latest = row.drop(labels=['time-series']).to_frame().T.infer_objects()
            frames.append(pd.concat([history, latest], ignore_index=True))
        if not frames:
            # pd.concat([]) raises; an empty result is an empty frame.
            return pd.DataFrame()
        return pd.concat(frames)

    def get(self, cve=None, envelope: bool = True, pretty: bool = False, offset: int = None,
            order: bool = True, date: str = None, scope: str = 'public',
            epss_gt: float = None, epss_lt: float = None,
            percentile_gt: float = None, percentile_lt: float = None,
            q: str = None, timeout: float = None) -> tuple:
        """Query the ``epss`` API endpoint.

        Args:
            cve: A CVE id, or a list/tuple of ids (sent comma-separated).
            envelope, pretty, offset, q: Forwarded as query parameters of the
                same name. ``None`` values are left to the HTTP library.
            order: When false, sends ``order=!epss``; otherwise no ``order``
                parameter is sent.
            date: Optional 'YYYY-MM-DD' day, validated before sending.
            scope: 'public' or 'time-series'.
            epss_gt, epss_lt, percentile_gt, percentile_lt: Forwarded as
                ``epss-gt``, ``epss-lt``, ``percentile-gt``, ``percentile-lt``.
            timeout: Seconds passed to ``requests.get``. ``None`` (default)
                keeps the previous behavior of not setting a timeout.

        Returns:
            ``(df, status)``: ``df`` is indexed by ``cve`` when the response
            contains rows. ``status`` is a ``Status`` built from every
            top-level response key except ``data`` when ``envelope is True``,
            else ``None``.

        Raises:
            ValueError: If ``date`` is a string that is not YYYY-MM-DD.
            Exception: If ``scope`` is unsupported or the HTTP status is not 200.
        """
        # Reject an unsupported scope before spending a network round trip.
        if scope not in self._SUPPORTED_SCOPES:
            raise Exception('Scope not supported')

        params = {}
        if isinstance(cve, (list, tuple)):
            params['cve'] = ','.join(cve)
        elif isinstance(cve, str):
            params['cve'] = cve

        if isinstance(date, str):
            self.validate_date(date)
            params['date'] = date

        params.update({
            'envelope': envelope,
            'pretty': pretty,
            'offset': offset,
            'scope': scope,
            'epss-gt': epss_gt,
            'epss-lt': epss_lt,
            'percentile-gt': percentile_gt,
            'percentile-lt': percentile_lt,
            'q': q,
        })
        # Same cases as the former `order == False`: falsy but not None.
        if order is not None and not order:
            params['order'] = '!epss'

        r = requests.get(self.api_url + 'epss', params=params, timeout=timeout)
        if r.status_code != 200:
            raise Exception(f'HTTP errror {r.status_code}')

        data_status = r.json()
        df = pd.json_normalize(data_status, 'data')
        if scope == 'time-series':
            df = self._expand_time_series(df)
        # An empty result has no 'cve' column to index on.
        if 'cve' in df.columns:
            df = df.set_index('cve')

        if envelope is True:
            envelope_fields = {key: value for key, value in data_status.items() if key != 'data'}
            status = Status(**envelope_fields)
        else:
            status = None
        return df, status



In [260]:
# Create a client with the default endpoints, run an unfiltered API query,
# and list the columns of the returned frame.
epss = EPSS()
data_df, status = epss.get()
data_df.columns

Index(['epss', 'percentile', 'date'], dtype='object')

In [261]:
# Preview the first two rows of the API result.
data_df.head(n=2)

Unnamed: 0_level_0,epss,percentile,date
cve,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CVE-2022-30292,0.00885,0.24624,2022-05-05
CVE-2022-30288,0.0089,0.27007,2022-05-05


In [262]:
# Download the full 'current' CSV dump and list its columns.
all_df, status = epss.get_all()
all_df.columns

Index(['epss', 'percentile', 'date'], dtype='object', name='cve')

In [263]:
# Preview the first four rows of the full dump.
all_df.head(n=4)

cve,epss,percentile,date
CVE-2022-25236,0.20148,0.96152,2022-05-06
CVE-2022-1388,0.01404,0.70195,2022-05-06
CVE-2022-1040,0.15362,0.95652,2022-05-06
CVE-2022-22954,0.93243,0.99926,2022-05-06


In [225]:
# order=False makes the client send order='!epss'.
data_df, status = epss.get(order=False)
data_df.head(n=2)

Unnamed: 0,cve,epss,percentile,date
0,CVE-2021-40438,0.97224,1.0,2022-05-05
1,CVE-2017-12635,0.97109,0.99999,2022-05-05


In [226]:
# Filter on the epss score: sent to the API as 'epss-lt'.
data_df, status = epss.get(epss_lt=0.0088)
data_df.shape

(1, 4)

In [227]:
# Filter on the epss score: sent to the API as 'epss-gt'.
data_df, status = epss.get(epss_gt=0.95)
data_df.shape

(70, 4)

In [232]:
# Filter on the percentile: sent to the API as 'percentile-gt'.
data_df, status = epss.get(percentile_gt=0.98)
data_df.shape

(100, 4)

In [231]:
# Filter on the percentile: sent to the API as 'percentile-lt'.
data_df, status = epss.get(percentile_lt=0.1)
data_df.shape

(1, 4)

In [190]:
# Look up a single CVE; with envelope=False the client returns status=None.
data_df, status = epss.get(cve='CVE-2022-27225', envelope=False)
data_df

Unnamed: 0,cve,epss,percentile,date
0,CVE-2022-27225,0.00885,0.24624,2022-05-05


In [191]:
# The preceding query passed envelope=False, so `status` is None and this
# cell displays nothing.
status

In [192]:
# A list of CVE ids is joined with commas into a single 'cve' query parameter.
cve_list = [
    'CVE-2022-27225',
    'CVE-2022-27223',
    'CVE-2022-27218',
]
data_df, status = epss.get(cve=cve_list)
data_df

Unnamed: 0,cve,epss,percentile,date
0,CVE-2022-27225,0.00885,0.24624,2022-05-05
1,CVE-2022-27223,0.0089,0.27007,2022-05-05
2,CVE-2022-27218,0.00885,0.24624,2022-05-05


In [193]:
# Query scores for a specific day; the date must be formatted YYYY-MM-DD.
data_df, status = epss.get(date='2022-03-01')
data_df

Unnamed: 0,cve,epss,percentile,date
0,CVE-2022-26332,0.008850000,0.111530000,2022-03-01
1,CVE-2022-26315,0.008850000,0.111530000,2022-03-01
2,CVE-2022-26181,0.008900000,0.127680000,2022-03-01
3,CVE-2022-26159,0.014120000,0.507330000,2022-03-01
4,CVE-2022-26158,0.008850000,0.111530000,2022-03-01
...,...,...,...,...
95,CVE-2022-25206,0.008850000,0.111530000,2022-03-01
96,CVE-2022-25205,0.008850000,0.111530000,2022-03-01
97,CVE-2022-25204,0.008850000,0.111530000,2022-03-01
98,CVE-2022-25203,0.008850000,0.111530000,2022-03-01


In [194]:
# scope='time-series' expands each CVE into one row per day of history.
data_df, status = epss.get(cve=cve_list, scope='time-series')
data_df

Unnamed: 0,epss,percentile,date,cve
0,0.008850000,0.246150000,2022-05-04,CVE-2022-27225
1,0.008850000,0.246040000,2022-05-03,CVE-2022-27225
2,0.008850000,0.245850000,2022-05-02,CVE-2022-27225
3,0.008850000,0.245810000,2022-05-01,CVE-2022-27225
4,0.008850000,0.245810000,2022-04-30,CVE-2022-27225
...,...,...,...,...
26,0.008850000,0.242790000,2022-04-08,CVE-2022-27218
27,0.008850000,0.242790000,2022-04-07,CVE-2022-27218
28,0.008850000,0.242790000,2022-04-06,CVE-2022-27218
29,0.008850000,0.242810000,2022-04-05,CVE-2022-27218


In [195]:
# Same time-series expansion, for a single CVE id.
data_df, status = epss.get(cve='CVE-2022-27225', scope='time-series')
data_df

Unnamed: 0,epss,percentile,date,cve
0,0.00885,0.24615,2022-05-04,CVE-2022-27225
1,0.00885,0.24604,2022-05-03,CVE-2022-27225
2,0.00885,0.24585,2022-05-02,CVE-2022-27225
3,0.00885,0.24581,2022-05-01,CVE-2022-27225
4,0.00885,0.24581,2022-04-30,CVE-2022-27225
5,0.00885,0.24569,2022-04-29,CVE-2022-27225
6,0.00885,0.24555,2022-04-28,CVE-2022-27225
7,0.00885,0.24546,2022-04-27,CVE-2022-27225
8,0.00885,0.2454,2022-04-26,CVE-2022-27225
9,0.00885,0.24532,2022-04-25,CVE-2022-27225
