In [14]:
import eikon as ek
import pandas as pd

from utilities import columns, filter_dataset

# CONSTANTS

In [5]:
# Load the Eikon app key from a local file so it is not hardcoded in the notebook.
with open('key.secret') as key_file:
    # Strip surrounding whitespace: a trailing newline left by an editor would
    # otherwise become part of the key passed to ek.set_app_key.
    KEY = key_file.read().strip()

In [6]:
# Path of the input green-bond CSV; it is loaded with pd.read_csv further down.
FILENAME = 'green_bonds_IsPublic.csv'

In [7]:
# Smoke-test the Eikon connection: register the app key and fetch one known
# value before running the (much larger) real queries below.
try:
    ek.set_app_key(KEY)
    bonds, err = ek.get_data(['CN151819SH='], 'TR.FirstAnnounceDate')
except ek.EikonError as exc:
    # Chain the original error so the underlying Eikon failure stays visible.
    raise ValueError('Please check key.secret, then run Eikon Desktop or Eikon API Proxy') from exc

# Positional access (.iloc) reads the first returned row regardless of index labels.
first_announced = bonds['First Announcement Date'].iloc[0]
if first_announced != '2019-07-11':
    raise ValueError(
        "Eikon sanity check failed: expected First Announcement Date "
        f"'2019-07-11' for CN151819SH=, got {first_announced!r}")

# CHECK DB

In [9]:
# Load the bond dataset; the 'Issue Date' column is parsed as dates while reading.
green_bonds = pd.read_csv(FILENAME, parse_dates=['Issue Date'])

In [13]:
# Validate the CSV schema against the expected column names in columns.py.
# Both directions are computed up front so a single error lists every problem,
# instead of guessing from the set sizes which one to report.
col_set = set(green_bonds.columns)
missing_cols = columns.col_names - col_set   # expected but absent from the CSV
extra_cols = col_set - columns.col_names     # present in the CSV but not expected

if missing_cols or extra_cols:
    problems = []
    if missing_cols:
        problems.append(f"{missing_cols} ARE MISSING COLUMNS!")
    if extra_cols:
        problems.append(f"{extra_cols} ARE ADDITIONAL COLUMNS!\nPlease ADD THEM"
                        " to columns.py OR REMOVE THEM")
    # Same exception type as before; the message now covers both cases at once.
    raise TypeError("\n".join(problems))

TypeError: {'IsPublic'} ARE ADDITIONAL COLUMNS!
Please ADD THEM to columns.py OR REMOVE THEM

# FILTER DB

In [15]:
# Run the project's filter_dataset helper and rebind green_bonds to its result.
# Judging by the summary it prints, it appears to exclude bonds from before 2009
# and from 2020 — TODO confirm against utilities/filter_dataset.
green_bonds = filter_dataset.filter_dataset(green_bonds)

Excluded 0 rows over 1871. Green bonds before 2009 must be due to errors and 2020 might pollute data due to COVID and year not finished yet.


# GET FIRST ANNOUNCEMENT DATE

In [17]:
# Query Eikon for 'TR.FirstAnnounceDate' for every non-null ISIN in the dataset.
# NOTE(review): `err` is not inspected anywhere in the visible cells, so
# per-instrument failures reported by Eikon would go unnoticed.
bonds, err = ek.get_data(green_bonds.ISIN.dropna().to_list(), 'TR.FirstAnnounceDate')

In [18]:
# Display the Eikon response (columns: Instrument, First Announcement Date).
bonds

Unnamed: 0,Instrument,First Announcement Date
0,XS1890845875,2019-02-05
1,CND10000C3L5,2016-09-01
2,XS2082433736,2019-11-26
3,CND100017CJ8,2017-09-08
4,CND10001WQ91,2019-01-02
...,...,...
1849,XS1207105161,2015-03-20
1850,US83417KFC71,2015-11-30
1851,XS1512929842,2016-10-28
1852,XS1566937154,2017-07-26


In [19]:
# Lookup table: instrument identifier (ISIN) -> first announcement date.
# If an instrument appears more than once, the last row wins.
isin_to_date = {
    isin: announced
    for isin, announced in zip(bonds['Instrument'], bonds['First Announcement Date'])
}

In [20]:
# NOTE(review): this echoes the entire mapping; the saved output is very long
# and is cut off mid-entry. Consider showing only a small sample.
isin_to_date

{'XS1890845875': '2019-02-05',
 'CND10000C3L5': '2016-09-01',
 'XS2082433736': '2019-11-26',
 'CND100017CJ8': '2017-09-08',
 'CND10001WQ91': '2019-01-02',
 'XS2079413527': '2019-11-18',
 'FR0013310505': '2018-01-10',
 'CND100017C17': '2016-10-26',
 'XS1721244371': '2017-11-15',
 'XS1591694481': '2017-03-29',
 'XS1797138960': '2018-03-19',
 'CND10000C3M3': '2016-05-23',
 'INE028A08083': '2016-12-01',
 'CND1000163S9': '2018-04-04',
 'XS1908374322': '2018-11-28',
 'FR0013398229': '2019-01-17',
 'CND100017CK6': '2018-05-24',
 'CND10000J7Q8': '2017-09-12',
 'HK0000525334': '2019-09-09',
 'US83417KDY10': '2015-09-14',
 'XS0970680541': '2013-09-05',
 'XS1882030510': '2018-09-18',
 'XS1687735107': '2017-09-26',
 'US48245ACL35': '2017-09-25',
 'XS1566941933': '2017-09-01',
 'US83417KED63': '2015-09-14',
 'MYBVG1702910': '2017-10-04',
 'XS1691909334': '2017-09-28',
 'XS1692890343': '2017-09-28',
 'US83417KEJ34': '2015-10-16',
 'US865622BY94': '2015-10-14',
 'XS1303791336': '2015-10-02',
 'SE0007

In [21]:
# Attach the announcement date to each bond by looking its ISIN up in the
# mapping; ISINs absent from the mapping yield missing values.
green_bonds['First Announcement Date'] = green_bonds['ISIN'].map(isin_to_date)

In [22]:
# Number of bonds still lacking a first announcement date after the ISIN lookup.
int(green_bonds['First Announcement Date'].isna().sum())

17

In [None]:
# Fallback query: for bonds whose date is still missing, ask Eikon again using
# the non-null 'Preferred RIC' values instead of the ISIN.
# Note: this rebinds `bonds` and `err` from the earlier ISIN query.
bonds, err = ek.get_data(
    green_bonds.loc[green_bonds['First Announcement Date'].isna(), 'Preferred RIC']
    .dropna()
    .to_list(),
    'TR.FirstAnnounceDate',
)

In [27]:
# Lookup table: RIC -> first announcement date, built from the fallback query.
rics_to_dates = {
    ric: announced
    for ric, announced in zip(bonds['Instrument'], bonds['First Announcement Date'])
}

In [28]:
# Display the RIC -> first announcement date mapping from the fallback query.
rics_to_dates

{'CN114265SZ=': '2017-12-01',
 'CN143952SH=': '2018-03-07',
 'CN150502SH=': '2018-06-21',
 'CN150701SH=': '2018-11-07',
 'CN155057SH=': '2018-11-29',
 'CN155956SH=': '2019-02-27',
 'CN143518SH=': '2018-03-14',
 'CN143745SH=': '2018-07-31',
 'CN143525SH=': '2018-08-15',
 'CN150646SH=': '2018-08-21',
 'CN143822SH=': '2018-09-17',
 'CN114375SZ=': '2018-09-20',
 'CN114415SZ=': '2018-12-19',
 'CN151739SH=': '2019-06-27',
 'CN151819SH=': '2019-07-11',
 'CN163044SH=': '2019-11-28'}

In [30]:
# Number of bonds still lacking a first announcement date.
# NOTE(review): this cell's execution count (In [30]) is later than that of the
# fill cell placed below it (In [29]), so the saved output is the count AFTER the
# RIC fallback. On a fresh top-to-bottom run this position shows the pre-fill count.
int(green_bonds['First Announcement Date'].isna().sum())

1

In [29]:
# Fill the remaining gaps: wherever the ISIN lookup left no date, use the date
# found for the bond's 'Preferred RIC' (existing dates are left untouched).
green_bonds['First Announcement Date'] = (
    green_bonds['First Announcement Date']
    .fillna(value=green_bonds['Preferred RIC'].map(rics_to_dates))
)

In [31]:
# Inspect the resulting column (dates are kept as strings, dtype object).
green_bonds['First Announcement Date']

0       2019-02-05
1       2016-09-01
2       2019-11-26
3       2017-09-08
4       2019-01-02
           ...    
1866    2015-03-20
1867    2015-11-30
1868    2016-10-28
1869    2017-07-26
1870    2015-05-18
Name: First Announcement Date, Length: 1871, dtype: object

In [39]:
# Cast the IsPublic flag to bool. Bracket indexing is used for the assignment
# because attribute-style assignment only targets a column when it already
# exists; otherwise pandas sets a plain Python attribute instead.
# NOTE(review): astype(bool) turns missing values (NaN) into True — confirm the
# column has no gaps before relying on this cast.
green_bonds['IsPublic'] = green_bonds['IsPublic'].astype(bool)

In [38]:
# Write the enriched dataset to disk without the DataFrame index.
# NOTE(review): the execution counts show this cell (In [38]) last ran before the
# IsPublic cast placed above it (In [39]); re-run top to bottom so the saved file
# reflects every preceding step.
green_bonds.to_csv('cleaned_green_bonds.csv', index=False)