In [19]:
import os
import eikon as ek
import numpy as np
import pickle
import pandas as pd
import cufflinks as cf
import configparser as cp

## Connecting to Eikon Data API

This code sets the app_id to connect to the Eikon Data API Proxy which needs to be running locally. It requires the previously created text file eikon.cfg to be in the current working directory.

In [20]:
cfg = cp.ConfigParser()
cfg.read('eikon.cfg')  # adjust for different file location

['eikon.cfg']

In [21]:
ek.set_app_key(cfg['eikon']['app_id']) #set_app_id function being deprecated

****

## Get a List of RICs

In [5]:
# temporarily, we download data for all RICs we have
# late on, we will remove some of those RICs
# for that purpose, we need to read RICs from three csv files:
# that is 1.banktupt_listv2.csv and 2.*_list.csv files

# set the path to xlsx file
# path_xlsx = 'D:\\studyproject\\bankruptcy\\data\\Eikon\\' + \
#       'Identifiers_Mapping\\1.CUSIP-to-ISIN\\' # for win
path_xlsx = '/Users/user/Documents/Bankruptcy/bankruptcy/data/Eikon/' + \
       'Identifiers_Mapping/1.CUSIP-to-ISIN/' # for mac

# set the path to csv files
# path_csv = 'D:\\studyproject\\bankruptcy\\data\\Eikon\\' + \
#       'Identifiers_Mapping\\2.ISIN-to-RIC\\' # for win
path_csv = '/Users/user/Documents/Bankruptcy/bankruptcy/data/Eikon/' + \
       'Identifiers_Mapping/2.ISIN-to-RIC/' # for mac



# read xlsx bankrupt file, downloaded from Google Drive
# after being manually improved
bankrupt_m = pd.read_excel(path_xlsx + '1.bankrupt_list_v2.xlsx', sheet_name='1.bankrupt_list',
                           dtype=object)

# read final csv files
bankrupt = pd.read_csv(path_csv + '2.bankrupt_list.csv', dtype=object)
healthy = pd.read_csv(path_csv + '2.healthy_list.csv', dtype=object)

print(bankrupt_m.count())
bankrupt_m.head()

Identifier            112
Company               112
Data Deletion Date    112
Deletion Reason       112
Ticker                112
CUSIP                 112
CIK                   100
ISIN                  112
ISINc                 112
RIC                   101
RICc                  112
Country               112
dtype: int64


Unnamed: 0,Identifier,Company,Data Deletion Date,Deletion Reason,Ticker,CUSIP,CIK,ISIN,ISINc,RIC,RICc,Country
0,1367,Amber Resources Company of C,08/31/2012,2.0,3AMBE,023184203,276750,US0231842032,.,AMBEQ.OB^H12,.,US
1,2033,Fairchild Corp. (The),11/01/2011,2.0,FCHDQ,303698104,9779,US3036981047,.,FCHDQ.PK^K11,.,US
2,4049,Constar International Inc,06/01/2011,2.0,CNSTQ,21036U206,29806,US21036U2069,.,CNST.OQ^A11,.,US
3,4352,Energy Conversion Devices In,09/04/2012,2.0,ENERQ,292659109,32878,US2926591098,.,ENERQ.PK^I12,.,US
4,4768,Fleetwood Enterprises Inc.,08/23/2010,2.0,FLTWQ,339099103,314132,US3390991038,.,FLTWQ.PK^H10,.,US


In [6]:
print(bankrupt.count())
bankrupt.head()

Company               105
Data Deletion Date    105
Deletion Reason       105
Ticker                105
CUSIP                 105
CIK                    93
ISIN                  105
ISINc                 105
RIC                    57
RIC_ek                  6
RICc                  105
Country               105
dtype: int64


Unnamed: 0,Company,Data Deletion Date,Deletion Reason,Ticker,CUSIP,CIK,ISIN,ISINc,RIC,RIC_ek,RICc,Country
0,Amber Resources Company of C,08/31/2012,2.0,3AMBE,023184203,276750,US0231842032,0,,,0,US
1,Fairchild Corp. (The),11/01/2011,2.0,FCHDQ,303698104,9779,US3036981047,0,FCHDQ.PK^K11,,1,US
2,Constar International Inc,06/01/2011,2.0,CNSTQ,21036U206,29806,US21036U2069,0,,,0,US
3,Energy Conversion Devices In,09/04/2012,2.0,ENERQ,292659109,32878,US2926591098,0,ENERQ.PK^I12,,1,US
4,Fleetwood Enterprises Inc.,08/23/2010,2.0,FLTWQ,339099103,314132,US3390991038,0,FLTWQ.PK^H10,,1,US


In [7]:
print(healthy.count())
healthy.head()

Company               19158
Data Deletion Date    19158
Deletion Reason        7780
Ticker                19154
CUSIP                 19158
CIK                   13462
ISIN                  19158
ISINc                 19158
RIC                    9945
RIC_ek                 8551
RICc                  19158
Country               18573
dtype: int64


Unnamed: 0,Company,Data Deletion Date,Deletion Reason,Ticker,CUSIP,CIK,ISIN,ISINc,RIC,RIC_ek,RICc,Country
0,AAR Corp,.,,AIR,000361105,1750,US0003611052,0,AIR,AIR,0,US
1,ADC Telecommunications Inc.,12/10/2010,1.0,ADCT,000886309,61478,US0008863096,0,,,0,US
2,AFP Imaging Corp,09/15/2014,7.0,IWKS,001058205,319126,US0010582056,0,IWKS.PK,IWKS.PK,1,US
3,Alpharma Inc.,12/31/2008,1.0,ALO.2,020813101,730469,US0208131013,0,,,0,US
4,American Airlines Group Inc,.,,AAL,02376R102,6201,US02376R1023,0,AAL.O - AAL.Z,AAL.O,3,US


In [8]:
# read a dataseries and convert it to a list
def series_to_list(ds):
    '''
    this function takes a dataseries, breaks the double-value
    rows, and returns a list.
    '''
    # drop NANs
    ds.dropna(inplace=True)
    
    the_list = []
    # iterate over series rows
    for index, value in ds.items():
        if ' - ' in value:
            the_list.append(value[:value.find(' - ')])
            the_list.append(value[value.find(' - ') + 3:])
        else:
            the_list.append(value)
    
    return the_list

In [9]:
# actually extract, and merge the lists together
bankrupt_RIC = list(set(series_to_list(bankrupt_m.RIC) + series_to_list(bankrupt.RIC)
                        + series_to_list(bankrupt.RIC_ek)))

healthy_RIC = list(set(series_to_list(healthy.RIC) + series_to_list(healthy.RIC_ek)))

print(len(bankrupt_RIC), 'RICs for bankrupt companies, and')
print(len(healthy_RIC), 'RICs for healthy companies.')

101 RICs for bankrupt companies, and
10184 RICs for healthy companies.


## And Finally, Download the Data

In [10]:
# create folders for saving data
if not os.path.exists('data_bankrupt_RIC'):
    os.makedirs('data_bankrupt_RIC')
    
if not os.path.exists('data_healthy_RIC'):
    os.makedirs('data_healthy_RIC')

In [11]:
# Eikon does not work with large lists, so probably we
# need to break the healthy_RIC list
#healthy_RIC_1 = healthy_RIC[]
healthy_RIC_list = [healthy_RIC[i:i + 2000] for i in range(0, len(healthy_RIC), 2000)]


### Historical Data

In [None]:
# define a function for getting timeseries
def timeseries(ric_list, folder):
    '''
    the function gets a list of RICs and name of a folder,
    and saves a dataframe of historical data for each valid
    RIC in the list, in that folder.
    '''
    for ric in ric_list:
        try:

            df = ek.get_timeseries(ric,  # the RIC for the company
                                 fields='*',  # all fields
                                 start_date='2000-01-01',  # start date
                                 end_date='2019-08-19')  # end date
            df.to_csv(folder + '/{}.csv'.format(ric))
            
        except:
            pass

        

# get timeseries for bankrupt companies
timeseries(bankrupt_RIC, 'data_bankrupt_RIC')


In [None]:
# and healthy ones
# ----------------------------------------------------
# for eikon data-download limitation I write the code
# for each list in a separate line, and you know what,
# in a separate cell, as well. This way we could resume
# downloading after 12 hours without any unnecessary
# confusion.
# ----------------------------------------------------

# 1 out of 6 - undone
timeseries(healthy_RIC_list[0], 'data_healthy_RIC')


In [None]:
# 2 out of 6 - undone
timeseries(healthy_RIC_list[1], 'data_healthy_RIC')


In [None]:
# 3 out of 6 - undone
timeseries(healthy_RIC_list[2], 'data_healthy_RIC')


In [None]:
# 4 out of 6 - undone
timeseries(healthy_RIC_list[3], 'data_healthy_RIC')


In [None]:
# 5 out of 6 - undone
timeseries(healthy_RIC_list[4], 'data_healthy_RIC')


In [None]:
# 6 out of 6 - undone
timeseries(healthy_RIC_list[5], 'data_healthy_RIC')


### Financial Factors

In [12]:
# add all financial factors we need to replicate *the paper*

fields = ['TR.TotalCurrentAssets', # Current Assets
          'TR.TotalCurrLiabilities', # Current Liabilities
          'TR.AccountsPayableCFStmt', # Accounts Payable
          'TR.NetSales', # 
          'TR.CashAndSTInvestments',
          'TR.TotalAssetsReported',
          'TR.CashAndSTInvestments',
          'TR.MarketCapDS',
          'TR.TotalCurrLiabilities',
          'TR.Cash',
          'TR.TotalAssetsReported',
          'TR.Cash',
          'TR.TotalCurrLiabilities',
          'TR.BGS.BusEarningsBeforeInterestandTaxesEbit',
          'TR.BGS.GeoEarningsBeforeInterestandTaxesEbit',
          'TR.DepreciationAmort',
          'TR.TotalAssetsReported',
          'TR.BGS.BusEarningsBeforeInterestandTaxesEbit',
          'TR.BGS.GeoEarningsBeforeInterestandTaxesEbit',
          'TR.TotalAssetsReported',
          'TR.BGS.BusEarningsBeforeInterestandTaxesEbit',
          'TR.BGS.GeoEarningsBeforeInterestandTaxesEbit',
          'TR.NetSales',
          'TR.TotalDebtOutstanding',
          'TR.NetDebt',
          'TR.TotalAssetsReported',
          'TR.PCTotInvt5YrCompAnnGrowthRate',
          'TR.Inventories',
          'TR.Inventories',
          'TR.NetSales',
          'TR.TotalCurrLiabilities',
          'TR.Cash',
          'TR.TotalAssetsReported',
          'TR.TotalCurrLiabilities',
          'TR.TotalAssetsReported',
          'TR.TotalCurrLiabilities',
          'TR.TotalLiabilities',
          'TR.TotalCurrLiabilities',
          'TR.NetSales',
          'TR.TotalLiabilities',
          'TR.TotalAssetsReported',
          'TR.TotalLiabilities',
          'TR.TotalLiabilities',
          'TR.TangibleBookValueRptd',
          'TR.TangibleBookValueBnk',
          'TR.TangibleBookValueIns',
          'TR.TangibleBookValueUtil',
          'TR.TangibleBVPS',
          'TR.NetIncomeStartingLine',
          'TR.TotalAssetsReported',
          'TR.NetIncomeStartingLine',
          'TR.NetIncomeStartingLine',
          'TR.NetSales',
          'TR.OperatingIncome',
          'TR.TotalAssetsReported',
          'TR.STOCKPRICE',
          'TR.FiPrice',
          'TR.TotalCurrLiabilities',
          'TR.RetainedEarnings',
          'TR.TotalAssetsReported',
          'TR.MARKETCAPITALISATION',
          'MKT_CAP',
          'TR.TotalEquity',
          'TR.TotalAssetsReported',
          'TR.FiPricingVolatility',
          'TR.PCWrkgCapPrdOverPrdAvg(Period=FY0)',
          'TR.PCWrkgCapPrdOverPrdPctChg(Period=FY0)', 
          'TR.PZPHasFiledForBankruptcy', # bankruptcy check (not working)
          'TR.PZPBankruptcyFilingDate', # bankruptcy check (not working)
          'TR.InstrumentIsActive', # bankruptcy check (not reliable)
          'TR.IsDelistedQuote']  # bankruptcy check (not reliable)

# remove duplicates
fields = list(set(fields))

In [22]:
# download the financial factors

# for the bankrupt companies
# get the data
bankrupt_data, err = ek.get_data(bankrupt_RIC, fields=fields,
                                 parameters={'Frq':'Q',
                                             'SDate':'2000-01-01',
                                             'EDate':'2019-06-30'})
# and save the dataframe as a csv file
bankrupt_data.to_csv('data_bankrupt_RIC/bankrupt_data.csv')


"# for the healthy companies\n# get the data\nhealthy_data_list = []\n\nfor ric_list in healthy_RIC_list:\n    healthy_df, err = ek.get_data(ric_list, fields=fields,\n                                  parameters={'Period':'Period=FY2000;FY2001;FY2002;                                                                FY2003;FY2004;FY2005;                                                                FY2006;FY2007;FY2008;                                                                FY2009;FY2010;FY2011;                                                                FY2012;FY2013;FY2014;                                                                FY2015;FY2016;FY2017;                                                                FY2018;FY2019', \n                                              'Frq':'M',\n                                              'SDate':'2000-01-01',\n                                              'EDate':'0'})\n    # add the dataframe to the list\n    healthy_data_

In [None]:
# again for the healthy list we need to arrange
# download precedure in different cells

# set a list to append the resulting dataframe
healthy_data_list = []

# get data

# 1 out of 6
healthy_df, err = ek.get_data(healthy_RIC_list[0], fields=fields,
                              parameters={'Frq':'Q',
                                          'SDate':'2000-01-01',
                                          'EDate':'2019-06-30'})
healthy_data_list.append(healthy_df)

In [None]:

# 2 out of 6
healthy_df, err = ek.get_data(healthy_RIC_list[1], fields=fields,
                              parameters={'Frq':'Q',
                                          'SDate':'2000-01-01',
                                          'EDate':'2019-06-30'})
healthy_data_list.append(healthy_df)


In [None]:

# 3 out of 6
healthy_df, err = ek.get_data(healthy_RIC_list[2], fields=fields,
                              parameters={'Frq':'Q',
                                          'SDate':'2000-01-01',
                                          'EDate':'2019-06-30'})
healthy_data_list.append(healthy_df)



In [None]:

# 4 out of 6
healthy_df, err = ek.get_data(healthy_RIC_list[3], fields=fields,
                              parameters={'Frq':'Q',
                                          'SDate':'2000-01-01',
                                          'EDate':'2019-06-30'})
healthy_data_list.append(healthy_df)



In [None]:

# 5 out of 6
healthy_df, err = ek.get_data(healthy_RIC_list[4], fields=fields,
                              parameters={'Frq':'Q',
                                          'SDate':'2000-01-01',
                                          'EDate':'2019-06-30'})
healthy_data_list.append(healthy_df)



In [None]:

# 6 out of 6
healthy_df, err = ek.get_data(healthy_RIC_list[5], fields=fields,
                              parameters={'Frq':'Q',
                                          'SDate':'2000-01-01',
                                          'EDate':'2019-06-30'})
healthy_data_list.append(healthy_df)



In [None]:

# concatenate the 6 dataframes in the list
healthy_data = pd.concat([df for df in healthy_data_list])
    
# and save the merged dataframe as a csv file
healthy_data.to_csv('data_healthy_RIC/healthy_data.csv')

In [25]:

# test the resulting dataframe for bankrupt company
print(len(bankrupt_data))
bankrupt_data.head()


7339


Unnamed: 0,Instrument,Tangible Book Value Per Share,Accounts Payable (CF),Net Debt Incl. Pref.Stock & Min.Interest,Depreciation And Amortization,Tangible Book Value - Banks,Pricing Volatility,Tangible Book Value - Reported,Inventories (CF),Tangible Book Value - Insurance,...,Geographic Earnings Before Interest and Taxes (EBIT),Total Liabilities,Retained Earnings (Accumulated Deficit),"Total Inventory, 5 Year Compounded Annual Growth Rate (Pvt)",Tangible Book Value - Utility,Cash and Short Term Investments,Cash,Stock Price,Total Current Assets,Total Current Liabilities
0,RONCQ.PK^B12,0.53783,783000.0,8132000.0,582000.0,2420000.0,,2420000.0,-1350000.0,2420000.0,...,,14472000.0,-27636000.0,,2420000.0,187000.0,,,7572000.0,10740000.0
1,RONCQ.PK^B12,0.53783,783000.0,8132000.0,582000.0,2420000.0,,2420000.0,-1350000.0,2420000.0,...,,14472000.0,-27636000.0,,2420000.0,187000.0,,,7572000.0,10740000.0
2,RONCQ.PK^B12,0.53783,783000.0,8132000.0,582000.0,2420000.0,,2420000.0,-1350000.0,2420000.0,...,,14472000.0,-27636000.0,,2420000.0,187000.0,,,7572000.0,10740000.0
3,RONCQ.PK^B12,0.53783,783000.0,8132000.0,582000.0,2420000.0,,2420000.0,-1350000.0,2420000.0,...,,14472000.0,-27636000.0,,2420000.0,187000.0,,,7572000.0,10740000.0
4,RONCQ.PK^B12,0.59602,-557000.0,5891000.0,683000.0,2898000.0,,2898000.0,1918000.0,2898000.0,...,,12294000.0,-27202000.0,,2898000.0,81000.0,,,6466000.0,7613000.0


In [18]:
bankrupt_data

Unnamed: 0,Instrument,Tangible Book Value Per Share,Accounts Payable (CF),Net Debt Incl. Pref.Stock & Min.Interest,Depreciation And Amortization,Tangible Book Value - Banks,Pricing Volatility,Tangible Book Value - Reported,Inventories (CF),Tangible Book Value - Insurance,...,Geographic Earnings Before Interest and Taxes (EBIT),Total Liabilities,Retained Earnings (Accumulated Deficit),"Total Inventory, 5 Year Compounded Annual Growth Rate (Pvt)",Tangible Book Value - Utility,Cash and Short Term Investments,Cash,Stock Price,Total Current Assets,Total Current Liabilities
0,RONCQ.PK^B12,0.53783,783000.0,8.132000e+06,5.820000e+05,2.420000e+06,,2.420000e+06,-1350000.0,2.420000e+06,...,,1.447200e+07,-2.763600e+07,,2.420000e+06,1.870000e+05,,,7.572000e+06,1.074000e+07
1,RONCQ.PK^B12,0.53783,783000.0,8.132000e+06,5.820000e+05,2.420000e+06,,2.420000e+06,-1350000.0,2.420000e+06,...,,1.447200e+07,-2.763600e+07,,2.420000e+06,1.870000e+05,,,7.572000e+06,1.074000e+07
2,RONCQ.PK^B12,0.53783,783000.0,8.132000e+06,5.820000e+05,2.420000e+06,,2.420000e+06,-1350000.0,2.420000e+06,...,,1.447200e+07,-2.763600e+07,,2.420000e+06,1.870000e+05,,,7.572000e+06,1.074000e+07
3,RONCQ.PK^B12,0.53783,783000.0,8.132000e+06,5.820000e+05,2.420000e+06,,2.420000e+06,-1350000.0,2.420000e+06,...,,1.447200e+07,-2.763600e+07,,2.420000e+06,1.870000e+05,,,7.572000e+06,1.074000e+07
4,RONCQ.PK^B12,0.59602,-557000.0,5.891000e+06,6.830000e+05,2.898000e+06,,2.898000e+06,1918000.0,2.898000e+06,...,,1.229400e+07,-2.720200e+07,,2.898000e+06,8.100000e+04,,,6.466000e+06,7.613000e+06
5,RONCQ.PK^B12,0.59602,-557000.0,5.891000e+06,6.830000e+05,2.898000e+06,,2.898000e+06,1918000.0,2.898000e+06,...,,1.229400e+07,-2.720200e+07,,2.898000e+06,8.100000e+04,,,6.466000e+06,7.613000e+06
6,RONCQ.PK^B12,0.59602,-557000.0,5.891000e+06,6.830000e+05,2.898000e+06,,2.898000e+06,1918000.0,2.898000e+06,...,,1.229400e+07,-2.720200e+07,,2.898000e+06,8.100000e+04,,,6.466000e+06,7.613000e+06
7,RONCQ.PK^B12,0.59602,-557000.0,5.891000e+06,6.830000e+05,2.898000e+06,,2.898000e+06,1918000.0,2.898000e+06,...,,1.229400e+07,-2.720200e+07,,2.898000e+06,8.100000e+04,,,6.466000e+06,7.613000e+06
8,RONCQ.PK^B12,0.59067,-574000.0,3.276000e+06,7.260000e+05,2.872000e+06,,2.872000e+06,1762000.0,2.872000e+06,...,,9.755000e+06,-2.710100e+07,,2.872000e+06,6.890000e+05,,,5.528000e+06,5.295000e+06
9,RONCQ.PK^B12,0.59067,-574000.0,3.276000e+06,7.260000e+05,2.872000e+06,,2.872000e+06,1762000.0,2.872000e+06,...,,9.755000e+06,-2.710100e+07,,2.872000e+06,6.890000e+05,,,5.528000e+06,5.295000e+06


****

In [None]:
# test the resulting dataframe for healthy company
print(len(healthy_data))
healthy_data.head()


In [None]:
healthy_data