In [1]:
import os
import eikon as ek
import numpy as np
import pickle
import pandas as pd
import cufflinks as cf
import configparser as cp

In [None]:
# Parse the local configuration file that stores the Eikon credentials.
# Change the file name below if eikon.cfg lives somewhere else.
cfg = cp.ConfigParser()
cfg.read('eikon.cfg')

In [None]:
# Register the application key read from the [eikon] section of the config
# file (the option is named 'app_id' there).
ek.set_app_key(cfg['eikon']['app_id']) # set_app_key is used because the set_app_id function is being deprecated

## Get a List of CIKs

In [2]:
# Folder that holds the Compustat csv exports.
# path = 'D:\\studyproject\\bankruptcy\\data\\Compustat\\' # Windows location
path = '/Users/user/Documents/Bankruptcy/bankruptcy/data/Compustat/' # macOS location

# Load the two company lists. dtype=object keeps every column as text,
# so identifiers such as CUSIP keep their leading zeros.
bankrupt = pd.read_csv(os.path.join(path, 'list_bankrupt.csv'),
                       dtype=object,
                       index_col=0)
healthy = pd.read_csv(os.path.join(path, 'list_healthy.csv'),
                      dtype=object,
                      index_col=0)

# preview the bankrupt-company list
bankrupt.head()

Unnamed: 0,Identifier,Company,Data Deletion Date,Deletion Reason,Ticker,CUSIP,CIK
0,1367,Amber Resources Company of C,08/31/2012,2.0,3AMBE,023184203,276750
1,2033,Fairchild Corp. (The),11/01/2011,2.0,FCHDQ,303698104,9779
2,4049,Constar International Inc,06/01/2011,2.0,CNSTQ,21036U206,29806
3,4352,Energy Conversion Devices In,09/04/2012,2.0,ENERQ,292659109,32878
4,4768,Fleetwood Enterprises Inc.,08/23/2010,2.0,FLTWQ,339099103,314132


In [3]:
# preview the first rows of the healthy-company list
healthy.head()

Unnamed: 0,Identifier,Company,Data Deletion Date,Deletion Reason,Ticker,CUSIP,CIK
0,1004,AAR Corp,.,,AIR,000361105,1750
1,1013,ADC Telecommunications Inc.,12/10/2010,1.0,ADCT,000886309,61478
2,1021,AFP Imaging Corp,09/15/2014,7.0,IWKS,001058205,319126
3,1034,Alpharma Inc.,12/31/2008,1.0,ALO.2,020813101,730469
4,1045,American Airlines Group Inc,.,,AAL,02376R102,6201


In [5]:
# Collect the CIK column of each list as a plain Python list,
# leaving out companies that have no CIK.
bankrupt_CIK = bankrupt['CIK'].dropna().tolist()
healthy_CIK = healthy['CIK'].dropna().tolist()

# report how many identifiers are available in each group
print('{} CIKs available for bankrupt companies, and'.format(len(bankrupt_CIK)))
print('{} CIKs available for healthy companies.'.format(len(healthy_CIK)))


100 CIKs available for bankrupt companies, and
14761 CIKs available for healthy companies.


## Download the Data

In [6]:
# create folders for saving data
# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs('data_bankrupt_CIK', exist_ok=True)
os.makedirs('data_healthy_CIK', exist_ok=True)

In [7]:
# Eikon does not cope with very large instrument lists, so healthy_CIK
# is cut into consecutive slices of at most 2000 CIKs each.

healthy_CIK_list = [
    healthy_CIK[start:start + 2000]
    for start in range(0, len(healthy_CIK), 2000)
]


### Historical Data

In [None]:
# define a function for getting timeseries
def timeseries(cik_list, folder):
    '''
    Download the historical time series of every CIK in `cik_list`
    and save each one as `<folder>/<cik>.csv`.

    Parameters
    ----------
    cik_list : iterable
        Identifiers that are passed, one at a time, to `ek.get_timeseries`.
    folder : str
        Directory that receives the csv files. It is expected to exist
        already; if it does not, every save fails and is counted as skipped.

    Returns
    -------
    None

    Notes
    -----
    A CIK whose download or save raises an `Exception` is skipped, so one
    bad identifier does not abort the whole batch. The number of skipped
    CIKs is printed once at the end. `KeyboardInterrupt` and `SystemExit`
    are deliberately NOT caught, so a long download can still be stopped
    from the notebook.
    '''
    attempted = 0
    failed = []

    for cik in cik_list:
        attempted += 1
        try:
            df = ek.get_timeseries(cik,  # the CIK for the company
                                   fields='*',  # all fields
                                   start_date='2000-01-01',  # start date
                                   end_date='2019-08-19')  # end date
            df.to_csv(os.path.join(folder, '{}.csv'.format(cik)))
        except Exception:
            # best effort: remember the identifier and carry on with the rest
            failed.append(cik)

    if failed:
        print('{} of {} CIKs could not be saved to {}'.format(
            len(failed), attempted, folder))

        

# download and save the historical data of the bankrupt companies
timeseries(cik_list=bankrupt_CIK, folder='data_bankrupt_CIK')



In [None]:
# and healthy ones
# ----------------------------------------------------
# Because of the Eikon data-download limitation, every
# slice of healthy_CIK_list gets its own call in its
# own cell. This way the download can be resumed slice
# by slice after 12 hours without any unnecessary
# confusion.
# ----------------------------------------------------

# 1 out of 8 - undone
timeseries(cik_list=healthy_CIK_list[0], folder='data_healthy_CIK')


In [None]:
# 2 out of 8 - undone (second slice of the healthy CIKs)
timeseries(cik_list=healthy_CIK_list[1], folder='data_healthy_CIK')


In [None]:
# 3 out of 8 - undone (third slice of the healthy CIKs)
timeseries(cik_list=healthy_CIK_list[2], folder='data_healthy_CIK')


In [None]:
# 4 out of 8 - undone (fourth slice of the healthy CIKs)
timeseries(cik_list=healthy_CIK_list[3], folder='data_healthy_CIK')


In [None]:
# 5 out of 8 - undone (fifth slice of the healthy CIKs)
timeseries(cik_list=healthy_CIK_list[4], folder='data_healthy_CIK')


In [None]:
# 6 out of 8 - undone (sixth slice of the healthy CIKs)
timeseries(cik_list=healthy_CIK_list[5], folder='data_healthy_CIK')


In [None]:
# 7 out of 8 - undone (seventh slice of the healthy CIKs)
timeseries(cik_list=healthy_CIK_list[6], folder='data_healthy_CIK')


In [None]:
# 8 out of 8 - undone (eighth and last slice of the healthy CIKs)
timeseries(cik_list=healthy_CIK_list[7], folder='data_healthy_CIK')


### Financial Factors


In [None]:
# add all financial factors we need to replicate *the paper*
# (a field is listed again every time it is needed, so the raw list
# contains repeats; they are removed right below the list)

fields = ['TR.TotalCurrentAssets', # Current Assets
          'TR.TotalCurrLiabilities', # Current Liabilities
          'TR.AccountsPayableCFStmt', # Accounts Payable
          'TR.NetSales', # Net Sales
          'TR.CashAndSTInvestments',
          'TR.TotalAssetsReported',
          'TR.CashAndSTInvestments',
          'TR.MarketCapDS',
          'TR.TotalCurrLiabilities',
          'TR.Cash',
          'TR.TotalAssetsReported',
          'TR.Cash',
          'TR.TotalCurrLiabilities',
          'TR.BGS.BusEarningsBeforeInterestandTaxesEbit',
          'TR.BGS.GeoEarningsBeforeInterestandTaxesEbit',
          'TR.DepreciationAmort',
          'TR.TotalAssetsReported',
          'TR.BGS.BusEarningsBeforeInterestandTaxesEbit',
          'TR.BGS.GeoEarningsBeforeInterestandTaxesEbit',
          'TR.TotalAssetsReported',
          'TR.BGS.BusEarningsBeforeInterestandTaxesEbit',
          'TR.BGS.GeoEarningsBeforeInterestandTaxesEbit',
          'TR.NetSales',
          'TR.TotalDebtOutstanding',
          'TR.NetDebt',
          'TR.TotalAssetsReported',
          'TR.PCTotInvt5YrCompAnnGrowthRate',
          'TR.Inventories',
          'TR.Inventories',
          'TR.NetSales',
          'TR.TotalCurrLiabilities',
          'TR.Cash',
          'TR.TotalAssetsReported',
          'TR.TotalCurrLiabilities',
          'TR.TotalAssetsReported',
          'TR.TotalCurrLiabilities',
          'TR.TotalLiabilities',
          'TR.TotalCurrLiabilities',
          'TR.NetSales',
          'TR.TotalLiabilities',
          'TR.TotalAssetsReported',
          'TR.TotalLiabilities',
          'TR.TotalLiabilities',
          'TR.TangibleBookValueRptd',
          'TR.TangibleBookValueBnk',
          'TR.TangibleBookValueIns',
          'TR.TangibleBookValueUtil',
          'TR.TangibleBVPS',
          'TR.NetIncomeStartingLine',
          'TR.TotalAssetsReported',
          'TR.NetIncomeStartingLine',
          'TR.NetIncomeStartingLine',
          'TR.NetSales',
          'TR.OperatingIncome',
          'TR.TotalAssetsReported',
          'TR.STOCKPRICE',
          'TR.FiPrice',
          'TR.TotalCurrLiabilities',
          'TR.RetainedEarnings',
          'TR.TotalAssetsReported',
          'TR.MARKETCAPITALISATION',
          'MKT_CAP',
          'TR.TotalEquity',
          'TR.TotalAssetsReported',
          'TR.FiPricingVolatility',
          'TR.PCWrkgCapPrdOverPrdAvg(Period=FY0)',
          'TR.PCWrkgCapPrdOverPrdPctChg(Period=FY0)',
          'TR.PZPHasFiledForBankruptcy', # bankruptcy check (not working)
          'TR.PZPBankruptcyFilingDate', # bankruptcy check (not working)
          'TR.InstrumentIsActive', # bankruptcy check (not reliable)
          'TR.IsDelistedQuote']  # bankruptcy check (not reliable)

# remove duplicates, keeping the order of first appearance.
# A plain list(set(fields)) would shuffle the fields differently on every
# kernel restart (string hashing is randomised), which makes the order of
# the requested fields and of the saved csv columns non-reproducible.
fields = sorted(set(fields), key=fields.index)

In [None]:
# Financial factors for the bankrupt companies.
# One request covers the whole bankrupt_CIK list, using Frq='Q' and the
# date window 2000-01-01 .. 2019-06-30.
bankrupt_data, err = ek.get_data(
    bankrupt_CIK,
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)

# store the result next to the bankrupt-company time series
bankrupt_data.to_csv('data_bankrupt_CIK/bankrupt_data.csv')


In [None]:
# As before, the healthy list is too large for one request, so the
# download procedure is spread over several cells (one slice per cell).

# the dataframe returned for each slice is collected here
# (re-running this cell resets the list)
healthy_data_list = []

# 1 out of 8
healthy_df, err = ek.get_data(
    healthy_CIK_list[0],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# 2 out of 8 - fetch the second slice and keep its dataframe
healthy_df, err = ek.get_data(
    healthy_CIK_list[1],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# 3 out of 8 - fetch the third slice and keep its dataframe
healthy_df, err = ek.get_data(
    healthy_CIK_list[2],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# 4 out of 8 - fetch the fourth slice and keep its dataframe
healthy_df, err = ek.get_data(
    healthy_CIK_list[3],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# 5 out of 8 - fetch the fifth slice and keep its dataframe
healthy_df, err = ek.get_data(
    healthy_CIK_list[4],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# 6 out of 8 - fetch the sixth slice and keep its dataframe
healthy_df, err = ek.get_data(
    healthy_CIK_list[5],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# 7 out of 8 - fetch the seventh slice and keep its dataframe
healthy_df, err = ek.get_data(
    healthy_CIK_list[6],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# 8 out of 8 - fetch the eighth (last) slice and keep its dataframe
healthy_df, err = ek.get_data(
    healthy_CIK_list[7],
    fields=fields,
    parameters={'Frq': 'Q', 'SDate': '2000-01-01', 'EDate': '2019-06-30'},
)
healthy_data_list.append(healthy_df)


In [None]:
# stack the per-slice dataframes collected in healthy_data_list
healthy_data = pd.concat(healthy_data_list)

# write the combined dataframe to disk as a csv file
healthy_data.to_csv('data_healthy_CIK/healthy_data.csv')


In [None]:
# sanity check for the bankrupt companies: row count, then the first rows
print(bankrupt_data.shape[0])
bankrupt_data.head()


In [None]:
# display the bankrupt-company dataframe as the cell output
bankrupt_data

In [None]:
# sanity check for the healthy companies: row count, then the first rows
print(healthy_data.shape[0])
healthy_data.head()


In [None]:
# display the healthy-company dataframe as the cell output
healthy_data

****

In [None]:
# TODO: use the Eikon screener to find more bankrupt companies via the bankruptcy data item
# something, err = ek.get_data('SCREEN(IN(TR.PZPHasFiledForBankruptcy, Y))', ['TR.CommonName', 'TR.RIC'])

