In [26]:
import math
import pickle

In [9]:
# Load the pickled financial statements from the notebook's working directory.
# Later cells iterate `data` and index data[0] like a dict of statement items.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only load this file if it comes from a trusted source.
with open('financial_statements.pickle', 'rb') as fp:
    data = pickle.load(fp)

In [17]:
# Show the field names available on the first record.
list(data[0])

['SimFin ID',
 'company',
 'ticker',
 'financial year end (month)',
 'industry code',
 'Share Price',
 'Common Shares Outstanding',
 'Avg. Basic Shares Outstanding',
 'Avg. Diluted Shares Outstanding',
 'Revenues',
 'COGS',
 'SG&A',
 'EBIT',
 'EBITDA',
 'Net Income from Discontinued Op.',
 'Net Profit',
 'Dividends',
 'Cash and Cash Equivalents',
 'Receivables',
 'Current Assets',
 'Net PP&E',
 'Intangible Assets',
 'Goodwill',
 'Total Noncurrent Assets',
 'Total Assets',
 'Short term debt',
 'Accounts Payable',
 'Current Liabilities',
 'Long Term Debt',
 'Total Noncurrent Liabilities',
 'Total Liabilities',
 'Preferred Equity',
 'Share Capital',
 'Treasury Stock',
 'Retained Earnings',
 'Equity Before Minorities',
 'Minorities',
 'Total Equity',
 'Cash From Operating Activities',
 'Cash From Investing Activities',
 'Cash From Financing Activities',
 'Net Change in Cash']

In [11]:
# Convert every list-valued field of every record to floats, in place.
# Entries that cannot be parsed as a number are replaced with None.
# TypeError is caught alongside ValueError because float(None) raises
# TypeError: without it, re-running this cell after a first pass (when some
# entries are already None) would crash instead of being a no-op.
for obj in data:
    for key in obj:
        if isinstance(obj[key], list):
            for i in range(len(obj[key])):
                try:
                    obj[key][i] = float(obj[key][i])
                except (TypeError, ValueError):
                    obj[key][i] = None

In [18]:
# Show the EBIT values that were successfully parsed. Compare against None
# explicitly so that a genuine 0.0 reading is not dropped by a truthiness test.
[value for value in data[0]['EBIT'] if value is not None]

[14.238,
 17.968,
 21.469,
 23.638,
 28.293,
 25.786,
 24.907,
 26.846,
 19.156,
 23.706,
 24.159,
 70.293,
 56.935,
 37.617,
 21.974,
 41.524,
 44.526,
 43.282,
 42.444,
 44.612,
 41.563,
 46.359,
 49.865,
 37.78,
 41.626,
 41.048,
 37.685]

In [31]:
# X21	sales (n) / sales (n-1) 
# X22	profit on operating activities / total assets 
# X23	net profit / sales 
# X24	gross profit (in 3 years) / total assets 
# X25	(equity - share capital) / total assets 
# X26	(net profit + depreciation) / total liabilities 
# X27	profit on operating activities / financial expenses 
# X28	working capital / fixed assets 
# X29	logarithm of total assets 
# X30	(total liabilities - cash) / sales 
# X31	(gross profit + interest) / sales 
# X32	(current liabilities * 365) / cost of products sold 
# X33	operating expenses / short-term liabilities 
# X34	operating expenses / total liabilities 
# X35	profit on sales / total assets 
# X36	total sales / total assets 
# X37	(current assets - inventories) / long-term liabilities 
# X38	constant capital / total assets 
# X39	profit on sales / sales 
# X40	(current assets - inventory - receivables) / short-term liabilities 

def get_21_40_features(obj):
    """Compute the ratios X21..X40 for a single company record.

    Parameters
    ----------
    obj : dict
        Maps statement item names (e.g. 'Revenues', 'Total Assets') to
        per-period lists. The number of periods is taken from
        ``len(obj['Share Price'])``; entries are expected to be numbers or
        None (assumption based on the conversion cell earlier in the notebook).

    Returns
    -------
    dict
        Keys 'x21' .. 'x40' (in that order), each mapped to a list with one
        entry per period. An entry is the empty string '' when the ratio is
        not implemented, or when it cannot be computed for that period
        (missing value, division by zero, logarithm of a non-positive number).

    Raises
    ------
    KeyError
        If a statement item used by one of the ratios is absent from ``obj``.
    IndexError
        If a statement series is shorter than ``obj['Share Price']``.
    """
    def catchError(obj, i, func):
        # Evaluate one ratio for one period; '' marks "not computable".
        try:
            return func(obj, i)
        except (TypeError, ZeroDivisionError, ValueError):
            return ''

    funcs = {
        'x21': lambda obj, i: '',  # not implemented
        # assume cash from operating activities === profit on operating activities
        'x22': lambda obj, i: obj['Cash From Operating Activities'][i] / obj['Total Assets'][i],
        # X23 is net profit / sales, so the denominator is Revenues
        # (the previous version divided by Total Assets by mistake).
        'x23': lambda obj, i: obj['Net Profit'][i] / obj['Revenues'][i],
        'x24': lambda obj, i: '',  # gross profit (in 3 years) missing
        'x25': lambda obj, i: (obj['Total Equity'][i] - obj['Share Capital'][i]) / obj['Total Assets'][i],
        'x26': lambda obj, i: '',  # depreciation missing
        'x27': lambda obj, i: '',  # financial expenses missing
        # assume net pp&e === fixed assets (original note: "check 53")
        'x28': lambda obj, i: (obj['Current Assets'][i] - obj['Current Liabilities'][i]) / obj['Net PP&E'][i],
        # base-10 logarithm; ValueError for non-positive assets is mapped to ''
        'x29': lambda obj, i: math.log10(obj['Total Assets'][i]),
        'x30': lambda obj, i: (obj['Total Liabilities'][i] - obj['Cash and Cash Equivalents'][i]) / obj['Revenues'][i],
        'x31': lambda obj, i: '',  # interest missing
        # original note: "same as #52"
        'x32': lambda obj, i: obj['Current Liabilities'][i] * 365 / obj['COGS'][i],
        # assume cogs + sg&a === operating expenses
        'x33': lambda obj, i: (obj['COGS'][i] + obj['SG&A'][i]) / obj['Current Liabilities'][i],
        # assume cogs + sg&a === operating expenses
        'x34': lambda obj, i: (obj['COGS'][i] + obj['SG&A'][i]) / obj['Total Liabilities'][i],
        'x35': lambda obj, i: '',  # missing net revenue
        'x36': lambda obj, i: obj['Revenues'][i] / obj['Total Assets'][i],
        'x37': lambda obj, i: '',  # missing inventories
        'x38': lambda obj, i: '',  # constant capital ?? === fixed assets === total noncurrent assets??
        'x39': lambda obj, i: '',  # missing net revenue
        'x40': lambda obj, i: '',  # missing inventories
    }

    periods = range(len(obj['Share Price']))
    res = {ratio: [catchError(obj, i, func) for i in periods]
           for ratio, func in funcs.items()}
    return res

In [32]:
# Compute the X21..X40 ratios for the first record in `data`; the result is a
# dict mapping 'x21'..'x40' to per-period lists.
x = get_21_40_features(data[0])

In [33]:
# Show the x22 values that could be computed ('' marks a period with no value).
[ratio for ratio in x['x22'] if ratio != '']

[0.07258812615955473,
 0.15170491165579128,
 0.14810701630787493,
 0.14406003962896072,
 0.16186624617382434,
 0.1385395464524359,
 0.07474832520659377,
 0.11044964453514641,
 0.12062762227714235,
 0.15898702764520553,
 0.14219929654058247,
 0.34154979363465593,
 0.30386411955795206,
 0.2529467502761164,
 0.07441448497978191,
 0.09313881342646953,
 0.12261028383994632,
 0.11467150222391892,
 0.10463891096682557,
 0.09050403877221325,
 0.0775530846724145,
 0.11043133563813419,
 0.14355897433895534,
 0.10047274582551718,
 0.11286869710512613,
 0.10219324597250955,
 0.13174128985118524]