In [1]:
import pickle
import math
import json

In [2]:
# Load the raw per-company financial statements.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('financial_statements.pickle', 'rb') as statements_file:
    data = pickle.load(statements_file)

In [3]:
# Field names available on the first company record.
list(data[0])

['SimFin ID',
 'company',
 'ticker',
 'financial year end (month)',
 'industry code',
 'Share Price',
 'Common Shares Outstanding',
 'Avg. Basic Shares Outstanding',
 'Avg. Diluted Shares Outstanding',
 'Revenues',
 'COGS',
 'SG&A',
 'EBIT',
 'EBITDA',
 'Net Income from Discontinued Op.',
 'Net Profit',
 'Dividends',
 'Cash and Cash Equivalents',
 'Receivables',
 'Current Assets',
 'Net PP&E',
 'Intangible Assets',
 'Goodwill',
 'Total Noncurrent Assets',
 'Total Assets',
 'Short term debt',
 'Accounts Payable',
 'Current Liabilities',
 'Long Term Debt',
 'Total Noncurrent Liabilities',
 'Total Liabilities',
 'Preferred Equity',
 'Share Capital',
 'Treasury Stock',
 'Retained Earnings',
 'Equity Before Minorities',
 'Minorities',
 'Total Equity',
 'Cash From Operating Activities',
 'Cash From Investing Activities',
 'Cash From Financing Activities',
 'Net Change in Cash']

In [4]:
def cast_to_float(item):
    """Convert ``item`` to ``float``, returning ``None`` when it cannot be converted.

    Parameters
    ----------
    item : object
        Raw value, e.g. a numeric string, a number or ``None``.

    Returns
    -------
    float or None
        ``float(item)`` on success; ``None`` when ``float`` rejects the value.
    """
    try:
        return float(item)
    except (ValueError, TypeError):
        # ValueError: a string that is not a number (e.g. '' or 'n/a').
        # TypeError: a non-string, non-number such as None. This function
        # itself produces None, so catching TypeError keeps a second pass
        # over already-converted data from crashing.
        return None

# Replace every list-valued field of every record, in place, with the same
# list passed element-wise through cast_to_float; other fields are untouched.
for obj in data:
    for key, value in obj.items():
        if not isinstance(value, list):
            continue
        obj[key] = [cast_to_float(entry) for entry in value]

In [5]:
# Truthy EBIT entries of the first record (drops None and 0.0).
list(filter(None, data[0]['EBIT']))

[14.238,
 17.968,
 21.469,
 23.638,
 28.293,
 25.786,
 24.907,
 26.846,
 19.156,
 23.706,
 24.159,
 70.293,
 56.935,
 37.617,
 21.974,
 41.524,
 44.526,
 43.282,
 42.444,
 44.612,
 41.563,
 46.359,
 49.865,
 37.78,
 41.626,
 41.048,
 37.685]

In [6]:
# X21	sales (n) / sales (n-1) 
# X22	profit on operating activities / total assets 
# X23	net profit / sales 
# X24	gross profit (in 3 years) / total assets 
# X25	(equity - share capital) / total assets 
# X26	(net profit + depreciation) / total liabilities 
# X27	profit on operating activities / financial expenses 
# X28	working capital / fixed assets 
# X29	logarithm of total assets 
# X30	(total liabilities - cash) / sales 
# X31	(gross profit + interest) / sales 
# X32	(current liabilities * 365) / cost of products sold 
# X33	operating expenses / short-term liabilities 
# X34	operating expenses / total liabilities 
# X35	profit on sales / total assets 
# X36	total sales / total assets 
# X37	(current assets - inventories) / long-term liabilities 
# X38	constant capital / total assets 
# X39	profit on sales / sales 
# X40	(current assets - inventory - receivables) / short-term liabilities 

# X41	total liabilities / ((profit on operating activities + depreciation) * (12/365)) 
# X42	profit on operating activities / sales 
# X43	rotation receivables + inventory turnover in days 
# X44	(receivables * 365) / sales 
# X45	net profit / inventory 
# X46	(current assets - inventory) / short-term liabilities 
# X47	(inventory * 365) / cost of products sold 
# X48	EBITDA (profit on operating activities - depreciation) / total assets 
# X49	EBITDA (profit on operating activities - depreciation) / sales 
# X50	current assets / total liabilities 
# X51	short-term liabilities / total assets 
# X52	(short-term liabilities * 365) / cost of products sold 
# X53	equity / fixed assets 
# X54	constant capital / fixed assets 
# X55	working capital 
# X56	(sales - cost of products sold) / sales 
# X57	(current assets - inventory - short-term liabilities) / (sales - gross profit - depreciation) 
# X58	total costs /total sales 
# X59	long-term liabilities / equity 
# X60	sales / inventory 
# X61	sales / receivables 
# X62	(short-term liabilities *365) / sales 
# X63	sales / short-term liabilities 
# X64	sales / fixed assets

def get_features(obj):
    """Compute the X1..X64 financial ratios for a single company record.

    Parameters
    ----------
    obj : dict
        One company record. It must provide 'company', 'ticker' and every
        per-period list read by the formulas below (e.g. 'Revenues',
        'Total Assets'). The length of obj['Share Price'] determines how
        many periods are evaluated.

    Returns
    -------
    dict
        ``{'company': ..., 'ticker': ..., 'x1': [...], ...}``. Each ratio
        list holds only the periods for which the ratio could be computed,
        so the lists of different ratios are not guaranteed to be
        index-aligned. Ratios with no computable period at all (including
        the ones whose inputs do not exist in the data set) are omitted.

    Raises
    ------
    KeyError
        If ``obj`` lacks a field that one of the formulas reads.
    """
    def catchError(obj, i, func):
        # "No ratio for this period" cases: a missing value (None makes the
        # arithmetic raise TypeError), a zero denominator, a non-positive
        # log10 argument (ValueError), or a series that is shorter than
        # 'Share Price' (IndexError). '' is the "missing" sentinel.
        try:
            return func(obj, i)
        except (TypeError, ZeroDivisionError, ValueError, IndexError):
            return ''

    funcs = {
        # X1: net profit / total assets
        'x1': lambda obj, i: obj['Net Profit'][i] / obj['Total Assets'][i],

        # X2: total liabilities / total assets
        'x2': lambda obj, i: obj['Total Liabilities'][i] / obj['Total Assets'][i],

        # X3: working capital / total assets
        # working capital = current assets - current liabilities
        'x3': lambda obj, i: (obj['Current Assets'][i] - obj['Current Liabilities'][i]) / obj['Total Assets'][i],

        # X4: current assets / short-term liabilities
        # assume short-term liabilities = current liabilities
        'x4': lambda obj, i: obj['Current Assets'][i] / obj['Current Liabilities'][i],

        # X5: [(cash + short-term securities + receivables - short-term liabilities) / (operating expenses - depreciation)] * 365
        # assume cash + short-term securities + receivables = current assets
        # assume operating expenses - depreciation = SG&A
        'x5': lambda obj, i: (obj['Current Assets'][i] - obj['Current Liabilities'][i]) / obj['SG&A'][i] * 365,

        # X6: retained earnings / total assets
        'x6': lambda obj, i: obj['Retained Earnings'][i] / obj['Total Assets'][i],

        # X7: EBIT / total assets
        'x7': lambda obj, i: obj['EBIT'][i] / obj['Total Assets'][i],

        # X8: book value of equity / total liabilities
        # book value of equity = total assets - total liabilities
        'x8': lambda obj, i: (obj['Total Assets'][i] - obj['Total Liabilities'][i]) / obj['Total Liabilities'][i],

        # X9: sales / total assets
        'x9': lambda obj, i: obj['Revenues'][i] / obj['Total Assets'][i],

        # X10: equity / total assets
        'x10': lambda obj, i: obj['Total Equity'][i] / obj['Total Assets'][i],

        # X11: (gross profit + extraordinary items + financial expenses) / total assets
        # extraordinary items and financial expenses missing
        'x11': lambda obj, i: '',

        # X12: gross profit / short-term liabilities
        # gross profit = revenue - COGS
        'x12': lambda obj, i: (obj['Revenues'][i] - obj['COGS'][i]) / obj['Current Liabilities'][i],

        # X13: (gross profit + depreciation) / sales
        # depreciation = operating expenses - SG&A = gross profit - net income - SG&A
        # gross profit + depreciation = 2 * gross profit - net income - SG&A
        'x13': lambda obj, i: (2 * (obj['Revenues'][i] - obj['COGS'][i]) - obj['Net Profit'][i] - obj['SG&A'][i]) / obj['Revenues'][i],

        # X14: (gross profit + interest) / total assets
        # interest missing
        'x14': lambda obj, i: '',

        # X15: (total liabilities * 365) / (gross profit + depreciation)
        # gross profit + depreciation = 2 * gross profit - net income - SG&A
        'x15': lambda obj, i: (obj['Total Liabilities'][i] * 365) / (2 * (obj['Revenues'][i] - obj['COGS'][i]) - obj['Net Profit'][i] - obj['SG&A'][i]),

        # X16: (gross profit + depreciation) / total liabilities
        'x16': lambda obj, i: (2 * (obj['Revenues'][i] - obj['COGS'][i]) - obj['Net Profit'][i] - obj['SG&A'][i]) / obj['Total Liabilities'][i],

        # X17: total assets / total liabilities
        'x17': lambda obj, i: obj['Total Assets'][i] / obj['Total Liabilities'][i],

        # X18: gross profit / total assets
        'x18': lambda obj, i: (obj['Revenues'][i] - obj['COGS'][i]) / obj['Total Assets'][i],

        # X19: gross profit / sales
        'x19': lambda obj, i: (obj['Revenues'][i] - obj['COGS'][i]) / obj['Revenues'][i],

        # X20: (inventory * 365) / sales
        # inventory missing
        'x20': lambda obj, i: '',

        # X21: sales (n) / sales (n-1) -- not implemented
        'x21': lambda obj, i: '',
        # X22: assume profit on operating activities === cash from operating activities
        # NOTE(review): x42 uses EBIT for the same quantity -- confirm which proxy is intended.
        'x22': lambda obj, i: obj['Cash From Operating Activities'][i] / obj['Total Assets'][i],
        # X23: net profit / sales (the denominator is sales, not total assets,
        # otherwise this would just repeat x1)
        'x23': lambda obj, i: obj['Net Profit'][i] / obj['Revenues'][i],
        'x24': lambda obj, i: '',  # gross profit (in 3 years) missing
        'x25': lambda obj, i: (obj['Total Equity'][i] - obj['Share Capital'][i]) / obj['Total Assets'][i],
        'x26': lambda obj, i: '',  # depreciation missing
        'x27': lambda obj, i: '',  # financial expenses missing
        # assume net PP&E === fixed assets
        # NOTE(review): x53 and x64 use total noncurrent assets as fixed assets instead.
        'x28': lambda obj, i: (obj['Current Assets'][i] - obj['Current Liabilities'][i]) / obj['Net PP&E'][i],
        'x29': lambda obj, i: math.log10(obj['Total Assets'][i]),
        'x30': lambda obj, i: (obj['Total Liabilities'][i] - obj['Cash and Cash Equivalents'][i]) / obj['Revenues'][i],
        'x31': lambda obj, i: '',  # interest missing
        'x32': lambda obj, i: obj['Current Liabilities'][i] * 365 / obj['COGS'][i],  # same as x52
        'x33': lambda obj, i: (obj['COGS'][i] + obj['SG&A'][i]) / obj['Current Liabilities'][i],  # assume COGS + SG&A === operating expenses
        'x34': lambda obj, i: (obj['COGS'][i] + obj['SG&A'][i]) / obj['Total Liabilities'][i],  # assume COGS + SG&A === operating expenses
        'x35': lambda obj, i: '',  # missing net revenue
        'x36': lambda obj, i: obj['Revenues'][i] / obj['Total Assets'][i],
        'x37': lambda obj, i: '',  # missing inventories
        'x38': lambda obj, i: '',  # constant capital ?? === fixed assets === total noncurrent assets??
        'x39': lambda obj, i: '',  # missing net revenue
        'x40': lambda obj, i: '',  # missing inventories

        # X41: depreciation is not available, so only EBIT is used in the denominator
        'x41': lambda obj, i: obj['Total Liabilities'][i] / (obj['EBIT'][i] * (12.0 / 365.0)),
        'x42': lambda obj, i: obj['EBIT'][i] / obj['Revenues'][i],
        'x43': lambda obj, i: '',  # inventory missing
        'x44': lambda obj, i: obj['Receivables'][i] * 365 / obj['Revenues'][i],
        'x45': lambda obj, i: '',  # inventory missing
        'x46': lambda obj, i: '',  # inventory missing
        'x47': lambda obj, i: '',  # inventory missing
        'x48': lambda obj, i: obj['EBITDA'][i] / obj['Total Assets'][i],
        'x49': lambda obj, i: obj['EBITDA'][i] / obj['Revenues'][i],
        'x50': lambda obj, i: obj['Current Assets'][i] / obj['Total Liabilities'][i],
        'x51': lambda obj, i: obj['Current Liabilities'][i] / obj['Total Assets'][i],  # assume short-term liabilities === current liabilities
        'x52': lambda obj, i: obj['Current Liabilities'][i] * 365 / obj['COGS'][i],
        'x53': lambda obj, i: obj['Total Equity'][i] / obj['Total Noncurrent Assets'][i],
        # NOTE(review): X54 is "constant capital / fixed assets"; Net PP&E is
        # used as a stand-in for constant capital here -- confirm.
        'x54': lambda obj, i: obj['Net PP&E'][i] / obj['Total Noncurrent Assets'][i],
        'x55': lambda obj, i: obj['Current Assets'][i] - obj['Current Liabilities'][i],
        'x56': lambda obj, i: (obj['Revenues'][i] - obj['COGS'][i]) / obj['Revenues'][i],
        'x57': lambda obj, i: '',  # inventory missing
        # X58: total costs approximated as revenues - net profit
        'x58': lambda obj, i: (obj['Revenues'][i] - obj['Net Profit'][i]) / obj['Revenues'][i],
        'x59': lambda obj, i: obj['Total Noncurrent Liabilities'][i] / obj['Total Equity'][i],
        'x60': lambda obj, i: '',  # inventory missing
        'x61': lambda obj, i: obj['Revenues'][i] / obj['Receivables'][i],
        'x62': lambda obj, i: obj['Current Liabilities'][i] * 365 / obj['Revenues'][i],
        'x63': lambda obj, i: obj['Revenues'][i] / obj['Current Liabilities'][i],
        'x64': lambda obj, i: obj['Revenues'][i] / obj['Total Noncurrent Assets'][i]
    }

    periods = range(len(obj['Share Price']))
    ratios = {}
    for ratio, func in funcs.items():
        # Evaluate every period, then drop the '' sentinels so that only the
        # computable periods remain.
        values = [catchError(obj, i, func) for i in periods]
        values = [value for value in values if value != '']
        if values:
            ratios[ratio] = values

    # Label the result with the record that was actually passed in, not with
    # whatever happens to be first in the global data list.
    return {'company': obj['company'], 'ticker': obj['ticker'], **ratios}

In [7]:
# Sanity check: number of entries per field for the first company
# (for 'company' and 'ticker' this is the string length).
x = get_features(data[0])
[len(value) for value in x.values()]

[21,
 4,
 27,
 28,
 28,
 28,
 27,
 28,
 27,
 28,
 27,
 28,
 27,
 27,
 27,
 27,
 28,
 27,
 27,
 27,
 27,
 28,
 28,
 28,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 27,
 26,
 26,
 28,
 28,
 27,
 28,
 28,
 29,
 27,
 27,
 28,
 27,
 27,
 27,
 27]

In [8]:
# Compute the ratio features for every company record, then preview the first.
new_data = list(map(get_features, data))
new_data[0]

{'company': '1 800 FLOWERS COM INC',
 'ticker': 'FLWS',
 'x1': [0.03551048518580986,
  0.05958861761645529,
  0.04782347653769136,
  0.04612390187448491,
  0.058903626750765235,
  0.049269613272924305,
  0.04073203636978378,
  0.05041714690549957,
  0.03923597141598176,
  0.05745060152708273,
  0.050123073596102345,
  0.07322939881365176,
  0.06710421956633156,
  0.04081291882681216,
  0.03313718394252013,
  0.05638065797633401,
  0.06931062116778798,
  0.0733187391761658,
  0.042458222920604055,
  0.0435831987075929,
  0.04626545485533477,
  0.07971654569478885,
  0.09093694921076723,
  0.0837902284809707,
  0.0999854476087616,
  0.0714517182849906,
  0.0679738441190888],
 'x2': [0.5005305841344803,
  0.398948924042043,
  0.3831427122225061,
  0.4544722585660593,
  0.4024408634203651,
  0.36802151933957894,
  0.3231136508139623,
  0.45634598364486756,
  0.358957820050544,
  0.30720156330823606,
  0.3044111986067145,
  0.42207247031286765,
  0.6191044484692607,
  0.5713085858713804,
  

In [9]:
# Persist the computed ratios as JSON.
with open('data_with_ratios.json', 'w') as json_file:
    json.dump(new_data, json_file)

In [10]:
# Persist the computed ratios as a pickle alongside the JSON copy.
with open('data_with_ratios.pickle', 'wb') as pickle_file:
    pickle.dump(new_data, pickle_file)