In [None]:
from yahooquery import Ticker
import pandas as pd
import bs4 as bs
import pickle
import requests
import time

def save_russell1000_tickers():
    """Scrape the Russell 1000 ticker symbols from Wikipedia and cache them.

    Downloads the Wikipedia "Russell 1000 Index" page, takes the first table
    whose class is ``wikitable sortable``, and reads the second ``<td>`` of
    every row after the header row. The resulting list is pickled to
    ``russell1000tickers.pickle`` in the current working directory.

    Returns:
        list[str]: ticker symbols in table order, with surrounding whitespace
        removed.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
        RuntimeError: if the table is not found or no ticker could be read
            from it (e.g. the page layout changed).
    """
    resp = requests.get(
        'https://en.wikipedia.org/wiki/Russell_1000_Index',
        # Wikimedia's User-Agent policy asks clients to identify themselves;
        # generic library agents may be rejected.
        headers={'User-Agent': 'russell1000-notebook/1.0 (python-requests)'},
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Fail here with the HTTP status instead of later on a missing table.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class': 'wikitable sortable'})
    if table is None:
        raise RuntimeError("no 'wikitable sortable' table found on the Russell 1000 page")

    tickers = []
    for row in table.find_all('tr')[1:]:  # [1:] skips the header row
        cells = row.find_all('td')
        if len(cells) < 2:
            # Rows without a second data cell carry no ticker; skip them
            # instead of raising IndexError.
            continue
        ticker = cells[1].text.strip()
        if ticker:
            tickers.append(ticker)

    if not tickers:
        raise RuntimeError('the Russell 1000 table yielded no tickers')

    with open("russell1000tickers.pickle", "wb") as f:
        pickle.dump(tickers, f)

    return tickers

In [None]:
# Download the ticker list and keep it in alphabetical order.
tickers = sorted(save_russell1000_tickers())

In [None]:
# Symbols whose data could not be processed; the main loop appends to this
# list and the retry loop iterates over it.
retry = list()

In [None]:
# Columns exported for every ticker that passes all checks.
RESULT_COLUMNS = [
    'asOfDate', 'EBIT', 'InvestedCapital', 'roic', 'MarketCap',
    'CashAndCashEquivalents', 'totalDebt', 'preferredequity',
    'faustmannRatio', 'industry',
]
# Liability columns summed into totalDebt; a column absent from the data counts as 0.
DEBT_COLUMNS = [
    'CurrentDeferredLiabilities',
    'LongTermDebtAndCapitalLeaseObligation',
    'NonCurrentDeferredLiabilities',
    'OtherNonCurrentLiabilities',
]

dfResult = pd.DataFrame()
counter = 0
FINAL_COUNT = 1000  # stop once this many tickers have been processed

# Manual resume switch: with flag=False every ticker up to and including
# `last` is skipped, and processing starts with the ticker after it.
flag = True
last = ''

# One single-ticker frame per success; concatenated once instead of growing
# dfResult inside the loop.
resultFrames = []

try:
    for t in tickers:
        if not flag:
            # Still fast-forwarding; `last` itself is skipped as well.
            if t == last:
                flag = True
            continue

        # Check the budget first so no ticker is announced (or waited for)
        # that will not actually be processed.
        if counter == FINAL_COUNT:
            break
        counter = counter + 1
        time.sleep(2)  # pause between tickers (presumably to respect rate limits)
        print('processing ticker', t)

        # The data source is queried with '-' in place of '.' in the symbol.
        t = t.replace('.', '-')
        ticker = Ticker(t)

        # Latest quarterly ("3M") balance sheet row.
        balanceSheet = ticker.balance_sheet(frequency="q")
        try:
            balanceSheet = (
                balanceSheet[balanceSheet['periodType'] == "3M"]
                .sort_values('asOfDate')
                .groupby('symbol')
                .tail(1)
            )
        except Exception:
            # `except Exception` (not bare) so Ctrl-C / kernel interrupt still works.
            print('balanceSheet error:', balanceSheet)
            retry.append(t)
            time.sleep(30)
            continue

        # Latest trailing-twelve-month ("TTM") income statement row.
        incomeStatement = ticker.income_statement(frequency="q")
        try:
            incomeStatement = (
                incomeStatement[incomeStatement['periodType'] == "TTM"]
                .sort_values('asOfDate')
                .groupby('symbol')
                .tail(1)
            )
        except Exception:
            print('incomeStatement error:', incomeStatement)
            retry.append(t)
            time.sleep(30)
            continue

        # Market cap and industry, each reshaped to one row per symbol.
        try:
            valuationMeasure = pd.DataFrame.from_dict(ticker.summary_detail).loc[['marketCap']].transpose()
            valuationMeasure.index.name = 'symbol'
            valuationMeasure.columns = ['MarketCap']
            industry = pd.DataFrame.from_dict(ticker.summary_profile).loc[['industry']].transpose()
            industry.index.name = 'symbol'
            industry.columns = ['industry']
        except Exception:
            print('valuationMeasure error:', t)
            retry.append(t)
            continue

        mergedBsIs = pd.merge(balanceSheet, incomeStatement, on=['symbol']).fillna(0)
        if mergedBsIs.empty:
            # Nothing to compute from. Skip now: continuing would queue the
            # ticker twice and could fail later on the missing 'asOfDate' column.
            print('bad join 1:', t)
            retry.append(t)
            continue
        if 'asOfDate_x' in mergedBsIs.columns:
            # Both inputs carry asOfDate; keep the left (balance sheet) one
            # under the plain name.
            mergedBsIs = mergedBsIs.rename(columns={'asOfDate_x': 'asOfDate'})

        mergedBsIsSum = pd.merge(mergedBsIs, valuationMeasure, on=['symbol']).fillna(0)
        mergedBsIsSum = pd.merge(mergedBsIsSum, industry, on=['symbol']).fillna(0)
        if mergedBsIsSum.empty:
            print('bad join 2:', t)
            retry.append(t)
            continue

        # Inputs of the ROIC calculation; report the first one that is absent.
        missing = next(
            (c for c in ('EBIT', 'InvestedCapital', 'TaxRateForCalcs')
             if c not in mergedBsIsSum.columns),
            None,
        )
        if missing is not None:
            print(f'missing {missing}:', t)
            continue

        mergedBsIsSum['nopat'] = mergedBsIsSum['EBIT'] * (1 - mergedBsIsSum['TaxRateForCalcs'])
        mergedBsIsSum['roic'] = mergedBsIsSum['nopat'] / mergedBsIsSum['InvestedCapital']

        if 'CashAndCashEquivalents' not in mergedBsIsSum.columns:
            print('cash error: ', t)
            continue
        cash = mergedBsIsSum['CashAndCashEquivalents']

        # sum() starts from 0, so absent liability columns simply contribute nothing.
        debt = sum(mergedBsIsSum[c] for c in DEBT_COLUMNS if c in mergedBsIsSum.columns)
        mergedBsIsSum['totalDebt'] = debt

        if not {'CapitalStock', 'CommonStock'}.issubset(mergedBsIsSum.columns):
            print('capital or common stock missing:', t)
            continue
        preferredequity = mergedBsIsSum['CapitalStock'] - mergedBsIsSum['CommonStock']
        mergedBsIsSum['preferredequity'] = preferredequity

        # InvestedCapital was verified above, so this cannot fail on a missing column.
        networth = mergedBsIsSum['InvestedCapital'] + cash - debt - preferredequity

        mergedBsIsSum['faustmannRatio'] = mergedBsIsSum['MarketCap'] / networth
        resultFrames.append(mergedBsIsSum[RESULT_COLUMNS])
finally:
    # Runs on interruption or error too, so partial results remain in dfResult.
    if resultFrames:
        dfResult = pd.concat(resultFrames)
dfResult

In [None]:
# Persist the main-run results as a semicolon-separated file.
dfResult.to_csv(path_or_buf='austrian.csv', sep=';')

In [None]:
# Show the symbols that were queued for another attempt.
retry

In [None]:
# Columns exported for every ticker that passes all checks.
RESULT_COLUMNS = [
    'asOfDate', 'EBIT', 'InvestedCapital', 'roic', 'MarketCap',
    'CashAndCashEquivalents', 'totalDebt', 'preferredequity',
    'faustmannRatio', 'industry',
]
# Liability columns summed into totalDebt; a column absent from the data counts as 0.
DEBT_COLUMNS = [
    'CurrentDeferredLiabilities',
    'LongTermDebtAndCapitalLeaseObligation',
    'NonCurrentDeferredLiabilities',
    'OtherNonCurrentLiabilities',
]

# NOTE: this rebinds dfResult, so earlier results must already be saved.
dfResult = pd.DataFrame()
counter = 0
FINAL_COUNT = 1000  # stop once this many tickers have been processed

# Manual resume switch: with flag=False every ticker up to and including
# `last` is skipped, and processing starts with the ticker after it.
flag = True
last = ''

# One single-ticker frame per success; concatenated once instead of growing
# dfResult inside the loop.
resultFrames = []

try:
    # The same symbol may appear in `retry` more than once (e.g. after
    # re-running the main cell); process each only once, keeping order.
    for t in list(dict.fromkeys(retry)):
        if not flag:
            # Still fast-forwarding; `last` itself is skipped as well.
            if t == last:
                flag = True
            continue

        # Check the budget first so no ticker is announced that will not
        # actually be processed.
        if counter == FINAL_COUNT:
            break
        counter = counter + 1
        print('processing ticker', t)

        # The data source is queried with '-' in place of '.' in the symbol.
        t = t.replace('.', '-')
        ticker = Ticker(t)

        # Latest quarterly ("3M") balance sheet row.
        balanceSheet = ticker.balance_sheet(frequency="q")
        try:
            balanceSheet = (
                balanceSheet[balanceSheet['periodType'] == "3M"]
                .sort_values('asOfDate')
                .groupby('symbol')
                .tail(1)
            )
        except Exception:
            # `except Exception` (not bare) so Ctrl-C / kernel interrupt still works.
            print('balanceSheet error:', balanceSheet)
            continue

        # Latest trailing-twelve-month ("TTM") income statement row.
        incomeStatement = ticker.income_statement(frequency="q")
        try:
            incomeStatement = (
                incomeStatement[incomeStatement['periodType'] == "TTM"]
                .sort_values('asOfDate')
                .groupby('symbol')
                .tail(1)
            )
        except Exception:
            print('incomeStatement error:', incomeStatement)
            continue

        # Market cap and industry, each reshaped to one row per symbol.
        try:
            valuationMeasure = pd.DataFrame.from_dict(ticker.summary_detail).loc[['marketCap']].transpose()
            valuationMeasure.index.name = 'symbol'
            valuationMeasure.columns = ['MarketCap']
            industry = pd.DataFrame.from_dict(ticker.summary_profile).loc[['industry']].transpose()
            industry.index.name = 'symbol'
            industry.columns = ['industry']
        except Exception:
            print('valuationMeasure error:', t)
            continue

        mergedBsIs = pd.merge(balanceSheet, incomeStatement, on=['symbol']).fillna(0)
        if mergedBsIs.empty:
            # Nothing to compute from. Skip now: continuing could fail later
            # on the missing 'asOfDate' column.
            print('bad join 1:', t)
            continue
        if 'asOfDate_x' in mergedBsIs.columns:
            # Both inputs carry asOfDate; keep the left (balance sheet) one
            # under the plain name.
            mergedBsIs = mergedBsIs.rename(columns={'asOfDate_x': 'asOfDate'})

        mergedBsIsSum = pd.merge(mergedBsIs, valuationMeasure, on=['symbol']).fillna(0)
        mergedBsIsSum = pd.merge(mergedBsIsSum, industry, on=['symbol']).fillna(0)
        if mergedBsIsSum.empty:
            print('bad join 2:', t)
            continue

        # Inputs of the ROIC calculation; report the first one that is absent.
        missing = next(
            (c for c in ('EBIT', 'InvestedCapital', 'TaxRateForCalcs')
             if c not in mergedBsIsSum.columns),
            None,
        )
        if missing is not None:
            print(f'missing {missing}:', t)
            continue

        mergedBsIsSum['nopat'] = mergedBsIsSum['EBIT'] * (1 - mergedBsIsSum['TaxRateForCalcs'])
        mergedBsIsSum['roic'] = mergedBsIsSum['nopat'] / mergedBsIsSum['InvestedCapital']

        if 'CashAndCashEquivalents' not in mergedBsIsSum.columns:
            print('cash error: ', t)
            continue
        cash = mergedBsIsSum['CashAndCashEquivalents']

        # sum() starts from 0, so absent liability columns simply contribute nothing.
        debt = sum(mergedBsIsSum[c] for c in DEBT_COLUMNS if c in mergedBsIsSum.columns)
        mergedBsIsSum['totalDebt'] = debt

        if not {'CapitalStock', 'CommonStock'}.issubset(mergedBsIsSum.columns):
            print('capital or common stock missing:', t)
            continue
        preferredequity = mergedBsIsSum['CapitalStock'] - mergedBsIsSum['CommonStock']
        mergedBsIsSum['preferredequity'] = preferredequity

        # InvestedCapital was verified above, so this cannot fail on a missing column.
        networth = mergedBsIsSum['InvestedCapital'] + cash - debt - preferredequity

        mergedBsIsSum['faustmannRatio'] = mergedBsIsSum['MarketCap'] / networth
        resultFrames.append(mergedBsIsSum[RESULT_COLUMNS])
finally:
    # Runs on interruption or error too, so partial results remain in dfResult.
    if resultFrames:
        dfResult = pd.concat(resultFrames)
dfResult

In [None]:
# Persist the retry-run results as a semicolon-separated file.
dfResult.to_csv(path_or_buf='austrian_retry.csv', sep=';')