In [8]:
import pandas as pd
import requests
import io
import re
import asyncio
from functools import partial
from bs4 import BeautifulSoup
from IPython.core.debugger import set_trace

In [2]:
def etf_alloc(*etfs):
    """Download etfdb.com allocation tables for several ETFs and align them.

    Each ticker's page (``https://etfdb.com/etf/<ticker>``) is parsed with
    ``pd.read_html`` in the event loop's default executor so the downloads
    overlap. Every parsed table becomes a one-column frame named after the
    upper-cased ticker, holding the ``Percentage`` column as floats.

    Parameters
    ----------
    *etfs : str
        Ticker symbols; each is appended verbatim to the base URL.

    Returns
    -------
    pandas.DataFrame
        Tables stacked along the rows and keyed by table name (outer index
        level), one column per ticker, with missing categories filled with 0.
        Only the table names found for the first ticker are used. An empty
        frame is returned when no ticker is given.

    Raises
    ------
    KeyError
        If a later ticker lacks one of the first ticker's tables.
    RuntimeError
        If an event loop is already running in this thread (see the note
        next to ``run_until_complete`` below).
    """
    # Nothing to download; also avoids indexing into an empty result list.
    if not etfs:
        return pd.DataFrame()

    def _name(df):
        # A table whose index is called "Region" is only a geographic table
        # when its labels mention a continent; otherwise label it a market tier.
        name = df.index.name
        if name.lower() != 'region':
            return name
        elif df.index.str.contains('america|asia|europe|africa|middle', case=False).any():
            return name
        else:
            return 'Market Tier'

    def _df(df, etf):
        # "12.3%" strings -> floats, in a single column named after the ticker.
        return pd.DataFrame({etf.upper(): df.dropna().Percentage.str.rstrip('%').astype('float')})

    async def get_tables(etf):
        url = 'https://etfdb.com/etf/' + etf
        read_html_partial = partial(pd.read_html, attrs={'class': 'chart base-table'}, index_col=0, flavor=['lxml', 'bs4'])
        # pd.read_html blocks, so run it in the loop's default executor.
        tables = await loop.run_in_executor(None, read_html_partial, url)
        return {_name(df): _df(df, etf) for df in tables}

    async def main():
        # gather() keeps the results in the same order as the tickers.
        return await asyncio.gather(*(get_tables(etf) for etf in etfs))

    # Use a private loop instead of installing it as the thread's current loop:
    # the loop is closed below, and a closed loop must not be left behind as
    # the current one for later asyncio users.
    loop = asyncio.new_event_loop()
    try:
        # To run the following in Jupyter, tornado has to be downgraded:
        # pip install tornado==4.5.3
        res = loop.run_until_complete(main())
    finally:
        # Close the loop even when a download or the loop start-up fails.
        loop.close()

    # sort must be the boolean False: the string 'False' is truthy and is
    # either treated as "sort" or rejected, depending on the pandas version.
    tables_by_name = {
        k: pd.concat([dic[k] for dic in res], axis=1, sort=False).fillna(0) for k in res[0]
    }

    return pd.concat(tables_by_name, axis=0)

In [3]:
%%time
# Fetch allocation tables for SPY and ACWI with the variadic etf_alloc(*etfs);
# the remaining tickers and the trailing `etfs` display are commented out.
etfs = etf_alloc('SPY','ACWI')#,'MTUM','VLUE','QUAL','XLB','XLY','XLP','XLE','XLF','XLV','XLI','IYR'); etfs

Wall time: 3.03 s


In [3]:
# Base URL of the per-ticker pages on etfdb.com; etf_alloc(ticker, src)
# appends the ticker symbol directly to it.
BASE_URL_ETFDB = 'https://etfdb.com/etf/'

In [5]:
class ETF:
    """Handle for a single ETF, identified by its ticker symbol."""

    def __init__(self, ticker):
        """Store ``ticker`` on the instance; no validation is performed."""
        self.ticker = ticker

    def price(self):
        """Placeholder with no implementation yet; always returns ``None``."""
        return None

    def holdings(self, src='etfdb'):
        """Placeholder with no implementation yet; ``src`` is ignored and ``None`` is returned."""
        return None

    def alloc(self, src='etfdb'):
        """Return whatever the module-level ``etf_alloc(ticker, src)`` yields for this ETF."""
        allocation = etf_alloc(self.ticker, src)
        return allocation

In [12]:
def etf_alloc(ticker, src):
    """Return the allocation tables of one ETF as a dict of percentage Series.

    Parameters
    ----------
    ticker : str
        Ticker symbol, appended to ``BASE_URL_ETFDB`` to build the page URL.
    src : str
        Data source; only ``'etfdb'`` is handled.

    Returns
    -------
    dict or None
        For ``src == 'etfdb'``: a mapping from table name to a float Series
        built from that table's ``Percentage`` column (rows with missing
        values dropped, trailing ``%`` stripped). A table whose index is named
        "Region" but whose labels mention no continent is stored under
        ``'Market Tier'``. For any other ``src``: ``None``.
    """
    if src != 'etfdb':
        return None

    def _label(table):
        # Keep the index name unless it is a "Region" table without any
        # geographic label, which is reported as a market-tier table.
        label = table.index.name
        if label.lower() == 'region':
            has_geo = table.index.str.contains('america|asia|europe|africa|middle', case=False).any()
            if not has_geo:
                return 'Market Tier'
        return label

    def _percentages(table):
        # "12.3%" strings -> floats, after discarding incomplete rows.
        complete_rows = table.dropna()
        return complete_rows.Percentage.str.rstrip('%').astype('float')

    page_url = BASE_URL_ETFDB + ticker
    parsed = pd.read_html(
        page_url,
        attrs={'class': 'chart base-table'},
        index_col=0,
        flavor=['lxml', 'bs4'],
    )

    allocations = {}
    for table in parsed:
        allocations[_label(table)] = _percentages(table)
    return allocations

In [13]:
# Fetch ACWI's allocation tables from etfdb with the single-ticker etf_alloc(ticker, src).
etf_alloc('ACWI', 'etfdb')

{'Asset': Asset
 Common equity         98.08
 ETF Cash Component     1.25
 Preferred stock        0.67
 Name: Percentage, dtype: float64, 'Sector': Sector
 Financials                17.08
 Technology                14.41
 Healthcare                11.72
 Consumer, Cyclical        10.01
 Industrials                9.91
 Communications             8.89
 Consumer, Non-Cyclical     8.05
 Energy                     6.22
 Basic Materials            4.28
 Utilities                  3.23
 Real Estate                3.02
 Other                      1.92
 ETF Cash Component         1.25
 Name: Percentage, dtype: float64, 'Market Cap': Market Cap
 Large cap             89.79
 Mid cap                8.60
 ETF Cash Component     1.25
 Small cap              0.35
 Micro cap              0.01
 Unknown                0.00
 Name: Percentage, dtype: float64, 'Region': Region
 North America    57.61
 Europe           21.22
 Asia-Pacific     10.88
 Asia              7.84
 Latin America     1.32
 Africa   