In [83]:
%matplotlib inline

from bs4 import BeautifulSoup
import matplotlib
import matplotlib.pyplot as plt
import re
import requests
import numpy as np
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
from webdriver_manager.chrome import ChromeDriverManager
import warnings

# Resolves a chromedriver binary through webdriver_manager (downloaded or taken
# from its local cache) and starts a Chrome session bound to `driver`.
# NOTE(review): nothing in the visible cells uses `driver` -- get_etf_holdings
# opens its own WebDriver() -- so this session appears to stay open unused;
# confirm whether other cells rely on it before removing.
driver = webdriver.Chrome(ChromeDriverManager().install())

# Silences every Python warning for the rest of the kernel session.
warnings.filterwarnings('ignore')


def get_table(soup):
    '''
    Finds the symbol list table inside a parsed page.

    soup: parsed document exposing select() (e.g. a BeautifulSoup object)

    return: the first table whose first header cell reads 'Symbol' and whose
            third header cell starts with '% Holding'

    raises: Exception if no table in the document has such a header
    '''
    for candidate in soup.select('table'):
        columns = candidate.select('thead tr th')
        # The check reads the third header cell, so shorter headers are skipped.
        if len(columns) <= 2:
            continue
        if columns[0].get_text().strip() != 'Symbol':
            continue
        if columns[2].get_text().strip().startswith('% Holding'):
            return candidate
    raise Exception('could not find symbol list table')
    
# Scrapes ETF holdings from barchart.com
def get_etf_holdings(etf_symbol, max_rows=25):
    '''
    Loads the barchart.com constituents page for an ETF in a browser and
    reads its holdings table.

    etf_symbol: str, ticker inserted into the constituents URL
    max_rows: int or None, maximum number of table rows read after the header
              row (default 25, the previously hard-coded limit); None reads
              every row

    return: pd.DataFrame with one column per holding symbol and the rows
            'name', 'percent' and 'shares' (transpose for one row per holding)

    raises: Exception (from get_table) if the page has no holdings table
    '''
    url = 'https://www.barchart.com/stocks/quotes/{}/constituents?page=all'.format(etf_symbol)

    # Loads the ETF constituents page. Only the page source is needed, so the
    # browser is shut down as soon as it has been captured -- and in `finally`
    # so a failed page load does not leave a Chrome process behind.
    browser = WebDriver() # webdriver.PhantomJS()
    try:
        browser.get(url)
        html = browser.page_source
    finally:
        browser.quit()

    # NOTE(review): 'html' presumably lets bs4 choose among the installed HTML
    # parsers; consider naming one explicitly for reproducible parsing.
    soup = BeautifulSoup(html, 'html')
    table = get_table(soup)

    # Skips the first <tr> (the header row) and applies the optional row cap.
    rows = table.select('tr')[1:]
    if max_rows is not None:
        rows = rows[:max_rows]

    # Reads the holdings table row by row and records each asset along with
    # its holdings percentage and share count.
    asset_dict = {}
    for row in rows:
        try:
            cells = row.select('td')
            symbol = cells[0].get_text().strip()
            name = cells[1].text.strip()
            percent = float(cells[2].get_text().strip().rstrip('%'))
            shares = int(cells[3].text.strip().replace(',', ''))
        except Exception as ex:
            # Best effort: a row that cannot be parsed is reported and skipped.
            # Catching Exception rather than BaseException keeps Ctrl-C and
            # kernel interrupts working during a scrape.
            print(ex)
            continue
        if symbol != "" and percent != 0.0:
            asset_dict[symbol] = {
                'name': name,
                'percent': percent,
                'shares': shares,
            }
    return pd.DataFrame(asset_dict)



Looking for [chromedriver 79.0.3945.36 win32] driver in cache
File found in cache by path [C:\Users\lukes\.wdm\drivers\chromedriver\79.0.3945.36\win32\chromedriver.exe]


In [84]:
# Scrape the holdings of the ETF with ticker 'VONE'.
constituent = get_etf_holdings('VONE')

# get_etf_holdings returns one column per symbol; transpose to display one row
# per holding.
constituent.T


Unnamed: 0,name,percent,shares
AAPL,Apple Inc.,4.37,591423
MSFT,Microsoft Corp.,3.99,1006217
AMZN,Amazon.com Inc.,2.56,55050
FB,Facebook Inc.,1.64,318066
BRK.B,Berkshire Hathaway Inc.,1.48,260098
JPM,JPMorgan Chase & Co.,1.45,414630
GOOG,Alphabet Inc. Class C,1.34,40007
GOOGL,Alphabet Inc.,1.34,39730
JNJ,Johnson & Johnson,1.3,353453
V,Visa Inc.,1.08,227871
