In [1]:
# Libraries
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas_datareader.data as data
import datetime
import json
from yahoofinancials import YahooFinancials
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
from webdriver_manager.chrome import ChromeDriverManager
import re
import requests
import warnings

driver = webdriver.Chrome()

asset_dict = {} 

warnings.filterwarnings('ignore')


def get_table(soup):
    for t in soup.select('table'):
        header = t.select('thead tr th')
        if len(header) > 2:
            if (header[0].get_text().strip() == 'Symbol'
                and header[2].get_text().strip().startswith('% Holding')):
                return t
    raise Exception('could not find symbol list table')
    
# Scrapes ETF holdings from barchart.com
def get_etf_holdings():
    '''
    etf_symbol: str
    
    return: pd.DataFrame
    '''
    url = 'https://www.barchart.com/stocks/quotes/VONE/constituents?page=all'

    # Loads the ETF constituents page and reads the holdings table
    browser = WebDriver() # webdriver.PhantomJS()
    browser.get(url)
    html = browser.page_source
    soup = BeautifulSoup(html, 'html')
    table = get_table(soup)

    # Reads the holdings table line by line and appends each asset to a
    # dictionary along with the holdings percentage
    for row in table.select('tr')[1:26]:
        try:
            cells = row.select('td')
            # print(row)
            symbol = cells[0].get_text().strip()
            # print(symbol)
            # name = cells[1].text.strip()
            celltext = cells[2].get_text().strip()
            percent = float(celltext.rstrip('%'))
            # shares = int(cells[3].text.strip().replace(',', ''))
            if symbol != "" and percent != 0.0:
                asset_dict[symbol] = {
                    'percent': percent,
                }
        except BaseException as ex:
            print(ex)
    browser.quit()
    return pd.DataFrame(asset_dict)

constituent = get_etf_holdings()

constituent.T

# Assets
russell_index = ["^RUI"]
tickers = []

# Set timeframe
start = "2019-03-31"
end = "2019-04-29"

def russell():
    # Pull data from Yahoo! Finance for Russell 1000 Index
    df = data.DataReader(russell_index, data_source="yahoo", start=start, end=end)

    percentage = df.pct_change()
    russellReturn = percentage["Adj Close"]

    return russellReturn


#russell()

def pullTickers():
    exec(open("scrapeETF.py").read())

    list(asset_dict.keys())

    for key in asset_dict.keys():
        tickers.append(key.replace('.','-'))
    return tickers


pullTickers()



['AAPL',
 'MSFT',
 'AMZN',
 'FB',
 'BRK-B',
 'JPM',
 'GOOG',
 'GOOGL',
 'JNJ',
 'V',
 'PG',
 'XOM',
 'BAC',
 'T',
 'UNH',
 'MA',
 'INTC',
 'VZ',
 'DIS',
 'HD',
 'MRK',
 'CVX',
 'PFE',
 'KO',
 'CSCO']

In [5]:
yahoo_financials = YahooFinancials('AAPL')
print(yahoo_financials.get_key_statistics_data()['AAPL']['sharesOutstanding'])

4384030208


In [102]:
yahoo_financials = YahooFinancials('AMZN')
print(yahoo_financials.get_financial_stmts('quarterly', 'balance')['balanceSheetHistoryQuarterly']['AMZN'][2])

{'2019-03-31': {'capitalSurplus': 28059000000, 'totalLiab': 129692000000, 'totalStockholderEquity': 48410000000, 'otherCurrentLiab': 7298000000, 'totalAssets': 178102000000, 'commonStock': 5000000, 'otherCurrentAssets': 359000000, 'retainedEarnings': 23193000000, 'otherLiab': 9400000000, 'goodWill': 14708000000, 'treasuryStock': -2847000000, 'otherAssets': 12189000000, 'cash': 23115000000, 'totalCurrentLiabilities': 63695000000, 'shortLongTermDebt': 1608000000, 'otherStockholderEquity': -1010000000, 'propertyPlantEquipment': 81180000000, 'totalCurrentAssets': 69431000000, 'longTermInvestments': 594000000, 'netTangibleAssets': 33702000000, 'shortTermInvestments': 13905000000, 'netReceivables': 15620000000, 'longTermDebt': 23322000000, 'inventory': 16432000000, 'accountsPayable': 31809000000}}


In [31]:
yahoo_financials = YahooFinancials('MSFT')
print(yahoo_financials.get_financial_stmts('quarterly', 'balance')['balanceSheetHistoryQuarterly']['MSFT'][2]['2019-03-31']['commonStock'])

77791000000


In [54]:
replicatedWeights = {}

marketCaps = {}

portfolioAUM = np.zeros(0)

def pullFundamentals():

    for i in tickers:
        yahoo_financials = YahooFinancials(i)
        sharesOutstanding = yahoo_financials.get_key_statistics_data()[i]['sharesOutstanding']
        
        prices = yahoo_financials.get_historical_price_data('2019-04-01', '2019-04-02', 'daily')[i]['prices'][0]['adjclose']

        mcap = sharesOutstanding * prices

        marketCaps[i] = mcap
        
        print(marketCaps)

pullFundamentals()

{'AAPL': 829533959525.7891}
{'AAPL': 829533959525.7891, 'MSFT': 906409833473.5938}
{'AAPL': 829533959525.7891, 'MSFT': 906409833473.5938, 'AMZN': 899469915865.875}
{'AAPL': 829533959525.7891, 'MSFT': 906409833473.5938, 'AMZN': 899469915865.875, 'FB': 405971462761.6367}
{'AAPL': 829533959525.7891, 'MSFT': 906409833473.5938, 'AMZN': 899469915865.875, 'FB': 405971462761.6367, 'BRK-B': 283601106560.0}
{'AAPL': 829533959525.7891, 'MSFT': 906409833473.5938, 'AMZN': 899469915865.875, 'FB': 405971462761.6367, 'BRK-B': 283601106560.0, 'JPM': 313468157958.9844}
{'AAPL': 829533959525.7891, 'MSFT': 906409833473.5938, 'AMZN': 899469915865.875, 'FB': 405971462761.6367, 'BRK-B': 283601106560.0, 'JPM': 313468157958.9844, 'GOOG': 410347648937.8867}
{'AAPL': 829533959525.7891, 'MSFT': 906409833473.5938, 'AMZN': 899469915865.875, 'FB': 405971462761.6367, 'BRK-B': 283601106560.0, 'JPM': 313468157958.9844, 'GOOG': 410347648937.8867, 'GOOGL': 359247973587.8906}
{'AAPL': 829533959525.7891, 'MSFT': 9064098334

In [58]:
aggregateValue = sum(marketCaps.values())    

for i in tickers:
    weights = marketCaps[i] / aggregateValue

    replicatedWeights[i] = weights

    print(replicatedWeights)

{'AAPL': 0.09736714980299824}
{'AAPL': 0.09736714980299824, 'MSFT': 0.10639051123257888}
{'AAPL': 0.09736714980299824, 'MSFT': 0.10639051123257888, 'AMZN': 0.10557593337284002}
{'AAPL': 0.09736714980299824, 'MSFT': 0.10639051123257888, 'AMZN': 0.10557593337284002, 'FB': 0.047651194717876676}
{'AAPL': 0.09736714980299824, 'MSFT': 0.10639051123257888, 'AMZN': 0.10557593337284002, 'FB': 0.047651194717876676, 'BRK-B': 0.033287885456201295}
{'AAPL': 0.09736714980299824, 'MSFT': 0.10639051123257888, 'AMZN': 0.10557593337284002, 'FB': 0.047651194717876676, 'BRK-B': 0.033287885456201295, 'JPM': 0.03679355226386422}
{'AAPL': 0.09736714980299824, 'MSFT': 0.10639051123257888, 'AMZN': 0.10557593337284002, 'FB': 0.047651194717876676, 'BRK-B': 0.033287885456201295, 'JPM': 0.03679355226386422, 'GOOG': 0.04816485274247681}
{'AAPL': 0.09736714980299824, 'MSFT': 0.10639051123257888, 'AMZN': 0.10557593337284002, 'FB': 0.047651194717876676, 'BRK-B': 0.033287885456201295, 'JPM': 0.03679355226386422, 'GOOG'

In [75]:
weightedPrice = {}

def replicateETF():
    for i in tickers:
        yahoo_financials = YahooFinancials(i)
        
        prices = data.DataReader(i, data_source="yahoo", start=start, end=end)['Adj Close']

        weightedPrice = prices * replicatedWeights[i]
        print(weightedPrice)
    
replicateETF()











Date
2019-04-01    18.423540
2019-04-02    18.691358
2019-04-03    18.819487
2019-04-04    18.852239
2019-04-05    18.978441
2019-04-08    19.277087
2019-04-09    19.219285
2019-04-10    19.327182
2019-04-11    19.166299
2019-04-12    19.158593
2019-04-15    19.193274
2019-04-16    19.195201
2019-04-17    19.568990
2019-04-18    19.639316
2019-04-22    19.703863
2019-04-23    19.988056
2019-04-24    19.957229
2019-04-25    19.776116
2019-04-26    19.681705
2019-04-29    19.711569
2019-04-30    19.332000
Name: Adj Close, dtype: float64
Date
2019-04-01    12.531387
2019-04-02    12.549286
2019-04-03    12.631411
2019-04-04    12.567185
2019-04-05    12.622988
2019-04-08    12.627199
2019-04-09    12.558761
2019-04-10    12.654574
2019-04-11    12.669314
2019-04-12    12.734593
2019-04-15    12.745123
2019-04-16    12.715641
2019-04-17    12.820929
2019-04-18    12.989391
2019-04-22    13.030453
2019-04-23    13.207337
2019-04-24    13.162063
2019-04-25    13.597954
2019-04-26    13.67586

In [None]:
replicatedETF_price = sum(marketCaps.values())

In [59]:
def russell():
    # Pull data from Yahoo! Finance for Russell 1000 Index
    df = data.DataReader(russell_index, data_source="yahoo", start=start, end=end)

    percentage = df.pct_change()
    russellReturn = percentage["Adj Close"]

    return russellReturn


russell()

Symbols,^RUI
Date,Unnamed: 1_level_1
2019-04-01,
2019-04-02,-5e-05
2019-04-03,0.002461
2019-04-04,0.001652
2019-04-05,0.004871
2019-04-08,0.001048
2019-04-09,-0.006319
2019-04-10,0.004083
2019-04-11,0.000212
2019-04-12,0.006676


In [50]:
ticker = 'AAPL'
yahoo_financials = YahooFinancials(ticker)

historical_stock_prices = yahoo_financials.get_historical_price_data('2019-04-01', '2019-04-02', 'daily')['AAPL']['prices'][0]['adjclose']

print(historical_stock_prices)

189.2172088623047
