Gather all tickers

In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from typing import List, Tuple

wiki_page: str = requests.get('https://en.wikipedia.org/wiki/List_of_American_exchange-traded_funds').text
soup: BeautifulSoup = BeautifulSoup(wiki_page,'lxml')

list_items = soup.select('li:contains("|")')
tickers: List[str] = []

for list_item in list_items:
    li_text: str = list_item.text
    start_index: int = li_text.find('|')
    end_index: int = li_text.find(')',start_index)
    tickers.append(li_text[start_index +1:end_index].strip())

tickers.append('^GSPTSE') # add S&P/TSX Composite as benchmark
print(tickers)

['ITOT', 'ACWI', 'IWV', 'SCHB', 'FNDB', 'VT', 'VTI', 'VXUS', 'VTHR', 'DIA', 'RSP', 'IOO', 'IVV', 'SPY', 'SHE', 'VOO', 'IWM', 'OEF', 'QQQ', 'CVY', 'RPG', 'RPV', 'IWB', 'IWF', 'IWD', 'IVV', 'IVW', 'IVE', 'PKW', 'PRF', 'SPLV', 'SCHX', 'SCHG', 'SCHV', 'SCHD', 'FNDX', 'SDY', 'VOO', 'VOOG', 'VOOV', 'VV', 'VUG', 'VTV', 'MGC', 'MGK', 'MGV', 'VONE', 'VONG', 'VONV', 'VIG', 'VYM', 'DTN', 'DLN', 'MDY', 'DVY', 'IWR', 'IWP', 'IWS', 'IJH', 'IJK', 'IJJ', 'PDP', 'SCHM', 'IVOO', 'IVOG', 'IVOV', 'VO', 'VOT', 'VOE', 'VXF', 'DON', 'IWC', 'IWM', 'IWO', 'IWN', 'IJR', 'IJT', 'IJS', 'SCHA', 'FNDA', 'VIOO', 'VIOG', 'VIOV', 'VB', 'VBK', 'VBR', 'VTWO', 'VTWG', 'VTWV', 'EEB', 'ECON', 'IDV', 'ACWX', 'BKF', 'EFA', 'EFG', 'EFV', 'SCZ', 'EEM', 'PID', 'SCHC', 'SCHE', 'SCHF', 'FNDF', 'FNDC', 'FNDE', 'DWX', 'VEA', 'VWO', 'VXUS', 'VEU', 'VSS', 'DEM', 'DGS', 'AAXJ', 'EZU', 'EPP', 'IEV', 'ILF', 'FEZ', 'VGK', 'VPL', 'HEDJ', 'DFE', 'AND', 'GXF', 'EWA', 'EWC', 'EWG', 'EIS', 'EWI', 'EWJ', 'EWD', 'EWL', 'EWP', 'EWU', 'DXJ', 'NOR

Fetch ticker info from Yahoo

In [3]:
import yfinance as yf

data: pd.DataFrame = yf.download(tickers=" ".join(tickers), period="5y", interval="1d", group_by='ticker')
print(data)

[*********************100%***********************]  426 of 426 completed

16 Failed downloads:
- CRDT: No data found for this date range, symbol may be delisted
- GLDE: No data found for this date range, symbol may be delisted
- RRF: No data found, symbol may be delisted
- RPX: No data found, symbol may be delisted
- QEH: No data found, symbol may be delisted
- FTGS: No data found, symbol may be delisted
- BABZ: No data found for this date range, symbol may be delisted
- WDTI: No data found, symbol may be delisted
- IRV: No data found for this date range, symbol may be delisted
- ONEF: No data found for this date range, symbol may be delisted
- BGU: No data found for this date range, symbol may be delisted
- HDGI: No data found for this date range, symbol may be delisted
- ACCU: No data found for this date range, symbol may be delisted
- YPRO: No data found, symbol may be delisted
- GGBP: No data found for this date range, symbol may be delisted
- RWG: No data found, symbol may be deli

Save output to file to prevent further network requests.

In [4]:
import os

found_tickers: List[str] = data.columns.get_level_values(0).unique().to_list()

for found_ticker in found_tickers:
    data[found_ticker].to_csv(os.path.join("data", found_ticker + '.csv'))


Read files back from directory.

In [5]:
import glob

csv_paths: List[str] = glob.glob(os.path.join("data", '*.csv'))
prices_df: pd.DataFrame = None

for csv_path in csv_paths:
    (ticker_id, extension) = csv_path.split(".", 1)
    df: pd.DataFrame = pd.read_csv(csv_path, index_col='Date', usecols=['Date', 'Adj Close'], header=0, parse_dates=True)
    df = df.rename(columns={'Adj Close': os.path.split(ticker_id)[1]})

    if prices_df is not None:
        prices_df = prices_df.join(df)
    else:
        prices_df = df

prices_df = prices_df.sort_values(by='Date', axis=0)
prices_df

Unnamed: 0_level_0,AADR,AAXJ,ACCU,ACWI,ACWX,AGG,ALD,AMLP,AND,ARGT,...,XLF,XLI,XLK,XLP,XLU,XLV,XLY,XOP,YPRO,^GSPTSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-17,,,,,,,,,,,...,,,,,,,,,,14642.799805
2015-07-20,38.463535,55.513893,,54.502068,39.286201,95.445511,41.598351,50.861473,7.708112,18.456688,...,14.356327,49.486988,40.371029,43.819538,36.419109,70.830788,73.741211,151.471603,,14425.599609
2015-07-21,38.881107,55.568291,,54.367611,39.207188,95.603722,41.567722,50.861473,7.659996,18.514244,...,14.339384,48.955448,40.102940,43.714710,36.089794,70.518906,73.508438,152.373383,,14376.200195
2015-07-22,38.502384,55.015324,,54.071789,38.856030,95.647682,41.567722,49.860271,7.602258,18.322388,...,14.446692,48.739231,39.455826,43.810806,36.250233,70.674850,73.862267,149.292114,,14307.099609
2015-07-23,38.599491,54.788696,,53.802868,38.698006,95.920113,41.546368,49.893639,7.400172,18.025009,...,14.322443,48.288773,39.372623,43.644825,35.709816,70.417992,73.424622,149.517609,,14265.400391
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-07-13,54.959999,74.739998,,75.760002,44.639999,118.480003,,22.700001,,25.340000,...,23.559999,68.290001,105.839996,60.200001,57.830002,101.279999,131.619995,48.330002,,15639.400391
2020-07-14,55.290001,74.720001,,76.699997,45.099998,118.669998,,22.600000,,25.299999,...,23.680000,69.769997,107.059998,61.080002,58.369999,103.290001,132.929993,50.389999,,15908.500000
2020-07-15,56.250000,74.830002,,77.459999,45.560001,118.680000,,23.790001,,25.680000,...,24.150000,71.550003,107.639999,61.130001,58.139999,104.629997,134.830002,51.889999,,16063.299805
2020-07-16,56.040001,73.449997,,76.989998,45.180000,118.760002,,23.740000,,25.270000,...,24.150000,71.570000,106.309998,61.259998,58.880001,104.459999,134.470001,51.759998,,16024.500000


Calculate price returns

In [197]:
returns_df = prices_df.pct_change().dropna(axis='columns', how='all')
# drop any security with a most recent price of <= 0
returns_df = returns_df.drop(returns_df.columns[returns_df.iloc[-1,:] <= 0], axis=1)
found_tickers = returns_df.columns
returns_df

Unnamed: 0_level_0,AADR,AAXJ,ACWI,ACWX,AGG,ARGT,ARKG,ARKK,ARKQ,ARKW,...,VYM,XBI,XHB,XLB,XLI,XLK,XLP,XLU,XLV,^GSPTSE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-07-17,,,,,,,,,,,...,,,,,,,,,,
2015-07-20,,,,,,,,,,,...,,,,,,,,,,-0.014833
2015-07-21,0.010856,0.000980,-0.002467,-0.002011,0.001658,0.003118,,-0.003645,,-0.001255,...,-0.005674,-0.016905,-0.001642,-0.006136,-0.010741,-0.006641,-0.002392,-0.009042,-0.004403,-0.003424
2015-07-22,-0.009741,-0.009951,-0.005441,-0.008956,0.000460,-0.010363,,-0.010059,,-0.005444,...,-0.002927,0.004468,0.019463,-0.004258,-0.004417,-0.016136,0.002198,0.004446,0.002211,-0.004807
2015-07-23,0.002522,-0.004119,-0.004973,-0.004067,0.002848,-0.016230,,0.006928,,0.009263,...,-0.005283,-0.009419,-0.003227,-0.014539,-0.009242,-0.002109,-0.003789,-0.014908,-0.003634,-0.002915
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-07-13,-0.008658,-0.004661,-0.008766,-0.006455,-0.000084,-0.016686,-0.021357,-0.033997,-0.012898,-0.042317,...,0.001643,-0.025844,-0.013955,0.000000,0.004117,-0.020725,-0.001493,0.001039,0.005660,-0.004735
2020-07-14,0.006004,-0.000268,0.012408,0.010305,0.001604,-0.001579,0.022923,0.008702,0.007325,0.003861,...,0.012743,0.025458,0.032413,0.025177,0.021672,0.011527,0.014618,0.009338,0.019846,0.017207
2020-07-15,0.017363,0.001472,0.009909,0.010200,0.000084,0.015020,0.032180,0.026770,0.020440,0.011432,...,0.012458,0.021254,0.026973,0.016821,0.025512,0.005418,0.000819,-0.003940,0.012973,0.009731
2020-07-16,-0.003733,-0.018442,-0.006068,-0.008341,0.000674,-0.015966,-0.026487,-0.014457,-0.007897,-0.010563,...,0.001600,-0.008018,0.011195,0.003309,0.000279,-0.012356,0.002127,0.012728,-0.001625,-0.002415


Calucate expected return using geomean from price return

In [198]:
from scipy.stats import gmean

exp_return_df = pd.DataFrame()
for found_ticker in found_tickers:
    returns_sr = returns_df[pd.notnull(returns_df[found_ticker])][found_ticker]
    if exp_return_df.empty:
        exp_return_df = pd.DataFrame(data={
            'ticker': found_ticker,
            'exp_return': [0] if returns_sr.empty else [gmean(returns_sr + 1) - 1]
        })
    else:
        exp_return_df = pd.concat([
            exp_return_df,
            pd.DataFrame(data={
                'ticker': found_ticker,
                'exp_return': [0] if returns_sr.empty else [gmean(returns_sr + 1) - 1]
            })
        ])

exp_return_df = exp_return_df.set_index('ticker').T
exp_return_df

ticker,AADR,AAXJ,ACWI,ACWX,AGG,ARGT,ARKG,ARKK,ARKQ,ARKW,...,VYM,XBI,XHB,XLB,XLI,XLK,XLP,XLU,XLV,^GSPTSE
exp_return,0.000304,0.000224,0.000273,0.000113,0.000172,0.000246,0.003574,0.001073,0.002444,0.001204,...,0.000259,0.000222,0.000233,0.000283,0.000293,0.000761,0.000266,0.000393,0.000314,7.5e-05


Calculate expected covariance using price return

In [199]:
covar_df = returns_df.cov()
covar_df

Unnamed: 0,AADR,AAXJ,ACWI,ACWX,AGG,ARGT,ARKG,ARKK,ARKQ,ARKW,...,VYM,XBI,XHB,XLB,XLI,XLK,XLP,XLU,XLV,^GSPTSE
AADR,0.000192,1.338585e-04,1.234416e-04,0.000128,8.309580e-06,0.000168,0.000736,0.000179,0.000694,0.000166,...,1.088189e-04,1.547315e-04,0.000146,1.333147e-04,1.291264e-04,1.439270e-04,0.000073,0.000071,0.000101,0.000107
AAXJ,0.000134,1.745944e-04,1.334799e-04,0.000143,9.793756e-07,0.000160,0.000637,0.000171,0.000609,0.000165,...,1.164311e-04,1.546401e-04,0.000144,1.387192e-04,1.356668e-04,1.521660e-04,0.000077,0.000075,0.000106,0.000101
ACWI,0.000123,1.334799e-04,1.345097e-04,0.000133,5.728478e-07,0.000151,0.000713,0.000168,0.000699,0.000159,...,1.279359e-04,1.606541e-04,0.000159,1.453079e-04,1.466477e-04,1.556520e-04,0.000091,0.000092,0.000117,0.000106
ACWX,0.000128,1.429719e-04,1.329849e-04,0.000141,2.242652e-06,0.000157,0.000700,0.000162,0.000673,0.000153,...,1.205625e-04,1.502134e-04,0.000153,1.422344e-04,1.404634e-04,1.448434e-04,0.000083,0.000084,0.000106,0.000107
AGG,0.000008,9.793756e-07,5.728478e-07,0.000002,8.304214e-06,0.000004,0.000054,0.000004,0.000051,0.000003,...,-6.234796e-07,-5.160818e-07,0.000005,-6.544315e-08,-2.346715e-07,-5.934189e-07,0.000001,0.000005,-0.000001,0.000005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
XLK,0.000144,1.521660e-04,1.556520e-04,0.000145,-5.934189e-07,0.000166,0.000830,0.000216,0.000809,0.000213,...,1.477942e-04,2.005048e-04,0.000177,1.621574e-04,1.659472e-04,2.139686e-04,0.000107,0.000102,0.000141,0.000118
XLP,0.000073,7.732490e-05,9.117048e-05,0.000083,1.209440e-06,0.000088,0.000500,0.000091,0.000478,0.000085,...,1.003969e-04,8.804109e-05,0.000106,9.778293e-05,1.006307e-04,1.065206e-04,0.000105,0.000101,0.000089,0.000070
XLU,0.000071,7.463142e-05,9.219039e-05,0.000084,5.157215e-06,0.000080,0.000679,0.000084,0.000632,0.000079,...,1.061285e-04,6.993864e-05,0.000117,1.009148e-04,1.037272e-04,1.023467e-04,0.000101,0.000172,0.000090,0.000078
XLV,0.000101,1.062736e-04,1.168555e-04,0.000106,-1.405350e-06,0.000119,0.000628,0.000153,0.000567,0.000136,...,1.188963e-04,1.863553e-04,0.000135,1.249683e-04,1.275189e-04,1.407978e-04,0.000089,0.000090,0.000143,0.000088


Reorder expected return to line up with covar matrix

In [200]:
exp_return_df = exp_return_df[covar_df.columns]
exp_return_df

ticker,AADR,AAXJ,ACWI,ACWX,AGG,ARGT,ARKG,ARKK,ARKQ,ARKW,...,VYM,XBI,XHB,XLB,XLI,XLK,XLP,XLU,XLV,^GSPTSE
exp_return,0.000304,0.000224,0.000273,0.000113,0.000172,0.000246,0.003574,0.001073,0.002444,0.001204,...,0.000259,0.000222,0.000233,0.000283,0.000293,0.000761,0.000266,0.000393,0.000314,7.5e-05


Optimization of portfolio

In [215]:
alpha_df = exp_return_df.T - exp_return_df['^GSPTSE'] # subtract market return
alpha_df = alpha_df[alpha_df['exp_return'] > 0] # filter to only positive alpha
alpha_sr = alpha_df['exp_return'].to_numpy()

covar_df = covar_df[covar_df.index.isin(alpha_df.index)][alpha_df.index] # filter to only positive alpha
covar = covar_df.round(8).to_numpy()
alpha_df

Unnamed: 0_level_0,exp_return
ticker,Unnamed: 1_level_1
AADR,0.000229
AAXJ,0.000149
ACWI,0.000198
ACWX,0.000037
AGG,0.000096
...,...
XLI,0.000218
XLK,0.000686
XLP,0.000191
XLU,0.000318


In [227]:
from scipy.optimize import minimize
import numpy as np

def treynor_ratio(weights: np.ndarray, covar_matrix: np.ndarray, alpha_returns: np.ndarray) -> float:
    # we are minimizing the negative to get a maximum
    objective = float(-weights.dot(alpha_returns) / np.sqrt(weights.dot(covar_matrix).dot(weights.T)))
    return objective

weights = np.ones(covar_df.columns.__len__())
constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})
bounds = [(0, None,) for i in range(len(weights))]
portfolio = minimize(treynor_ratio, weights, args=(covar, alpha_sr), bounds=bounds, constraints=constraints)
portfolio

     fun: -0.14678073373999065
     jac: array([ 1.59922556e+00,  1.31378898e+00,  1.48078329e+00,  1.53261023e+00,
        2.25233648e-01,  1.70718772e+00,  6.41098246e-04,  2.02409923e+00,
        8.05478543e-04,  1.62719688e+00,  1.05966160e-01,  1.34467652e+00,
        5.94812218e-01,  3.12065916e-01,  2.22696230e-01,  5.88728394e-02,
        2.01607700e-01,  7.47363158e-01,  1.12878673e+00,  1.52697099e+00,
        1.41371198e+00,  1.53351747e+00,  1.68193882e+00,  1.54663205e+00,
        1.49554097e+00,  1.37417016e+00,  8.91937792e-01,  4.07710377e-01,
        1.72771351e+00,  5.77388816e-01,  2.23719051e+00,  1.74415621e+00,
        1.72174858e+00,  1.72215242e+00,  1.07848837e+00,  1.08567543e+00,
        1.30402393e+00,  1.13352185e+00,  1.69149404e+00,  2.65983776e+00,
        1.52557877e+00,  1.68118797e+00,  1.27854876e+00,  1.29088796e+00,
        1.70811675e+00,  3.33106136e-01,  1.58991633e+00,  1.47319340e+00,
        1.49795908e+00,  1.20508041e+00,  1.10535014e+00,  

In [228]:
pd.Series(portfolio.x).to_clipboard()