### The Requirements:
Step 1: Choose an ETF with a minimum of 100 assets, identify those assets

Step 2: Retrieve historical data for your chosen ETF

Step 3: Calculate the price momentum factors for each asset in your ETF

Step 4: Using the price momentum factors, calculate the monthly z-factor score for each asset

Step 5: Identify long and short baskets (10 to 15 assets in each) using calculated z-factors

Step 6: Create a backtest to validate performance of your algorithm based on monthly restructuring over the previous 5 years.

Step 7: Chart:

1. Monthly portfolio return bar chart (pos/neg coloring) vs ETF

2. Monthly return for/ long picks vs short picks vs ETF

3. Cumulative portfolio return vs ETF

In [42]:
# Import Libraries
import pandas as pd
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt


In [3]:
# Step 1: Choose an ETF with a minimum of 100 assets, identify those assets
# etf = ["SPY"]

# Get the list of S&P 500 constituents
# SPY_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()
tickers_list = pd.read_excel("https://www.ssga.com/us/en/intermediary/etfs/library-content/products/fund-data/etfs/us/holdings-daily-us-en-spy.xlsx", header=4).Ticker.dropna().to_list()

print(f'{len(tickers_list)} tickers')


504 tickers


In [27]:
# Step 2: Retrieve historical data for your chosen ETF

data = yf.download(tickers_list, period = '5y')['Adj Close']
sp500 = data.dropna(how= 'all', axis= 1)

sp500.tail()

[*********************100%%**********************]  504 of 504 completed


3 Failed downloads:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (period=5y)')
['-', 'BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')





Unnamed: 0_level_0,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-11-17 00:00:00,113.150002,12.29,189.690002,138.300003,127.150002,99.550003,83.599998,327.829987,602.659973,183.050003,...,86.870003,60.560001,104.959999,29.690001,100.830002,127.660004,111.669998,218.020004,36.07,174.800003
2023-11-20 00:00:00,113.980003,12.4,191.449997,138.309998,129.699997,101.199997,84.459999,330.899994,612.700012,183.820007,...,87.169998,60.060001,104.5,29.940001,100.209999,128.119995,112.5,220.539993,35.779999,176.059998
2023-11-21 00:00:00,123.919998,12.13,190.639999,138.729996,126.82,101.910004,86.129997,330.26001,610.98999,181.25,...,86.860001,60.02,104.459999,30.99,101.019997,128.240005,112.900002,222.410004,34.310001,178.729996
2023-11-22 00:00:00,123.989998,12.31,191.309998,138.470001,129.039993,102.699997,85.949997,333.130005,619.719971,182.669998,...,87.529999,60.43,104.010002,31.360001,102.220001,128.940002,113.830002,225.860001,34.41,179.350006
2023-11-24 00:00:00,126.620003,12.31,189.970001,138.669998,128.369995,102.870003,86.599998,334.040009,619.429993,183.050003,...,88.309998,60.52,104.57,31.32,103.260002,128.509995,113.43,228.919998,34.349998,180.210007


In [35]:
def calculate_slope(data):
    x = np.arange(len(data))
    y = data.values
    slope, _ = np.polyfit(x, y, 1)
    return slope

In [41]:

lag = 20

slope_52_week_trend = sp500.rolling(window=52).apply(calculate_slope, raw=False).shift(lag)
slope_52_week_trend

Unnamed: 0_level_0,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-11-26,,,,,,,,,,,...,,,,,,,,,,
2018-11-27,,,,,,,,,,,...,,,,,,,,,,
2018-11-28,,,,,,,,,,,...,,,,,,,,,,
2018-11-29,,,,,,,,,,,...,,,,,,,,,,
2018-11-30,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-17,-0.318488,-0.085106,-0.083359,-0.005209,-0.126225,-0.256551,0.189151,-0.154762,0.321561,-0.194488,...,-0.192038,0.013795,0.065694,-0.147476,-0.262879,-0.280819,-0.340656,-1.369805,-0.037334,-0.392574
2023-11-20,-0.304920,-0.085293,-0.092425,-0.011156,-0.140313,-0.252740,0.190116,-0.195187,0.295920,-0.204914,...,-0.186225,0.017819,0.051417,-0.146294,-0.261677,-0.281620,-0.342482,-1.429806,-0.043239,-0.404226
2023-11-21,-0.297822,-0.085273,-0.100609,-0.011774,-0.153680,-0.251538,0.193003,-0.234324,0.270793,-0.211544,...,-0.170001,0.023754,0.036032,-0.143793,-0.259632,-0.276276,-0.344129,-1.489554,-0.048682,-0.413149
2023-11-22,-0.295527,-0.085484,-0.113380,-0.012711,-0.178678,-0.250954,0.195505,-0.282837,0.186841,-0.231127,...,-0.166352,0.030977,0.024952,-0.142001,-0.260402,-0.269263,-0.348780,-1.545564,-0.052308,-0.423789


In [None]:
# Plotting the results
plt.figure(figsize=(10, 6))
plt.plot(slope_52_week_trend, label='52-Week Slope (20-Day Lag)')
plt.title(f'52-Week Slope of {ticker} Stock Price with a 20-Day Lag')
plt.xlabel('Date')
plt.ylabel('Slope')
plt.legend()
plt.show()

In [28]:
returns = sp500.pct_change()


In [29]:
returns

Unnamed: 0_level_0,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-11-26,,,,,,,,,,,...,,,,,,,,,,
2018-11-27,0.001589,0.004987,-0.002176,0.016698,,0.013079,-0.010298,0.004483,0.001250,-0.005440,...,0.011747,0.012525,0.000909,0.004865,-0.000862,0.013135,-0.001148,-0.002721,-0.003121,0.007596
2018-11-28,0.030565,0.016976,0.038453,0.014370,,0.044757,0.020811,0.019483,0.073025,0.018182,...,-0.000266,-0.003479,0.018170,0.009682,0.023306,0.025260,0.013968,0.056531,0.008139,0.028825
2018-11-29,0.001259,-0.013354,-0.007682,0.010906,,0.005904,-0.002461,-0.007028,-0.000482,-0.013227,...,-0.041489,-0.000582,0.007776,-0.014651,0.002390,0.001417,0.002267,-0.001067,-0.002070,-0.000323
2018-11-30,0.010898,0.045289,-0.005403,0.048493,,0.010784,0.008457,0.021419,0.007226,0.026810,...,0.011839,0.017854,0.005565,0.021357,0.023563,0.003919,0.017919,0.010567,0.009334,0.011858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-17,-0.009108,0.008203,-0.000105,0.000145,0.006889,-0.007082,-0.001314,0.001558,0.000997,0.017849,...,0.012353,-0.002306,0.024400,0.003041,-0.000988,-0.001330,0.001076,0.011788,0.010081,-0.009856
2023-11-20,0.007335,0.008950,0.009278,0.000072,0.020055,0.016575,0.010287,0.009365,0.016660,0.004207,...,0.003453,-0.008256,-0.004383,0.008420,-0.006149,0.003603,0.007433,0.011559,-0.008040,0.007208
2023-11-21,0.087208,-0.021774,-0.004231,0.003037,-0.022205,0.007016,0.019773,-0.001934,-0.002791,-0.013981,...,-0.003556,-0.000666,-0.000383,0.035070,0.008083,0.000937,0.003556,0.008479,-0.041084,0.015165
2023-11-22,0.000565,0.014839,0.003514,-0.001874,0.017505,0.007752,-0.002090,0.008690,0.014288,0.007834,...,0.007714,0.006831,-0.004308,0.011939,0.011879,0.005458,0.008237,0.015512,0.002915,0.003469


In [31]:
returns.mean()

A       0.000683
AAL    -0.000151
AAPL    0.001415
ABBV    0.000698
ABNB    0.000408
          ...   
YUM     0.000499
ZBH     0.000253
ZBRA    0.000587
ZION    0.000294
ZTS     0.000746
Length: 501, dtype: float64

1. **Slope of 52-week Trend Line (20-day lag):**
   - This indicator likely involves calculating the slope of the trend line based on the past 52 weeks of price data, with a 20-day lag. The slope provides information about the direction and strength of the trend.

In [30]:
lag = 20

momentum_factors = returns.rolling(window=lag).mean()
momentum_factors

Unnamed: 0_level_0,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-11-26,,,,,,,,,,,...,,,,,,,,,,
2018-11-27,,,,,,,,,,,...,,,,,,,,,,
2018-11-28,,,,,,,,,,,...,,,,,,,,,,
2018-11-29,,,,,,,,,,,...,,,,,,,,,,
2018-11-30,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-17,0.001964,0.005400,0.004785,-0.002691,0.004793,0.001472,0.000724,0.005009,0.005594,0.004916,...,-0.000003,0.002166,-0.002287,-0.003148,0.004990,0.003166,0.003419,0.003578,0.010352,0.002600
2023-11-20,0.002281,0.004899,0.005214,-0.002174,0.004121,0.002817,0.001782,0.005824,0.006478,0.005922,...,-0.001109,0.001796,-0.001773,-0.003497,0.005858,0.003842,0.003503,0.003975,0.009800,0.003137
2023-11-21,0.008333,0.004165,0.004876,-0.002568,0.002338,0.003674,0.001818,0.005532,0.006417,0.004900,...,-0.002166,0.000875,-0.001308,-0.002222,0.006380,0.003405,0.003833,0.004248,0.008062,0.003709
2023-11-22,0.009422,0.005665,0.005726,-0.002303,0.004652,0.004716,0.001672,0.006542,0.008839,0.006850,...,-0.000424,0.000887,-0.001615,-0.000339,0.007913,0.003511,0.004817,0.006723,0.008593,0.004915


In [43]:
data.pct_change(252 - 20).rolling(window=20).mean() * 100


Unnamed: 0_level_0,-,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-11-26,,,,,,,,,,,...,,,,,,,,,,
2018-11-27,,,,,,,,,,,...,,,,,,,,,,
2018-11-28,,,,,,,,,,,...,,,,,,,,,,
2018-11-29,,,,,,,,,,,...,,,,,,,,,,
2018-11-30,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-17,,-29.216754,-16.344158,23.421964,-8.868768,25.007196,-8.671330,42.781316,7.586687,68.455379,...,9.570259,-11.471206,-0.263211,-2.225450,-14.493433,-1.877241,-11.430820,-20.402021,-31.217051,10.369747
2023-11-20,,-29.184343,-15.550785,24.814708,-9.333398,26.185410,-8.606569,42.463021,8.724328,69.506278,...,8.892375,-11.500272,-0.062630,-2.594940,-13.971518,-1.783104,-11.642834,-19.995642,-30.210247,10.682798
2023-11-21,,-28.576786,-14.680333,26.299494,-9.775541,27.325028,-8.385646,42.343969,10.043134,70.644716,...,8.215011,-11.495902,0.118010,-2.778155,-13.274552,-1.619761,-11.720598,-19.452747,-29.328161,11.204795
2023-11-22,,-27.943589,-13.753681,27.765390,-10.099436,28.498067,-8.060984,41.945508,11.324445,71.701135,...,7.545549,-11.476687,0.144871,-2.900011,-12.449652,-1.476833,-11.707486,-18.753301,-28.436689,11.964702


In [24]:
# Slope is our (y2-y1)[change of price] 
# divided by (x1-x2)[change of 1 year]
open_price = sp500.iloc[0] # first data entry
close_price = sp500.iloc[-1] # most recent data entry
price_difference = close_price - open_price
# Since our change of time is 1 year, im just going to divide by count of rows
slope = price_difference / len(sp500) * 100
slope

A        4.757988
AAL     -2.001886
AAPL    11.770228
ABBV     5.595449
ABNB          NaN
          ...    
YUM      3.768910
ZBH      0.603597
ZBRA     4.759937
ZION    -0.506714
ZTS      7.446604
Length: 501, dtype: float64

In [None]:
slope = []

for asset in tickers_list:
    returns = data[asset].pct_change(252 - 20).rolling(window=20).mean() * 100
    slope.append(returns)

In [None]:
# Step 3: Calculate the price momentum factors for each asset in your ETF

def calculate_momentum_factors(data, lag=20):
    # Factor 1: Slope of 52-week trend line (20-day lag)
    data['Slope_52Week'] = data['Close'].pct_change(252 - lag).rolling(window=20).mean() * 100

    # Factor 2: Percent above 260-day low (20-day lag)
    data['Percent_Above_260Day_Low'] = (data['Close'] - data['Low'].rolling(window=260 - lag).min()) / (data['High'].rolling(window=260 - lag).max() - data['Low'].rolling(window=260 - lag).min()) * 100

    # Factor 3: 4/52 Week Price Oscillator (20-day lag)
    data['Price_Oscillator'] = (data['Close'].rolling(window=4).mean() / data['Close'].rolling(window=52 - lag).mean() - 1) * 100

    # Factor 4: 39-week return (20-day lag)
    data['39Week_Return'] = data['Close'].pct_change(39 - lag) * 100

    # Factor 5: 51-week Volume Price Trend (20-day lag)
    data['Volume_Price_Trend'] = (data['Close'].pct_change() * data['Volume']).rolling(window=51 - lag).sum()

    return data[['Slope_52Week', 'Percent_Above_260Day_Low', 'Price_Oscillator', '39Week_Return', 'Volume_Price_Trend']]

# Apply the function to ETF data and asset data
#etf_momentum_factors = calculate_momentum_factors(data)
all_asset = {}

for asset in tickers_list:
    assets_momentum_factors = data[asset].apply(calculate_momentum_factors)

# Display the calculated price momentum factors
# print("ETF Momentum Factors:")
# print(etf_momentum_factors.head())
print("\nAsset Momentum Factors:")
print(assets_momentum_factors.head())

# # Displaying the signals for each asset
# for asset, signals in assets_momentum_factors.items():
#     print(f"Signals for {asset}:\n{signals}\n")



TypeError: 'float' object is not subscriptable