### The Requirements:
Step 1: Choose an ETF with a minimum of 100 assets, identify those assets

Step 2: Retrieve historical data for your chosen ETF

Step 3: Calculate the price momentum factors for each asset in your ETF

Step 4: Using the price momentum factors, calculate the monthly z-factor score for each asset

Step 5: Identify long and short baskets (10 to 15 assets in each) using calculated z-factors

Step 6: Create a backtest to validate performance of your algorithm based on monthly restructuring over the previous 5 years.

Step 7: Chart:

1. Monthly portfolio return bar chart (pos/neg coloring) vs ETF

2. Monthly return for/ long picks vs short picks vs ETF

3. Cumulative portfolio return vs ETF

In [26]:
# Import Libraries
import pandas as pd
import yfinance as yf

In [27]:
# Step 1: Choose an ETF with a minimum of 100 assets, identify those assets
# etf = ["SPY"]

# Get the list of S&P 500 constituents
SPY_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()

print(f'{len(SPY_tickers)} tickers')

503 tickers


In [117]:
# Step 2: Retrieve historical data for your chosen ETF

data = yf.download(SPY_tickers, period = '5y')
data.head()

[*********************100%%**********************]  503 of 503 completed


2 Failed downloads:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (period=5y)')
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')





Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-11-19 00:00:00,60.362831,35.250011,44.597614,70.844467,,64.864494,28.25,151.756714,219.690002,77.722237,...,2223500,5332100,9430800,1997300,673600,2565300,1106529,427400,2257300,2195800
2018-11-20 00:00:00,65.096603,35.023674,42.466839,69.743942,,63.503571,27.92,148.148605,219.729996,80.903107,...,3958700,5892000,15533700,2858400,1625100,1871100,1192019,555900,2292400,2606200
2018-11-21 00:00:00,65.289421,35.732212,42.418835,68.208,,63.01622,28.0,146.35849,225.979996,81.502937,...,1997600,4026500,9685300,2154400,1398900,1751900,726356,328800,1676200,2271100
2018-11-23 00:00:00,65.761826,37.346119,41.341442,67.796295,,62.630009,28.0,146.31189,225.559998,80.557747,...,714100,1270100,10875400,697100,375900,853400,276555,81000,924000,836300
2018-11-26 00:00:00,66.764511,37.493729,41.900547,68.279236,,63.273697,28.16,147.682404,231.960007,81.857353,...,2741800,4587200,13741100,2176800,1087700,1743400,620781,200600,1664400,1908100


In [120]:
# Step 3: Calculate the price momentum factors for each asset in your ETF

def calculate_momentum_factors(data, lag=20):
    # Factor 1: Slope of 52-week trend line (20-day lag)
    data['Slope_52Week'] = data['Close'].pct_change(252 - lag).rolling(window=20).mean() * 100

    # Factor 2: Percent above 260-day low (20-day lag)
    data['Percent_Above_260Day_Low'] = (data['Close'] - data['Low'].rolling(window=260 - lag).min()) / (data['High'].rolling(window=260 - lag).max() - data['Low'].rolling(window=260 - lag).min()) * 100

    # Factor 3: 4/52 Week Price Oscillator (20-day lag)
    data['Price_Oscillator'] = (data['Close'].rolling(window=4).mean() / data['Close'].rolling(window=52 - lag).mean() - 1) * 100

    # Factor 4: 39-week return (20-day lag)
    data['39Week_Return'] = data['Close'].pct_change(39 - lag) * 100

    # Factor 5: 51-week Volume Price Trend (20-day lag)
    data['Volume_Price_Trend'] = (data['Close'].pct_change() * data['Volume']).rolling(window=51 - lag).sum()

    return data[['Slope_52Week', 'Percent_Above_260Day_Low', 'Price_Oscillator', '39Week_Return', 'Volume_Price_Trend']]

# Apply the function to ETF data and asset data
#etf_momentum_factors = calculate_momentum_factors(data)
all_asset = {}

for asset in SPY_tickers:
    assets_momentum_factors = data[asset].apply(calculate_momentum_factors)

# Display the calculated price momentum factors
# print("ETF Momentum Factors:")
# print(etf_momentum_factors.head())
print("\nAsset Momentum Factors:")
print(assets_momentum_factors.head())

# # Displaying the signals for each asset
# for asset, signals in assets_momentum_factors.items():
#     print(f"Signals for {asset}:\n{signals}\n")



KeyError: 'MMM'

In [119]:
data

Unnamed: 0_level_0,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,Adj Close,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Unnamed: 0_level_1,A,AAL,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,...,WYNN,XEL,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-11-19,60.362831,35.250011,44.597614,70.844467,,64.864494,28.250000,151.756714,219.690002,77.722237,...,2223500,5332100,9430800,1997300,673600,2565300,1106529,427400,2257300,2195800
2018-11-20,65.096603,35.023674,42.466839,69.743942,,63.503571,27.920000,148.148605,219.729996,80.903107,...,3958700,5892000,15533700,2858400,1625100,1871100,1192019,555900,2292400,2606200
2018-11-21,65.289421,35.732212,42.418835,68.208000,,63.016220,28.000000,146.358490,225.979996,81.502937,...,1997600,4026500,9685300,2154400,1398900,1751900,726356,328800,1676200,2271100
2018-11-23,65.761826,37.346119,41.341442,67.796295,,62.630009,28.000000,146.311890,225.559998,80.557747,...,714100,1270100,10875400,697100,375900,853400,276555,81000,924000,836300
2018-11-26,66.764511,37.493729,41.900547,68.279236,,63.273697,28.160000,147.682404,231.960007,81.857353,...,2741800,4587200,13741100,2176800,1087700,1743400,620781,200600,1664400,1908100
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-11-13,107.459999,11.780000,184.800003,138.639999,119.150002,95.790001,86.570000,315.630005,590.340027,170.660004,...,3779800,2960500,15308400,3955600,1261100,1230500,1702200,411200,1240300,1544900
2023-11-14,111.610001,12.250000,187.440002,138.059998,126.680000,97.080002,85.500000,320.489990,604.330017,177.580002,...,4061300,4250000,18260500,4303800,1500400,1353700,1909700,542700,3270200,1421500
2023-11-15,113.599998,12.420000,188.009995,137.600006,128.350006,98.000000,82.660004,325.500000,595.309998,180.779999,...,2544600,3439500,20137600,4925400,1297000,1309400,3648600,501600,2444400,1714100
2023-11-16,114.190002,12.190000,189.710007,138.279999,126.279999,100.260002,83.709999,327.320007,602.059998,179.839996,...,3022800,3970800,22469100,3552800,1314800,1554600,2300700,275700,2217300,1566900


In [None]:
momentum_signals.items()

dict_items([('A',             signal  positions
Date                         
2018-11-19     0.0        NaN
2018-11-20     0.0        0.0
2018-11-21     0.0        0.0
2018-11-23     0.0        0.0
2018-11-26     0.0        0.0
...            ...        ...
2023-11-13     1.0        1.0
2023-11-14     1.0        0.0
2023-11-15     1.0        0.0
2023-11-16     1.0        0.0
2023-11-17     1.0        0.0

[1258 rows x 2 columns]), ('AAL',             signal  positions
Date                         
2018-11-19     0.0        NaN
2018-11-20     0.0        0.0
2018-11-21     0.0        0.0
2018-11-23     0.0        0.0
2018-11-26     0.0        0.0
...            ...        ...
2023-11-13     1.0        0.0
2023-11-14     1.0        0.0
2023-11-15     1.0        0.0
2023-11-16     1.0        0.0
2023-11-17     1.0        0.0

[1258 rows x 2 columns]), ('AAPL',             signal  positions
Date                         
2018-11-19     0.0        NaN
2018-11-20     0.0        0.0
2018-11-21 