### The Requirements:
Step 1: Choose an ETF with a minimum of 100 assets, identify those assets

Step 2: Retrieve historical data for your chosen ETF

Step 3: Calculate the price momentum factors for each asset in your ETF

Step 4: Using the price momentum factors, calculate the monthly z-factor score for each asset

Step 5: Identify long and short baskets (10 to 15 assets in each) using calculated z-factors

Step 6: Create a backtest to validate performance of your algorithm based on monthly restructuring over the previous 5 years.

Step 7: Chart:

1. Monthly portfolio return bar chart (pos/neg coloring) vs ETF

2. Monthly return for/ long picks vs short picks vs ETF

3. Cumulative portfolio return vs ETF

In [24]:
# Import Libraries
import pandas as pd
import yfinance as yf

import numpy as np

In [None]:
class Factors:
    
    def __init__(self) -> None:
        pass
    
    def get_data(tickers_list):
        # Step 2: Retrieve historical data for your chosen ETF
        # Adj Close
        df = yf.download(tickers_list, period = '10y')['Adj Close']
        return df

In [25]:
# Step 1: Choose an ETF with a minimum of 100 assets, identify those assets
# etf = ["SPY"]

# Get the list of S&P 500 constituents
# SPY_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()
SPY_tickers = pd.read_excel("https://www.ssga.com/us/en/intermediary/etfs/library-content/products/fund-data/etfs/us/holdings-daily-us-en-spy.xlsx", header=4).Ticker.dropna().to_list()

print(f'{len(SPY_tickers)} tickers')


504 tickers


In [26]:
# Step 2: Retrieve historical data for your chosen ETF
df = yf.download(SPY_tickers, period = '10y')['Adj Close']


[*********************100%%**********************]  504 of 504 completed


3 Failed downloads:
['-', 'BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (period=10y)')





In [27]:
# Drop na
sp500 = df.dropna(how= 'all', axis= 1)
# sp500.head()
returns = sp500.pct_change()

In [56]:
tickers = list(sp500.columns)

In [28]:
# Step 3: Calculate the price momentum factors for each asset in your ETF

sp500['A'].rolling(window=252).mean()


Date
2013-12-11           NaN
2013-12-12           NaN
2013-12-13           NaN
2013-12-16           NaN
2013-12-17           NaN
                 ...    
2023-12-05    128.827606
2023-12-06    128.737452
2023-12-07    128.651705
2023-12-08    128.550707
2023-12-11    128.442994
Name: A, Length: 2517, dtype: float64

In [29]:
rolling_mean = sp500['A'].rolling(window=252)

In [30]:
# Assuming you have a DataFrame named 'sp500' and want to calculate the slope for column 'A'
# sp500 = ...

# Calculate the 52-week rolling mean
rolling_mean = sp500['A'].rolling(window=252).mean()

# Drop NaN values introduced by the rolling mean calculation
rolling_mean = rolling_mean.dropna()

# Prepare data for linear regression
X = np.arange(len(rolling_mean)).reshape(-1, 1)
y = rolling_mean.values.reshape(-1, 1)

# Use NumPy for linear regression
slope = np.polyfit(X.flatten(), y.flatten(), 1)[0]

# Print the calculated slope
print("52-Week Slope:", slope)


52-Week Slope: 0.055499019133098876


In [31]:
# Calculate the 52-week rolling mean with a 20 day lag 
# Drop NaN values introduced by the rolling mean calculation
lagged_rolling_mean = rolling_mean.shift(20).dropna()

# Prepare data for linear regression
X = np.arange(len(lagged_rolling_mean)).reshape(-1, 1)
y = lagged_rolling_mean.values.reshape(-1, 1)

# Use NumPy for linear regression
slope = np.polyfit(X.flatten(), y.flatten(), 1)[0]

# Print the calculated slope
print("52-Week Slope:", slope)

52-Week Slope: 0.055800132942397516


In [58]:

# 20 day lag time period 
starting_period = -20 -252
end_period = -20
lagged_closed_price = sp500[starting_period : end_period]

# calculating 52 Week trend 
polyfit_regression = np.polyfit((range(0,252)), lagged_closed_price, 1) 
# The first parameter of our regression is x, 0-252 days. Second is the y, Closing price for slice of data with 20 - day lag
slope_info = pd.DataFrame(polyfit_regression) # Convert so that we can use the results
slope_info.columns = sp500.columns # Setting column names to match ticker
_52_week_trend= slope_info.iloc[0] # Only need first row 
_52_week_trend

A      -0.201892
AAL    -0.005509
AAPL    0.208212
ABBV   -0.036966
ABNB    0.155169
          ...   
YUM     0.000418
ZBH    -0.032652
ZBRA   -0.220058
ZION   -0.073594
ZTS     0.114156
Name: 0, Length: 501, dtype: float64

In [44]:
pd.DataFrame(np.polyfit(range(len(sp500[-2 - 252 : -2])), sp500[-2 - 252 : -2], 1))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,500
0,-0.18031,-0.009383,0.214895,-0.038531,0.136895,-0.03405,0.096279,0.26103,1.208095,0.018377,...,-0.040434,-0.04599,0.001338,-0.011804,-0.042581,-0.012404,-0.067215,-0.308605,-0.05122,0.093069
1,151.28059,15.676822,141.291366,150.718516,103.989485,107.573083,61.424092,261.061523,296.995618,175.668324,...,104.184619,68.997392,106.717659,37.55925,107.874634,130.249062,133.396846,308.433066,41.862322,158.452118


In [61]:
for i in range(1,21):
    print(i)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [57]:
def trendline(ticker):
    df = pd.DataFrame(index=tickers)
    
    # for 20 day
    for i in range(1,21):
        df[i] = np.polyfit(range(len(sp500[-i - 252 : -i])), sp500[-i - 252 : -i], 1)[0]
    # averages out the day
    return df.mean(axis=1)
    
trendline(ticker=tickers)

A      -0.195110
AAL    -0.007703
AAPL    0.212375
ABBV   -0.041042
ABNB    0.145643
          ...   
YUM    -0.007209
ZBH    -0.053532
ZBRA   -0.273062
ZION   -0.061951
ZTS     0.100985
Length: 501, dtype: float64

In [33]:
lag = 20
lagged_rolling_mean = sp500['A'].rolling(window=52).mean().shift(lag).dropna()

# Prepare data for linear regression
X = np.arange(len(lagged_rolling_mean)).reshape(-1, 1)
y = lagged_rolling_mean.values.reshape(-1, 1)

# Use NumPy for linear regression
slope = np.polyfit(X.flatten(), y.flatten(), 1)[0]

# Print the calculated slope
print("52-Week Slope:", slope)

52-Week Slope: 0.05183973686820035


In [34]:
# Step 3: Calculate the price momentum factors for each asset in your ETF

def calculate_momentum_factors(data, lag=20):
    # Factor 1: Slope of 52-week trend line (20-day lag)
    data['Slope_52Week'] = data['Close'].pct_change(252 - lag).rolling(window=20).mean() * 100

    # Factor 2: Percent above 260-day low (20-day lag)
    data['Percent_Above_260Day_Low'] = (data['Close'] - data['Low'].rolling(window=260 - lag).min()) / (data['High'].rolling(window=260 - lag).max() - data['Low'].rolling(window=260 - lag).min()) * 100

    # Factor 3: 4/52 Week Price Oscillator (20-day lag)
    data['Price_Oscillator'] = (data['Close'].rolling(window=4).mean() / data['Close'].rolling(window=52 - lag).mean() - 1) * 100

    # Factor 4: 39-week return (20-day lag)
    data['39Week_Return'] = data['Close'].pct_change(39 - lag) * 100

    # Factor 5: 51-week Volume Price Trend (20-day lag)
    data['Volume_Price_Trend'] = (data['Close'].pct_change() * data['Volume']).rolling(window=51 - lag).sum()

    return data[['Slope_52Week', 'Percent_Above_260Day_Low', 'Price_Oscillator', '39Week_Return', 'Volume_Price_Trend']]

# Apply the function to ETF data and asset data
#etf_momentum_factors = calculate_momentum_factors(data)
all_asset = {}

for asset in SPY_tickers:
    assets_momentum_factors = data[asset].apply(calculate_momentum_factors)

# Display the calculated price momentum factors
# print("ETF Momentum Factors:")
# print(etf_momentum_factors.head())
print("\nAsset Momentum Factors:")
print(assets_momentum_factors.head())

# # Displaying the signals for each asset
# for asset, signals in assets_momentum_factors.items():
#     print(f"Signals for {asset}:\n{signals}\n")



NameError: name 'data' is not defined