In [22]:
# Import libraries for data analysis and visualization
import pandas as pd
import numpy as np
import yfinance as yf # Yahoo Finance data retrieval

# Import libraries for plotting and visualization
import matplotlib.pyplot as plt
import hvplot.pandas

# Pandas display configuration:
# render floats with three fixed decimals rather than scientific notation
pd.set_option('display.float_format', '{:.3f}'.format)

In [3]:
# Define the start and end dates
start_date = '2023-01-01'
end_date   = '2024-10-15'

# Define the list of tickers
tickers = ['TSLA', 'GE']

# Download historical prices for the list of tickers.
# auto_adjust is passed explicitly because its default differs between
# yfinance releases; with auto_adjust=False the result keeps a separate
# 'Adj Close' column next to 'Close', which the rest of this notebook expects.
historical_prices = yf.download(tickers, start=start_date, end=end_date, auto_adjust=False)

# Show the most recent rows as a quick sanity check of the download
historical_prices.tail()

[*********************100%***********************]  2 of 2 completed


Price,Adj Close,Adj Close,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Ticker,GE,TSLA,GE,TSLA,GE,TSLA,GE,TSLA,GE,TSLA,GE,TSLA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2024-10-08 00:00:00+00:00,187.47,244.5,187.47,244.5,189.88,246.21,186.56,240.56,187.62,243.56,3627200,56303200
2024-10-09 00:00:00+00:00,189.28,241.05,189.28,241.05,189.42,247.43,186.5,239.51,186.63,243.82,2312600,66289500
2024-10-10 00:00:00+00:00,188.12,238.77,188.12,238.77,189.75,242.79,187.39,232.34,188.15,241.81,2300900,83087100
2024-10-11 00:00:00+00:00,191.16,217.8,191.16,217.8,191.77,223.34,189.06,214.38,189.41,220.13,2983900,142628900
2024-10-14 00:00:00+00:00,192.63,219.16,192.63,219.16,193.93,221.91,191.66,213.74,191.98,220.13,3132900,86291900


In [4]:

# Read the first table on the Wikipedia S&P 500 page, take its 'Symbol' column,
# and keep every symbol except Class B shares (those with '.B' in the name)
sp500_tickers = list(filter(
    lambda symbol: '.B' not in symbol,
    pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].to_list(),
))

In [5]:
# Define the start and end dates
start_date = '2000-01-01'
end_date   = '2024-10-14'

# Download historical prices for the list of tickers.
# auto_adjust is passed explicitly because its default differs between
# yfinance releases; with auto_adjust=False the download keeps the
# 'Adj Close' column that the next cell selects. Without that column the
# selection there would silently produce an empty frame.
historical_prices = yf.download(sp500_tickers, start=start_date, end=end_date, auto_adjust=False)

[*********************100%***********************]  501 of 501 completed


In [6]:
# Keep only the 'Adj Close' block of the two-level column index; taking the
# cross-section drops the outer level, leaving one column per ticker
historical_prices = historical_prices.xs('Adj Close', axis=1, level=0)
historical_prices.head()

Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03 00:00:00+00:00,43.463,0.844,,,8.288,1.278,,16.275,28.215,6.307,...,,11.353,,6.848,18.036,,4.634,,25.028,
2000-01-04 00:00:00+00:00,40.143,0.773,,,8.051,1.271,,14.909,26.787,6.242,...,,10.926,,7.006,17.69,,4.541,,24.667,
2000-01-05 00:00:00+00:00,37.653,0.784,,,8.037,1.389,,15.204,27.178,6.143,...,,11.505,,7.276,18.655,,4.564,,25.139,
2000-01-06 00:00:00+00:00,36.219,0.716,,,8.318,1.375,,15.328,26.435,6.176,...,,12.043,,7.209,19.619,,4.526,,23.778,
2000-01-07 00:00:00+00:00,39.237,0.75,,,8.407,1.451,,16.073,27.178,6.274,...,,11.647,,7.209,19.562,,4.425,,23.514,


In [7]:
# Look-back windows (in rows of the price table) used for the momentum features
list_of_momentums = list(range(1, 6))

In [8]:
# Number of rows ahead that the target return should cover
forecast_horizon = 1
# Percentage change of the adjusted-close prices over that many rows.
# At this point each value is still stamped at the END of its window;
# it only becomes a forward-looking return once it is shifted back.
f_returns = historical_prices.pct_change(periods=forecast_horizon)

In [9]:
# Build the forward returns directly from the prices: take the
# forecast_horizon-row percentage change and move it back by the same number
# of rows, so each date carries the return realised AFTER that date.
# Recomputing from historical_prices (instead of shifting f_returns onto
# itself) keeps this cell safe to re-run; a self-referential shift would move
# the data one more step on every execution.
f_returns = historical_prices.pct_change(forecast_horizon).shift(-forecast_horizon)
# Preview the first ten tickers only
f_returns.iloc[:, 0:10].head()

Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-03 00:00:00+00:00,-0.076,-0.084,,,-0.029,-0.005,,-0.084,-0.051,-0.01
2000-01-04 00:00:00+00:00,-0.062,0.015,,,-0.002,0.093,,0.02,0.015,-0.016
2000-01-05 00:00:00+00:00,-0.038,-0.087,,,0.035,-0.01,,0.008,-0.027,0.005
2000-01-06 00:00:00+00:00,0.083,0.047,,,0.011,0.056,,0.049,0.028,0.016
2000-01-07 00:00:00+00:00,0.061,-0.018,,,-0.007,0.033,,0.039,0.087,0.0


In [10]:
# Column label encodes the forecast horizon
name = "F_" + str(forecast_horizon) + "_d_returns"
# Collapse the wide table (one column per ticker) into a single long column
# indexed by (Ticker, Date), labelled with the name built above
f_returns = f_returns.unstack().to_frame(name=name)
f_returns.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,F_1_d_returns
Ticker,Date,Unnamed: 2_level_1
A,2000-01-03 00:00:00+00:00,-0.076
A,2000-01-04 00:00:00+00:00,-0.062
A,2000-01-05 00:00:00+00:00,-0.038
A,2000-01-06 00:00:00+00:00,0.083
A,2000-01-07 00:00:00+00:00,0.061


In [11]:
# Initialize total_returns with forward returns.
# Take a copy rather than a second name for the same object, so that later
# in-place operations on total_returns (dropping rows, adding columns) can
# never modify f_returns as a side effect.
total_returns = f_returns.copy()

In [12]:
# Build one named Series per momentum window: the i-row percentage change of
# the prices, reshaped to the same long (Ticker, Date) index as total_returns
# and labelled "<i>_d_returns".
momentum_features = [
    historical_prices.pct_change(i).unstack().rename(f"{i}_d_returns")
    for i in list_of_momentums
]
# Attach all features in one outer-aligned concat on the index. This replaces
# the previous pattern of re-merging the whole frame once per window (and the
# duplicated / no-op rename calls that came with it).
total_returns = pd.concat([total_returns, *momentum_features], axis=1, join='outer')
    

In [13]:
# Keep only rows where the target and every feature are present
# (dropna defaults: axis=0, how='any')
total_returns = total_returns.dropna()
total_returns.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,F_1_d_returns,1_d_returns,2_d_returns,3_d_returns,4_d_returns,5_d_returns
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,2000-01-10 00:00:00+00:00,-0.014,0.061,0.149,0.105,0.037,-0.043
A,2000-01-11 00:00:00+00:00,-0.02,-0.014,0.046,0.133,0.09,0.023
A,2000-01-12 00:00:00+00:00,0.015,-0.02,-0.034,0.025,0.11,0.068
A,2000-01-13 00:00:00+00:00,0.011,0.015,-0.006,-0.019,0.04,0.127
A,2000-01-14 00:00:00+00:00,0.046,0.011,0.026,0.006,-0.008,0.052


In [14]:
# Pairwise Pearson correlation between the forward return and each momentum feature
total_returns.corr(method='pearson')

Unnamed: 0,F_1_d_returns,1_d_returns,2_d_returns,3_d_returns,4_d_returns,5_d_returns
F_1_d_returns,1.0,-0.025,-0.024,-0.024,-0.029,-0.031
1_d_returns,-0.025,1.0,0.695,0.564,0.486,0.43
2_d_returns,-0.024,0.695,1.0,0.806,0.694,0.615
3_d_returns,-0.024,0.564,0.806,1.0,0.856,0.759
4_d_returns,-0.029,0.486,0.694,0.856,1.0,0.884
5_d_returns,-0.031,0.43,0.615,0.759,0.884,1.0


In [15]:
# Function to create 'go_long' based on the bottom n-quantile of a column
def create_go_long_signal(df, column, n=0.50):
    """Return a copy of ``df`` with a binary ``go_long`` column added.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``column``. It is not modified.
    column : str
        Name of the column whose lowest values trigger the signal.
    n : float, default 0.50
        Quantile of ``column`` used as the cutoff (0.50 = bottom half,
        0.20 = bottom 20%).

    Returns
    -------
    pandas.DataFrame
        A new frame equal to ``df`` plus ``go_long``: 1 where the row's
        ``column`` value is less than or equal to the cutoff, otherwise 0.
    """
    # NOTE: the cutoff is a single value taken over the entire column, i.e.
    # over every row passed in at once, not per date or per ticker.
    threshold = df[column].quantile(n)

    # assign() builds a new frame, so the caller's DataFrame is left untouched
    return df.assign(go_long=(df[column] <= threshold).astype(int))

# Flag rows on the trailing 1-row return, using the function's default cutoff
total_returns_with_signal = create_go_long_signal(df=total_returns, column='1_d_returns')
total_returns_with_signal



Unnamed: 0_level_0,Unnamed: 1_level_0,F_1_d_returns,1_d_returns,2_d_returns,3_d_returns,4_d_returns,5_d_returns,go_long
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,2000-01-10 00:00:00+00:00,-0.014,0.061,0.149,0.105,0.037,-0.043,0
A,2000-01-11 00:00:00+00:00,-0.020,-0.014,0.046,0.133,0.090,0.023,1
A,2000-01-12 00:00:00+00:00,0.015,-0.020,-0.034,0.025,0.110,0.068,1
A,2000-01-13 00:00:00+00:00,0.011,0.015,-0.006,-0.019,0.040,0.127,0
A,2000-01-14 00:00:00+00:00,0.046,0.011,0.026,0.006,-0.008,0.052,0
...,...,...,...,...,...,...,...,...
ZTS,2024-10-04 00:00:00+00:00,-0.014,-0.005,-0.013,-0.023,-0.027,-0.025,1
ZTS,2024-10-07 00:00:00+00:00,0.000,-0.014,-0.019,-0.027,-0.037,-0.041,1
ZTS,2024-10-08 00:00:00+00:00,0.015,0.000,-0.014,-0.019,-0.026,-0.036,1
ZTS,2024-10-09 00:00:00+00:00,0.001,0.015,0.015,0.001,-0.004,-0.012,0


In [16]:
# Baseline for the comparison: summary statistics of the forward return over
# every row of the universe, regardless of the go_long flag
total_returns_with_signal.loc[:, ['F_1_d_returns']].describe()

Unnamed: 0,F_1_d_returns
count,2747440.0
mean,0.001
std,0.023
min,-0.681
25%,-0.009
50%,0.001
75%,0.01
max,1.024


In [17]:
# Same summary statistics, restricted to the rows flagged go_long == 1,
# to set against the universe-wide figures
total_returns_with_signal.loc[total_returns_with_signal['go_long'].eq(1), 'F_1_d_returns'].describe()

count   1373720.000
mean          0.001
std           0.024
min          -0.681
25%          -0.009
50%           0.001
75%           0.011
max           0.870
Name: F_1_d_returns, dtype: float64

In [18]:
# 'realized_returns' = forward return where go_long is 1, zero elsewhere
# (element-wise product of the 0/1 flag and 'F_1_d_returns')
total_returns_with_signal['realized_returns'] = (
    total_returns_with_signal['go_long'].mul(total_returns_with_signal['F_1_d_returns'])
)



In [19]:
# Calculate daily average for the entire universe (using 'F_1_d_returns')
average_all_per_day = total_returns_with_signal.groupby('Date')['F_1_d_returns'].mean()

# Calculate daily average realized returns over the go_long == 1 rows only.
# Averaging 'realized_returns' across ALL rows would count every non-selected
# row as a 0% return and shrink the strategy's daily return by the share of
# rows that were not selected, so the rows are filtered first.
average_realized_per_day = (
    total_returns_with_signal.loc[total_returns_with_signal['go_long'] == 1]
    .groupby('Date')['realized_returns']
    .mean()
    # A date with no flagged rows means nothing was held: record 0 rather than
    # leaving a gap, so both series cover the same dates
    .reindex(average_all_per_day.index, fill_value=0.0)
)

# Compute cumulative returns for both realized and all universe.
# cumsum adds the daily averages arithmetically (no compounding).
cumulative_realized = average_realized_per_day.cumsum()
cumulative_all = average_all_per_day.cumsum()

In [20]:

# Create a DataFrame for plotting; the column names become the legend entries.
# The label states 50% because the go_long signal is created with the default
# cutoff n=0.50 — update it if a different n is passed to create_go_long_signal.
cumulative_returns = pd.DataFrame({
    'Cumulative Realized Returns (Bottom 50%)': cumulative_realized,
    'Cumulative Average Returns (All)': cumulative_all
})

In [21]:
# Interactive hvplot chart: one curve per column of cumulative_returns
cumulative_returns.hvplot(
    title='Cumulative Returns: Realized (Bottom n%) vs All Universe',
    xlabel='Date',
    ylabel='Cumulative Return',
    legend=True,
    grid=True,
    height=400,
    width=800,
)