In [1]:
# Import dependencies
import yfinance as yf
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import hvplot.pandas
import holoviews as hv
hv.extension('bokeh')

In [3]:
# Set pandas display options
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [5]:
# Fetch S&P 500 tickers
#Define dates
start_date = '2020-01-01'
end_date = '2024-10-14'

#Get tickers from wikipedia
sp500_tickers = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol'].tolist()

# Filter out Class B shares that have a '.B' in the ticker name
sp500_tickers = [ticker for ticker in sp500_tickers if '.B' not in ticker]

print(f"Initial total S&P 500 tickers: {len(sp500_tickers)}")

Initial total S&P 500 tickers: 501


In [7]:
# Download historical prices
historical_prices = yf.download(sp500_tickers, start=start_date, end=end_date)['Adj Close']

# Fill NaN values with 0
historical_prices.fillna(0, inplace=True)

# Check if data was downloaded for all tickers
print(f"Successfully downloaded historical_prices: {len(historical_prices.columns)} out of {len(sp500_tickers)}")

# Display tickers that were successfully downloaded
downloaded_tickers = historical_prices.columns.get_level_values(0).unique()
print(f"Successfully downloaded tickers: {len(downloaded_tickers)} out of {len(sp500_tickers)}")
# Display data Frame
historical_prices.head()

[*********************100%***********************]  501 of 501 completed


Successfully downloaded historical_prices: 501 out of 501
Successfully downloaded tickers: 501 out of 501


Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-02 00:00:00+00:00,83.215,72.876,72.265,0.0,79.69,43.4,195.264,334.43,110.378,40.694,...,191.208,24.533,140.138,54.0,56.088,75.451,93.541,139.626,259.14,129.515
2020-01-03 00:00:00+00:00,81.879,72.168,71.579,0.0,78.718,43.35,194.938,331.81,108.435,40.614,...,191.255,24.575,138.059,54.26,55.637,75.895,93.248,139.261,256.05,129.534
2020-01-06 00:00:00+00:00,82.121,72.743,72.144,0.0,79.131,43.52,193.665,333.71,107.161,40.297,...,191.489,24.525,137.786,54.182,56.065,75.404,93.194,138.456,258.01,128.54
2020-01-07 00:00:00+00:00,82.373,72.401,71.732,0.0,78.691,43.16,189.484,333.39,109.599,39.811,...,191.096,24.366,138.44,54.07,55.606,75.12,93.358,138.335,256.47,128.974
2020-01-08 00:00:00+00:00,83.186,73.565,72.241,0.0,79.012,42.73,189.856,337.87,110.589,39.37,...,191.311,24.458,139.309,54.018,54.767,75.376,93.523,139.991,247.64,128.694


In [13]:
# Function to calculate daily volatility
def calculate_daily_volatility(prices):
    returns = prices.pct_change(fill_method=None)
    volatility = returns.rolling(window=21).std() * np.sqrt(252)  # Annualized daily volatility
    return volatility

In [21]:
# Calculate daily volatility for each ticker and store in a dictionary
volatility_dict = {ticker: calculate_daily_volatility(historical_prices[ticker]) for ticker in historical_prices.columns}

# Convert dictionary to DataFrame
daily_volatility_df = pd.DataFrame(volatility_dict)

# Rename columns to indicate daily volatility
daily_volatility_df.rename(columns={col: col + '_Daily_Volatility' for col in daily_volatility_df.columns}, inplace=True)

daily_volatility_df.tail()

Unnamed: 0_level_0,A_Daily_Volatility,AAPL_Daily_Volatility,ABBV_Daily_Volatility,ABNB_Daily_Volatility,ABT_Daily_Volatility,ACGL_Daily_Volatility,ACN_Daily_Volatility,ADBE_Daily_Volatility,ADI_Daily_Volatility,ADM_Daily_Volatility,...,WTW_Daily_Volatility,WY_Daily_Volatility,WYNN_Daily_Volatility,XEL_Daily_Volatility,XOM_Daily_Volatility,XYL_Daily_Volatility,YUM_Daily_Volatility,ZBH_Daily_Volatility,ZBRA_Daily_Volatility,ZTS_Daily_Volatility
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-10-07 00:00:00+00:00,0.243,0.243,0.156,0.294,0.168,0.279,0.297,0.389,0.303,0.238,...,0.159,0.222,0.424,0.109,0.253,0.147,0.207,0.15,0.224,0.149
2024-10-08 00:00:00+00:00,0.243,0.251,0.146,0.293,0.153,0.285,0.298,0.394,0.296,0.25,...,0.166,0.221,0.455,0.104,0.269,0.132,0.208,0.151,0.217,0.14
2024-10-09 00:00:00+00:00,0.242,0.256,0.139,0.292,0.157,0.303,0.296,0.39,0.3,0.251,...,0.173,0.214,0.437,0.104,0.228,0.14,0.208,0.162,0.211,0.151
2024-10-10 00:00:00+00:00,0.249,0.254,0.112,0.307,0.157,0.298,0.297,0.397,0.289,0.252,...,0.192,0.221,0.442,0.106,0.222,0.153,0.21,0.163,0.218,0.148
2024-10-11 00:00:00+00:00,0.241,0.255,0.112,0.307,0.158,0.298,0.297,0.393,0.283,0.253,...,0.191,0.217,0.441,0.107,0.22,0.164,0.21,0.165,0.221,0.148


In [23]:
# Concatenate the daily volatility DataFrame with historical prices DataFrame
historical_prices = pd.concat([historical_prices, daily_volatility_df], axis=1)

# Display results
print(historical_prices.head())

                               A   AAPL   ABBV  ABNB    ABT   ACGL     ACN  \
Date                                                                         
2020-01-02 00:00:00+00:00 83.215 72.876 72.265 0.000 79.690 43.400 195.264   
2020-01-03 00:00:00+00:00 81.879 72.168 71.579 0.000 78.718 43.350 194.938   
2020-01-06 00:00:00+00:00 82.121 72.743 72.144 0.000 79.131 43.520 193.665   
2020-01-07 00:00:00+00:00 82.373 72.401 71.732 0.000 78.691 43.160 189.484   
2020-01-08 00:00:00+00:00 83.186 73.565 72.241 0.000 79.012 42.730 189.856   

                             ADBE     ADI    ADM  ...  WTW_Daily_Volatility  \
Date                                              ...                         
2020-01-02 00:00:00+00:00 334.430 110.378 40.694  ...                   NaN   
2020-01-03 00:00:00+00:00 331.810 108.435 40.614  ...                   NaN   
2020-01-06 00:00:00+00:00 333.710 107.161 40.297  ...                   NaN   
2020-01-07 00:00:00+00:00 333.390 109.599 39.811  ...     