In [25]:
pip install --upgrade yfinance

Note: you may need to restart the kernel to use updated packages.


In [28]:
# Import yfinance library
import yfinance as yf

# Tickers to fetch in this download
tickers = ['AAPL', 'MSFT']

# Date window handed to the download call below
start_date, end_date = '2024-04-01', '2024-05-01'

# Download the price history of the selected tickers over that window
historical_prices = yf.download(
    tickers,
    start=start_date,
    end=end_date,
)

[*********************100%***********************]  2 of 2 completed


In [30]:
# Inspect the column index of the downloaded frame; as the last expression
# of the cell it is displayed by the notebook without needing print().
historical_prices.columns

MultiIndex([( 'Close', 'AAPL'),
            ( 'Close', 'MSFT'),
            (  'High', 'AAPL'),
            (  'High', 'MSFT'),
            (   'Low', 'AAPL'),
            (   'Low', 'MSFT'),
            (  'Open', 'AAPL'),
            (  'Open', 'MSFT'),
            ('Volume', 'AAPL'),
            ('Volume', 'MSFT')],
           names=['Price', 'Ticker'])


In [11]:
import pandas as pd

# Read the first table on the Wikipedia S&P 500 page and take its 'Symbol' column as a list
sp500_tickers = (
    pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]['Symbol']
    .to_list()
)

# Exclude every symbol that contains the substring '.B' (the Class B share listings).
# NOTE(review): these symbols are dropped entirely rather than remapped — confirm
# that leaving them out of the universe is intended.
sp500_tickers = list(filter(lambda symbol: '.B' not in symbol, sp500_tickers))

In [33]:
# Date window for the full-history download
start_date, end_date = '2000-01-01', '2024-12-20'

# Download the price history of every ticker in sp500_tickers,
# keeping the 'Adj Close' column in the result
historical_prices = yf.download(
    sp500_tickers,
    start=start_date, end=end_date,
    actions=False,      # leave out the dividend and split columns if they are not needed
    auto_adjust=False,  # turn off automatic adjustment so that 'Adj Close' is included
)

[*********************100%***********************]  500 of 500 completed


In [36]:
# Keep only the columns whose top-level label is 'Adj Close', then remove that
# level so the remaining column labels are the second-level ones.
historical_prices_test = (
    historical_prices
    .loc[:, historical_prices.columns.get_level_values(0) == 'Adj Close']
    .droplevel(0, axis=1)
)

# Preview the first rows of the extracted frame
historical_prices_test.head()

Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03,43.463024,0.843076,,,8.288181,1.277778,,16.274675,28.09569,6.248422,...,,11.28276,,6.848379,18.035824,,4.61194,,25.027779,
2000-01-04,40.142925,0.771997,,,8.051376,1.270833,,14.909399,26.674355,6.183332,...,,10.858904,,7.006069,17.690365,,4.519238,,24.666668,
2000-01-05,37.652866,0.783294,,,8.036577,1.388889,,15.204172,27.063749,6.085701,...,,11.434136,,7.276401,18.65477,,4.542413,,25.138889,
2000-01-06,36.219193,0.715509,,,8.31778,1.375,,15.328288,26.323889,6.118248,...,,11.96901,,7.208817,19.61916,,4.503788,,23.777779,
2000-01-07,39.237446,0.749401,,,8.406583,1.451389,,16.072989,27.063749,6.215878,...,,11.575426,,7.208817,19.5616,,4.403359,,23.513889,


In [37]:
# Number of non-missing values in each column of the frame
ticker_counts = historical_prices_test.count(axis=0)

# Threshold: minimum number of non-missing observations required per ticker
MIN_REQUIRED_NUM_OBS_PER_TICKER = 100

# Display the per-column counts
ticker_counts

Ticker
A       6282
AAPL    6282
ABBV    3013
ABNB    1013
ABT     6282
        ... 
XYL     3318
YUM     6282
ZBH     5889
ZBRA    6282
ZTS     2992
Length: 500, dtype: int64

In [39]:
# Labels of the tickers whose non-missing count reaches MIN_REQUIRED_NUM_OBS_PER_TICKER.
# Despite its name, valid_tickers_mask holds index labels, not booleans.
valid_tickers_mask = ticker_counts.index[ticker_counts >= MIN_REQUIRED_NUM_OBS_PER_TICKER]

# Restrict the frame to the columns of those tickers
historical_prices_test = historical_prices_test.loc[:, valid_tickers_mask]

In [40]:
# Preview the first five rows of the filtered frame
historical_prices_test.head(n=5)

Ticker,A,AAPL,ABBV,ABNB,ABT,ACGL,ACN,ADBE,ADI,ADM,...,WTW,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2000-01-03,43.463024,0.843076,,,8.288181,1.277778,,16.274675,28.09569,6.248422,...,,11.28276,,6.848379,18.035824,,4.61194,,25.027779,
2000-01-04,40.142925,0.771997,,,8.051376,1.270833,,14.909399,26.674355,6.183332,...,,10.858904,,7.006069,17.690365,,4.519238,,24.666668,
2000-01-05,37.652866,0.783294,,,8.036577,1.388889,,15.204172,27.063749,6.085701,...,,11.434136,,7.276401,18.65477,,4.542413,,25.138889,
2000-01-06,36.219193,0.715509,,,8.31778,1.375,,15.328288,26.323889,6.118248,...,,11.96901,,7.208817,19.61916,,4.503788,,23.777779,
2000-01-07,39.237446,0.749401,,,8.406583,1.451389,,16.072989,27.063749,6.215878,...,,11.575426,,7.208817,19.5616,,4.403359,,23.513889,
