In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta



In [2]:
# Universe of symbols to download, grouped by category.
stock_tickers = ["UNH", "PFE", "FSLR", "NEE", "XOM", "CVX", "COIN", "MSTR", "LMT", "RTX", "JPM", "GS"]
sector_tickers = ["XLK", "XLV", "XLF", "XLE", "XLY", "XLP", "XLI", "XLB", "XLU", "XLRE", "XLC"]
index_tickers = ["^GSPC", "^IXIC", "SPY", "DIA", "QQQ", "IWM", "MDY", "VTI", "IJR"]

# Fixed download window: the 365 days before 2024-11-15.
# The end date is pinned on purpose (it is NOT "today"), so every run requests
# the same window.
end_date = datetime(2024, 11, 15)
start_date = end_date - timedelta(days=365)

# Combine every symbol so they are fetched in a single batch request.
all_symbols = stock_tickers + sector_tickers + index_tickers

# Download hourly bars; group_by="ticker" puts the ticker on the first column level.
# NOTE(review): multi_level_index=False does not flatten the columns when several
# tickers are requested -- the resulting frame still has (Ticker, Price) levels.
# NOTE(review): Yahoo restricts how far back intraday bars can be requested;
# confirm this window is still retrievable before re-running.
data = yf.download(all_symbols, start=start_date, end=end_date, interval='1h', group_by="ticker", multi_level_index=False)

# Do not report success unconditionally: stop if the download came back empty
# instead of raising by itself.
if data.empty:
    raise RuntimeError(
        "No data was downloaded; check the ticker symbols, the date range and the network connection."
    )

# Surface symbols that are absent from the result or contain only missing values.
downloaded_symbols = set(data.columns.get_level_values(0))
symbols_without_data = [
    symbol
    for symbol in all_symbols
    if symbol not in downloaded_symbols or data[symbol].isna().all().all()
]
if symbols_without_data:
    print(f"Warning: no data returned for: {', '.join(symbols_without_data)}")

# Display a summary
print("Data successfully downloaded!")
data.head()

[*********************100%***********************]  32 of 32 completed

Data successfully downloaded!





Ticker,COIN,COIN,COIN,COIN,COIN,COIN,XOM,XOM,XOM,XOM,...,UNH,UNH,UNH,UNH,PFE,PFE,PFE,PFE,PFE,PFE
Price,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2023-11-16 14:30:00+00:00,97.68,97.790001,95.040001,97.010002,97.010002,2528149.0,102.900002,103.269997,101.989998,102.379997,...,537.869995,541.650024,541.650024,439395.0,30.27,30.450001,29.709999,29.735001,29.735001,5649396.0
2023-11-16 15:30:00+00:00,97.050003,97.360001,94.449997,96.010002,96.010002,1278782.0,102.389999,102.627899,102.139999,102.25,...,539.599976,539.710022,539.710022,273615.0,29.735901,29.806601,29.540001,29.594999,29.594999,4091078.0
2023-11-16 16:30:00+00:00,95.949997,97.650002,95.57,97.449997,97.449997,698899.0,102.239998,102.294998,101.68,101.779999,...,539.219971,540.136902,540.136902,194925.0,29.6,29.610001,29.469999,29.529699,29.529699,3499860.0
2023-11-16 17:30:00+00:00,97.464104,97.889999,96.080002,96.800003,96.800003,620416.0,101.790001,102.050003,101.150002,101.785004,...,539.640015,539.650024,539.650024,143243.0,29.5243,29.639999,29.5,29.559999,29.559999,2233247.0
2023-11-16 18:30:00+00:00,96.779999,96.909897,95.610001,95.629997,95.629997,469640.0,101.785004,102.220001,101.699997,102.010002,...,537.830017,537.869995,537.869995,209697.0,29.565001,29.610001,29.51,29.540001,29.540001,2418609.0


In [3]:
# Reshape from wide (one column group per ticker) to long: one row per
# (Ticker, Date) pair, with the price/volume fields as plain columns.
# future_stack=True opts into pandas' newer stack implementation, which keeps
# rows even when every value in them is missing.
data_df = (
    data.stack(level=0, future_stack=True)  # move the ticker column level into the row index
    .rename_axis(index=['Date', 'Ticker'], columns=None)  # name the index levels, clear the columns name
    .reorder_levels(['Ticker', 'Date'])  # index becomes (Ticker, Date); row order is unchanged
)

In [4]:
# Spot-check ten random rows of the long-format frame. The fixed random_state
# makes the same rows appear on every Restart & Run All.
data_df.sample(10, random_state=42)

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close,Adj Close,Volume
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
^GSPC,2023-12-19 20:30:00+00:00,4761.759766,4768.689941,4760.430176,4768.439941,4768.439941,320593000.0
FSLR,2024-04-10 15:30:00+00:00,182.880005,183.699997,182.199997,183.229996,183.229996,226785.0
QQQ,2024-03-26 13:30:00+00:00,446.279999,447.225006,445.790009,445.959991,445.959991,7270684.0
XLB,2024-05-03 16:30:00+00:00,89.5,89.559898,89.389999,89.480003,89.480003,600474.0
LMT,2024-02-27 19:30:00+00:00,429.652802,430.51001,428.829987,430.410004,430.410004,81275.0
XLF,2024-08-09 18:30:00+00:00,42.325001,42.43,42.279999,42.395,42.395,1983308.0
LMT,2024-06-13 16:30:00+00:00,457.059998,458.119995,457.040009,458.109985,458.109985,60551.0
XLY,2024-05-30 18:30:00+00:00,175.080002,175.190002,174.699997,174.899994,174.899994,238009.0
GS,2023-11-17 19:30:00+00:00,338.779999,338.88501,338.200012,338.700104,338.700104,135084.0
VTI,2024-10-02 14:30:00+00:00,280.450012,281.100006,280.309998,280.959991,280.959991,350185.0


In [5]:
# Optional: write the long-format frame to disk so it can be inspected outside
# the notebook. The date format has no %z, so timestamps are written without
# their UTC offset.
csv_path = "ticker_data.csv"
data_df.to_csv(csv_path, date_format='%Y-%m-%d %H:%M:%S')