In [39]:
import pandas as pd
import yfinance as yf
from tqdm import tqdm

In [None]:
# Specify the input file path for macOS
input_file = '/Users/Christopher/Desktop/stocks.csv'
# Specify the output file path for macOS
output_file = '/Users/Christopher/Desktop/stocks_data.csv'

# Read the CSV file (assuming headers: Symbol, Name, GICS Sector)
stocks_df = pd.read_csv(input_file)

In [44]:
# Filter rows to only include tickers in the 'Financials' and 'Health care' sectors
stocks_df = stocks_df[stocks_df['GICS Sector'].isin(['Financials'])]

# List to accumulate data frames for each ticker
final_df = []

final_df = pd.DataFrame(columns=["Ticker", "Date", "Adj Close", "GICS sector"])


In [45]:
# Loop through each stock entry with a progress bar
for idx, row in tqdm(stocks_df.iterrows(), total=stocks_df.shape[0], desc="Downloading stock data"):
    ticker = row['Symbol']         # Ticker symbol
    gics_sector = row['GICS Sector'] # GICS sector

    # Download historical data from 2000-01-01 to 2025-01-01
    data = yf.download(ticker, start="2000-01-01", end="2025-01-01", progress=False)
    
    if data.empty:
        continue

    # Reset index to bring Date in as a column
    data = data.reset_index()
    
    # Use 'Adj Close' if available; otherwise, fallback to 'Close'
    if 'Adj Close' in data.columns:
        price_column = 'Adj Close'
    elif 'Close' in data.columns:
        price_column = 'Close'
    else:
        continue

    # Keep only the Date and the price column, then rename if necessary
    data = data[['Date', price_column]]
    if price_column != 'Adj Close':
        data.rename(columns={price_column: 'Adj Close'}, inplace=True)
    
    # Assign the Ticker and GICS sector (this avoids duplicate columns)
    data['Ticker'] = ticker
    data['GICS sector'] = gics_sector

    # Reorder the columns to exactly: Ticker, Date, Adj Close, GICS sector
    data = data[['Ticker', 'Date', 'Adj Close', 'GICS sector']]
    
    # Append this new ticker's DataFrame to the bottom of final_df
    final_df = pd.concat([final_df, data], ignore_index=True)

Downloading stock data:  19%|█▉        | 14/73 [00:05<00:28,  2.06it/s]
1 Failed download:
['BRK.B']: YFTzMissingError('possibly delisted; no timezone found')
Downloading stock data: 100%|██████████| 73/73 [01:05<00:00,  1.11it/s]


In [46]:
final_df

Unnamed: 0,Ticker,Date,Adj Close,GICS sector,"(Ticker, )","(Date, )","(Adj Close, ACGL)","(GICS sector, )","(Adj Close, AFL)","(Adj Close, AIG)",...,"(Adj Close, STT)","(Adj Close, SYF)","(Adj Close, TFC)","(Adj Close, TROW)","(Adj Close, TRV)","(Adj Close, USB)","(Adj Close, V)","(Adj Close, WFC)","(Adj Close, WRB)","(Adj Close, WTW)"
0,,,,,ACGL,2000-01-03,1.215037,Financials,,,...,,,,,,,,,,
1,,,,,ACGL,2000-01-04,1.208433,Financials,,,...,,,,,,,,,,
2,,,,,ACGL,2000-01-05,1.320692,Financials,,,...,,,,,,,,,,
3,,,,,ACGL,2000-01-06,1.307485,Financials,,,...,,,,,,,,,,
4,,,,,ACGL,2000-01-07,1.380124,Financials,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
412769,,,,,WTW,2024-12-24,,Financials,,,...,,,,,,,,,,315.142151
412770,,,,,WTW,2024-12-26,,Financials,,,...,,,,,,,,,,316.976990
412771,,,,,WTW,2024-12-27,,Financials,,,...,,,,,,,,,,315.421387
412772,,,,,WTW,2024-12-30,,Financials,,,...,,,,,,,,,,312.359985
