In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import os
import time

# Ticker symbol -> company display name.
# Built from ordered pairs so the insertion order (which is also the order the
# download loop visits tickers) is explicit. The blank lines keep the author's
# original grouping of symbols.
stocks = dict([
    ("AAPL", "Apple"),
    ("MSFT", "Microsoft"),
    ("AMD", "AMD"),
    ("INTC", "Intel"),
    ("CRM", "Salesforce"),

    ("JPM", "JPMorgan"),
    ("BAC", "Bank of America"),
    ("WFC", "Wells Fargo"),
    ("GS", "Goldman Sachs"),
    ("SCHW", "Charles Schwab"),

    ("GOOGL", "Alphabet"),
    ("NVDA", "NVIDIA"),
    ("ADBE", "Adobe"),
    ("CSCO", "Cisco Systems"),
    ("ORCL", "Oracle"),
    ("QCOM", "Qualcomm"),

    ("AXP", "American Express"),
    ("C", "Citigroup"),
    ("USB", "U.S. Bancorp"),
    ("BLK", "BlackRock"),
    ("TROW", "T. Rowe Price"),
    ("MS", "Morgan Stanley"),
])

# Directory that receives one "<ticker>_hourly.csv" per symbol; created up
# front so the download loop can write into it unconditionally.
output_dir = "hourly_stock_chunks"
os.makedirs(output_dir, exist_ok=True)

# History is requested in windows of `chunk_days` days, walking backwards from
# `now` until `total_days` days have been covered (total_days // chunk_days
# requests per ticker).
chunk_days, total_days = 30, 720
now = datetime.now()

# Download hourly OHLCV bars for every symbol in `stocks`, one fixed-size date
# window at a time, and write one CSV per ticker into `output_dir`.
for ticker, company in stocks.items():
    print(f"\nFetching: {ticker} ({company})")
    # Windows walk backwards in time: the first request covers the most recent
    # `chunk_days` days, each later request covers the window just before it.
    current_end = now
    current_start = now - timedelta(days=chunk_days)
    all_chunks = []

    for i in range(total_days // chunk_days):
        print(f"Chunk {i+1}: {current_start.date()} → {current_end.date()}")
        try:
            # NOTE(review): yfinance documents `end` as exclusive, so bars dated
            # on `current_end` itself belong to the previous (newer) window and
            # the current calendar day is never requested - confirm intended.
            df = yf.download(
                ticker,
                start=current_start.strftime('%Y-%m-%d'),
                end=current_end.strftime('%Y-%m-%d'),
                interval='1h',
                # Pinned explicitly: the library default changed between
                # releases (see the warning in the recorded output). True is
                # the value that was in effect for that recorded run.
                auto_adjust=True,
                progress=False
            )
            if not df.empty:
                # Recent yfinance releases return two-level (price, ticker)
                # columns even for a single symbol. Left as-is, to_csv would
                # emit a second header row that read_csv later parses as a
                # data row. Keep only the price-field level.
                if isinstance(df.columns, pd.MultiIndex):
                    df.columns = df.columns.get_level_values(0)
                df = df[['Open', 'High', 'Low', 'Close', 'Volume']].copy()
                # Materialise the timestamp index as a regular column so it
                # survives concat(ignore_index=True) and to_csv(index=False).
                df.insert(0, 'Datetime', df.index)
                df['ticker'] = ticker
                df['company'] = company
                all_chunks.append(df)
                print(f"{len(df)} rows fetched.")
            else:
                print("No data.")
        except Exception as e:
            # Best-effort: a failed window is reported and skipped so the
            # remaining windows and tickers are still attempted.
            print(f"Error: {e}")

        # Step one window further into the past.
        current_end = current_start
        current_start = current_end - timedelta(days=chunk_days)
        # Pause between requests to avoid hammering the data provider.
        time.sleep(1)

    if all_chunks:
        # Chunks were collected newest-first; reverse them so the saved file
        # runs oldest -> newest (rows inside each chunk are kept in the order
        # yfinance returned them).
        full_df = pd.concat(all_chunks[::-1], ignore_index=True)
        full_df = full_df[['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume', 'ticker', 'company']]
        path = os.path.join(output_dir, f"{ticker}_hourly.csv")
        full_df.to_csv(path, index=False)
        print(f"Saved {ticker} data to {path}")
    else:
        print(f"No data for {ticker}")

print("Done")



Fetching: AAPL (Apple)
Chunk 1: 2025-02-25 → 2025-03-27
YF.download() has changed argument auto_adjust default to True
154 rows fetched.
Chunk 2: 2025-01-26 → 2025-02-25
140 rows fetched.
Chunk 3: 2024-12-27 → 2025-01-26
126 rows fetched.
Chunk 4: 2024-11-27 → 2024-12-27
132 rows fetched.
Chunk 5: 2024-10-28 → 2024-11-27
154 rows fetched.
Chunk 6: 2024-09-28 → 2024-10-28
140 rows fetched.
Chunk 7: 2024-08-29 → 2024-09-28
147 rows fetched.
Chunk 8: 2024-07-30 → 2024-08-29
154 rows fetched.
Chunk 9: 2024-06-30 → 2024-07-30
136 rows fetched.
Chunk 10: 2024-05-31 → 2024-06-30
140 rows fetched.
Chunk 11: 2024-05-01 → 2024-05-31
147 rows fetched.
Chunk 12: 2024-04-01 → 2024-05-01
154 rows fetched.
Chunk 13: 2024-03-02 → 2024-04-01
133 rows fetched.
Chunk 14: 2024-02-01 → 2024-03-02
147 rows fetched.
Chunk 15: 2024-01-02 → 2024-02-01
147 rows fetched.
Chunk 16: 2023-12-03 → 2024-01-02
133 rows fetched.
Chunk 17: 2023-11-03 → 2023-12-03
136 rows fetched.
Chunk 18: 2023-10-04 → 2023-11-03
154 

In [3]:
import os
import pandas as pd

# Merge every per-ticker CSV found in `input_dir` into one file sorted by
# ticker and then by timestamp.
input_dir = "hourly_stock_chunks"
merged_dfs = []
expected_columns = ['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume', 'ticker', 'company']

# sorted(): os.listdir returns entries in arbitrary order; a fixed order keeps
# the merge deterministic across runs and machines.
for file in sorted(os.listdir(input_dir)):
    if file.endswith(".csv"):
        df = pd.read_csv(os.path.join(input_dir, file))
        # Raises KeyError if a file is missing any expected column.
        df = df[expected_columns]
        # A CSV saved from a frame with two-level column headers carries a
        # second header line, which read_csv parses as a data row with an
        # empty Datetime. Drop such rows so they do not reach the output.
        df = df.dropna(subset=['Datetime'])
        merged_dfs.append(df)

# Fail with a clear message instead of pandas' "No objects to concatenate".
if not merged_dfs:
    raise ValueError(f"No .csv files found in {input_dir!r}; nothing to merge.")

merged_df = pd.concat(merged_dfs, ignore_index=True)
# Datetime is compared as read from the CSV (it is not parsed into timestamps
# here); assumes the stored text format orders chronologically - TODO confirm.
merged_df = merged_df.sort_values(by=['ticker', 'Datetime'])
merged_df.to_csv("final_hourly_stocks_cleaned.csv", index=False)

print("Done")


Done
