#### This notebook loads historical stock data from the Alpaca API

Load every minute stock data from todays NASDAQ 100 index companies, from 2000-01-01 to 2025-01-01

In [23]:
from alpaca.data import StockHistoricalDataClient
from alpaca.data.requests import StockBarsRequest
from alpaca.data.timeframe import TimeFrame
from pytickersymbols import PyTickerSymbols
from datetime import datetime
import pandas as pd
import os

# Keys
API_KEY = os.getenv("ALPACA_API_KEY")
SECRET_KEY = os.getenv("ALPACA_SECRET_KEY")

# Create client
stock_client = StockHistoricalDataClient(API_KEY, SECRET_KEY)

# Get all stock symbols in NASDAQ 100
stock_data = PyTickerSymbols()
nasdaq_100 = stock_data.get_stocks_by_index('NASDAQ 100')

# Create empty DataFrame
columns = ["symbol", "timestamp", "open", "high", "low", "close", "volume", "trade_count", "vwap"]
stock_df = pd.DataFrame(columns=columns)

stock_nr = 0
for stock in nasdaq_100:
    symbol = stock["symbol"]
    stock_nr += 1
    print(f"Processing stock {stock_nr}/100 - {symbol}")

    # Get the stock bars
    request_params = StockBarsRequest(symbol_or_symbols=[symbol], timeframe=TimeFrame.Minute, start=datetime(2000, 1, 1), end=datetime(2025, 1, 1))
    bars = stock_client.get_stock_bars(request_params)
    
    # Check if the symbol is in the data
    if (symbol in bars.data):
        # Clean stock data and add to DataFrame
        df = pd.DataFrame(bars[stock["symbol"]])
        df = df.map(lambda x: x[1]) 
        df.columns = ["symbol", "timestamp", "open", "high", "low", "close", "volume", "trade_count", "vwap"]
        if not df.empty:
            stock_df = pd.concat([stock_df, df], ignore_index=True)

# Save data to a csv file
stock_df.to_csv("./data/raw/raw__nasdaq100_2000_to_2025.csv", index=False)

Processing stock 1/100 - AZN


  stock_df = pd.concat([stock_df, df], ignore_index=True)


Processing stock 2/100 - AAPL
Processing stock 3/100 - CSCO
Processing stock 4/100 - INTC
Processing stock 5/100 - MSFT
Processing stock 6/100 - WBA
Processing stock 7/100 - ATVI
Processing stock 8/100 - ADBE
Processing stock 9/100 - GOOGL
Processing stock 10/100 - AMZN
Processing stock 11/100 - AMGN
Processing stock 12/100 - ADI
Processing stock 13/100 - AMAT
Processing stock 14/100 - ADSK
Processing stock 15/100 - ADP
Processing stock 16/100 - BIDU
Processing stock 17/100 - BIIB
Processing stock 18/100 - CHTR
Processing stock 19/100 - CTSH
Processing stock 20/100 - CMCSA
Processing stock 21/100 - COST
Processing stock 22/100 - CSX
Processing stock 23/100 - DLTR
Processing stock 24/100 - EBAY
Processing stock 25/100 - EA
Processing stock 26/100 - FAST
Processing stock 27/100 - FISV
Processing stock 28/100 - GILD
Processing stock 29/100 - ILMN
Processing stock 30/100 - INTU
Processing stock 31/100 - ISRG
Processing stock 32/100 - JD
Processing stock 33/100 - KHC
Processing stock 34/100

In [29]:
print(f"Number of rows: {len(stock_df)}")
print("\nNull values per column:")
print(stock_df.isnull().sum())
print("\nNumber of rows per symbol:")
print(stock_df["symbol"].value_counts())

Number of rows: 90965923

Null values per column:
symbol         0
timestamp      0
open           0
high           0
low            0
close          0
volume         0
trade_count    0
vwap           0
dtype: int64

Number of rows per symbol:
symbol
AAPL    1600432
AMD     1544113
TSLA    1544055
NVDA    1442176
META    1330800
         ...   
ANSS     647506
CRWD     631796
BKNG     595775
DDOG     564595
ABNB     467252
Name: count, Length: 101, dtype: int64
