# Make sure you Alpaca credentials are being read properly and you are connecting to Alpaca

In [1]:
# Standard libraries
from datetime import datetime, timedelta
import time
import sys

# Non-standard libraries
import pandas as pd
from tqdm import tqdm

# Alpaca API
from alpaca_trade_api.rest import REST

# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Project-specific imports
sys.path.append(r"d:\dev\stat_656_autotrader")
from credentials import API_KEY, SECRET_KEY, ENDPOINT_URL 

## Remove the '#' and uncomment the code to print your credentials from the .secrets file

In [2]:
#print(f"API_KEY: {API_KEY}, SECRET_KEY: {SECRET_KEY}, ENDPOINT_URL: {ENDPOINT_URL}")

## Connect to Alpaca

In [3]:
# Initialize Alpaca REST client
try:
    alpaca = REST(API_KEY, SECRET_KEY, base_url=ENDPOINT_URL)
    print("Connected to Alpaca successfully!")

except Exception as e:
    print(f"Connection failed: {e}")

Connected to Alpaca successfully!


## Get a list of all Alpaca stock tickers traded on (NASDAQ, NYSE, AMEX)

In [None]:
# API call for Alpaca assets
assets = alpaca.list_assets(status='active')  # Consider only active assets

# All US exchanges (NASDAQ, NYSE)
us_exchanges = ['NASDAQ', 'NYSE', 'AMEX']

# Filter to company stocks only (Uppercase letters only, no numbers, periods, underscores, lowercase)
stock_assets = [
    asset for asset in assets
    if asset.exchange in us_exchanges
    and asset.tradable
    and asset.status == 'active'
    and asset.symbol.isalpha()
    and asset.symbol.isupper()
]

stock_tickers = [asset.symbol for asset in stock_assets]

print(f"Fetched {len(stock_tickers)} company stock tickers from Alpaca (U.S. exchanges, no CUSIPs/ETFs/SPACs)!")
print("First 20 company stock tickers:", stock_tickers[:20])

    

Fetched 7177 company stock tickers from Alpaca (U.S. exchanges, no CUSIPs/ETFs/SPACs)!
First 150 company stock tickers: ['XHR', 'NXN', 'NXLIW', 'XIFR', 'NXL', 'ELC', 'ELBM', 'ELAB', 'IGI', 'IDAI', 'ICUCW', 'ICU', 'NXJ', 'EKSO', 'EKG', 'HTFC', 'HOVR', 'NXGLW', 'NXG', 'HIFS', 'XMTR', 'GRDN', 'GDSTR', 'XOM', 'GB', 'EIIA', 'FRHC', 'XP', 'XPEV', 'XPRO', 'XRX', 'EICC', 'NWTN', 'XYZ', 'FIVY', 'EICB', 'YELP', 'NWTG', 'YETI', 'EICA', 'YEXT', 'NWGL', 'EIC', 'FDFF', 'NWG', 'YMM', 'YORW', 'EHLS', 'YOU', 'YPF', 'EHI', 'EHGO', 'FBLG', 'FBIOP', 'FBIO', 'NVVEW', 'NVVE', 'FBGL', 'YRD', 'YSG', 'YUM', 'YUMC', 'YY', 'EXEEW', 'EXEEL', 'EGGQ', 'EGF', 'ESMV', 'NVNIW', 'NVNI', 'Z', 'ZBH', 'ESLAW', 'NVG', 'EFT', 'ZBRA', 'NVFY', 'NVDU', 'ZD', 'DYCQR', 'ZETA', 'ZEUS', 'DMAT', 'NVDS', 'ZG', 'ZGN', 'ZI', 'ZIM', 'ZION', 'ZIP', 'EFSI', 'DFGX', 'EFSCP', 'ZLAB', 'EFRA', 'ZM', 'DCOMP', 'ZNTL', 'DCOMG', 'ZS', 'ZTO', 'ZWS', 'NVDL', 'ZYME', 'DCAP', 'EFOI', 'C', 'CAVA', 'NVD', 'CYCC', 'CXSE', 'FBLA', 'NVRO', 'NVCT', 'RGNX'

## Test the fetch time for Alpaca

In [None]:
years_back = 5  # Number of years of historical data required
start_date = "2002-01-01"  # Start date for data fetching
end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")  # End date (yesterday)
trading_days_back = years_back * 252  # Approximate number of trading days per year (252)

print(f"Fetching OHLC from {start_date} to {end_date} (~threshold: {trading_days_back} days)")

# Initialize dataframe and performance tracking variables
alpaca_df = pd.DataFrame()
start_time = time.time()
missing_data_count = 0  # Counter for tickers with no data
not_enough_time_count = 0  # Counter for tickers with insufficient data
processed_ticker_count = 0  # Counter for processed tickers
subset_size = 175  # Number of tickers to process in this run
subset_tickers = stock_tickers[:subset_size]  # Select the first 50 tickers

# Loop through each ticker to fetch historical stock data from Alpaca
for ticker in tqdm(subset_tickers, desc="Alpaca Download Progress"):
    try:
        processed_ticker_count += 1  # Increment the processed ticker counter
        bars = alpaca.get_bars(ticker, "1Day", start_date, end_date).df  # Fetch daily OHLC data

        # Check if any data was retrieved
        if not bars.empty:
            # Extract relevant OHLC data and reset the DataFrame index
            df = bars[['open', 'high', 'low', 'close']].reset_index()

            # Check if retrieved data covers the required number of years
            if len(df) < trading_days_back:
                missing_data_count += 1
                not_enough_time_count += 1
                print(f"Not enough trading days. Skipped {ticker} (~{len(df)} < {trading_days_back} days)")
                continue

            # Add ticker and formatted date columns for clarity
            df['ticker'] = ticker
            df['date'] = df['timestamp'].dt.strftime("%Y-%m-%d")

            # Reorder columns
            df = df[['ticker', 'date', 'open', 'high', 'low', 'close']]

            # Append this ticker's data to the main DataFrame
            alpaca_df = pd.concat([alpaca_df, df])
        else:
            # If no data was retrieved, increment the missing data counter
            missing_data_count += 1
            print(f"No data for {ticker}")

        # Wait briefly to prevent API rate limiting
        time.sleep(1)

    except Exception as e:
        # Handle and log any exceptions during data retrieval
        print(f"Error for {ticker}: {e}")
        time.sleep(1)

# Calculate total processing time and performance metrics
total_seconds = time.time() - start_time
seconds_per_ticker = round((total_seconds / len(subset_tickers)), 2)
estimated_total_time = round(seconds_per_ticker * len(stock_tickers) / 60)

# Output performance summary and sample data
print(f"\nFetched {len(alpaca_df)} rows of stock data in {total_seconds:.2f} seconds.")
print(f"Processed {processed_ticker_count} tickers, missing {missing_data_count} tickers, {not_enough_time_count} tickers did not have enough time.")
print(f"Processing time per ticker: {seconds_per_ticker:.2f} seconds. Estimated processing time for all stocks in the dataset: {estimated_total_time} minutes.")
print("\nExample Table Data:\n")
print("Alpaca Sample:", display(alpaca_df.head(30)))


Fetching OHLC from 2002-01-01 to 2025-03-20 (~threshold: 1260 days)


Alpaca Download Progress:   6%|▌         | 3/50 [00:02<00:37,  1.25it/s]

Skipped NXLIW (~629 < 1260 days)


Alpaca Download Progress:  10%|█         | 5/50 [00:04<00:30,  1.45it/s]

Skipped NXL (~629 < 1260 days)


Alpaca Download Progress:  14%|█▍        | 7/50 [00:05<00:27,  1.55it/s]

Skipped ELBM (~727 < 1260 days)
Skipped ELAB (~332 < 1260 days)


Alpaca Download Progress:  22%|██▏       | 11/50 [00:07<00:16,  2.30it/s]

Skipped IDAI (~787 < 1260 days)
Skipped ICUCW (~1007 < 1260 days)


Alpaca Download Progress:  24%|██▍       | 12/50 [00:07<00:13,  2.74it/s]

Skipped ICU (~1007 < 1260 days)


Alpaca Download Progress:  34%|███▍      | 17/50 [00:10<00:12,  2.69it/s]

Skipped EKG (~751 < 1260 days)
Skipped HTFC (~686 < 1260 days)
Skipped HOVR (~493 < 1260 days)


Alpaca Download Progress:  36%|███▌      | 18/50 [00:10<00:09,  3.25it/s]

Skipped NXGLW (~813 < 1260 days)


Alpaca Download Progress:  46%|████▌     | 23/50 [00:13<00:09,  2.76it/s]

Skipped XMTR (~935 < 1260 days)
Skipped GRDN (~120 < 1260 days)
Skipped GDSTR (~735 < 1260 days)


Alpaca Download Progress:  50%|█████     | 25/50 [00:16<00:18,  1.34it/s]

Skipped EIIA (~104 < 1260 days)


Alpaca Download Progress:  58%|█████▊    | 29/50 [00:18<00:13,  1.51it/s]

Skipped XPEV (~1146 < 1260 days)


Alpaca Download Progress:  66%|██████▌   | 33/50 [00:21<00:09,  1.70it/s]

Skipped EICC (~236 < 1260 days)
Skipped NWTN (~1257 < 1260 days)


Alpaca Download Progress:  72%|███████▏  | 36/50 [00:22<00:06,  2.11it/s]

Skipped FIVY (~63 < 1260 days)
Skipped EICB (~411 < 1260 days)


Alpaca Download Progress:  76%|███████▌  | 38/50 [00:24<00:06,  1.91it/s]

Skipped NWTG (~401 < 1260 days)


Alpaca Download Progress:  80%|████████  | 40/50 [00:25<00:05,  1.82it/s]

Skipped EICA (~852 < 1260 days)


Alpaca Download Progress:  84%|████████▍ | 42/50 [00:26<00:04,  1.80it/s]

Skipped NWGL (~382 < 1260 days)


Alpaca Download Progress:  88%|████████▊ | 44/50 [00:28<00:03,  1.78it/s]

Skipped FDFF (~445 < 1260 days)


Alpaca Download Progress:  92%|█████████▏| 46/50 [00:29<00:02,  1.69it/s]

Skipped YMM (~941 < 1260 days)


Alpaca Download Progress:  98%|█████████▊| 49/50 [00:31<00:00,  2.06it/s]

Skipped EHLS (~243 < 1260 days)
Skipped YOU (~935 < 1260 days)


Alpaca Download Progress: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s]


Fetched 48488 rows of stock data in 32.69 seconds.
Processed 50 tickers, skipped 27 tickers.
Processing time per ticker: 0.65 seconds. Estimated processing time for all stocks in the dataset: 78 minutes.

Example Table Data:






Unnamed: 0,ticker,date,open,high,low,close
0,XHR,2016-01-04,15.04,15.25,14.74,14.95
1,XHR,2016-01-05,14.98,15.65,14.91,15.56
2,XHR,2016-01-06,15.35,15.6,15.2901,15.41
3,XHR,2016-01-07,15.1,15.28,14.69,14.69
4,XHR,2016-01-08,14.74,14.74,14.25,14.29
5,XHR,2016-01-11,14.32,14.67,14.25,14.55
6,XHR,2016-01-12,14.68,14.8,14.41,14.65
7,XHR,2016-01-13,14.67,14.71,13.91,13.97
8,XHR,2016-01-14,14.0,14.15,13.53,13.93
9,XHR,2016-01-15,13.53,13.59,12.813,12.96


Alpaca Sample: None
