In [None]:
# /Notebooks/001-Tutorial-Connecting-to-Data-Source.ipynb

# Tutorial: Connecting and Testing Alpaca Connections

#  Setup & Imports

In [None]:
# Standard libraries
from datetime import datetime, timedelta
import time
import sys

# Non-standard libraries
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

# Import paths from the config module
from config import BASE_DIR

# Adjust the path to the project root directory
sys.path.append(str(BASE_DIR))
print(f"Project root added to sys.path: {BASE_DIR}")

# Project-specific imports
from src.utils import get_alpaca_client, fetch_alpaca_stock_tickers, fetch_alpaca_historical_data, fetch_alpaca_latest_bars
from credentials import ALPACA_API_KEY, ALPACA_SECRET_KEY, ALPAKA_ENDPOINT_URL

# Initialize Alpaca client
alpaca = get_alpaca_client()

Project root added to sys.path: D:\dev\stat_656_autotrader


# Connect and Fetch Alpaca Tickers

In [2]:
# Fetch Alpaca tickers using existing utility
stock_tickers = fetch_alpaca_stock_tickers(alpaca)

print(f"Total tickers fetched: {len(stock_tickers)}")
print("First 20 tickers:", stock_tickers[:20])

Total tickers fetched: 7460
First 20 tickers: ['PHI', 'GATX', 'PEY', 'REFR', 'HIHO', 'TY', 'TKR', 'VUZI', 'OFIX', 'NOMD', 'MPTI', 'ANTX', 'CASH', 'SEB', 'RMD', 'BEAT', 'PTY', 'CFBK', 'DYAI', 'GRC']


# Configure Test Parameters

In [3]:
years_back = 5
subset_size = 175
subset_tickers = stock_tickers[:subset_size]

start_date = "2002-01-01"
end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
trading_days_back = years_back * 252  # Approximate number of trading days

print(f"Fetching OHLC data from {start_date} to {end_date} (approx. {trading_days_back} trading days)")

Fetching OHLC data from 2002-01-01 to 2025-03-29 (approx. 1260 trading days)


# Fetch Historical Data (OHLC)

In [4]:
# Use existing fetch utility for historical OHLC
historical_df = fetch_alpaca_historical_data(alpaca, subset_tickers, start_date, end_date, years_back)

print(f"\nFetched {len(historical_df)} rows of historical OHLC data.")
print(historical_df.head(10))

Alpaca Download Progress: 100%|██████████| 175/175 [02:49<00:00,  1.03it/s]


Fetched 276535 rows of stock data in 169.14 seconds.
Processed 175 tickers, missing 48 tickers, 48 tickers did not have enough time.
Processing time per ticker: 0.97 seconds.

Fetched 276535 rows of historical OHLC data.
  ticker        date   open   high    low  close
0    PHI  2016-09-02  39.15  39.90  38.86  39.46
1    PHI  2016-09-06  39.48  39.48  38.35  38.62
2    PHI  2016-09-07  38.71  39.41  38.31  39.10
3    PHI  2016-09-08  39.49  39.85  39.32  39.81
4    PHI  2016-09-09  39.54  39.57  37.50  37.81
5    PHI  2016-09-12  37.53  37.90  37.38  37.56
6    PHI  2016-09-13  37.43  37.43  36.44  36.59
7    PHI  2016-09-14  36.73  37.66  36.73  36.86
8    PHI  2016-09-15  36.72  36.72  35.92  36.19
9    PHI  2016-09-16  35.82  35.82  34.86  35.05





# Fetch Yesterday's OHLC

In [5]:
yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

yesterday_df = fetch_alpaca_historical_data(alpaca, subset_tickers, yesterday, yesterday)

print(f"\nFetched OHLC data for yesterday ({yesterday}):")
print(yesterday_df.head(10))

Alpaca Download Progress: 100%|██████████| 175/175 [03:20<00:00,  1.15s/it]


Fetched 0 rows of stock data in 200.77 seconds.
Processed 175 tickers, missing 175 tickers, 0 tickers did not have enough time.
Processing time per ticker: 1.15 seconds.

Fetched OHLC data for yesterday (2025-03-29):
Empty DataFrame
Columns: []
Index: []





# Fetch Today's Open Prices

In [6]:
today = datetime.now().strftime("%Y-%m-%d")
market_open_dt = pd.Timestamp(f"{today}T09:30:00-05:00")

open_prices_df = pd.DataFrame()

for ticker in tqdm(subset_tickers, desc="Fetching Today's Open Prices"):
    bars = alpaca.get_bars(ticker, "1Min", market_open_dt.isoformat(), (market_open_dt + timedelta(minutes=1)).isoformat()).df
    if not bars.empty:
        bars.reset_index(inplace=True)
        open_prices_df = pd.concat([
            open_prices_df,
            pd.DataFrame({
                'ticker': [ticker],
                'date': [today],
                'open': [bars.iloc[0]['open']]
            })
        ])
    time.sleep(0.3)  # Slightly faster, but avoid hitting rate limits

print(f"\nToday's Open Prices ({today}):")
print(open_prices_df.head(10))

Fetching Today's Open Prices: 100%|██████████| 175/175 [01:03<00:00,  2.75it/s]


Today's Open Prices (2025-03-30):
Empty DataFrame
Columns: []
Index: []





# Batch Fetch Real-time latest Bars

In [7]:
real_time_df = fetch_alpaca_latest_bars(alpaca, subset_tickers)

print("\nReal-Time Latest Bars (Market Open):")
print(real_time_df.head(10))


Real-Time Latest Bars (Market Open):
  ticker        date     open     high      low    close  volume
0  AFRIW  2025-03-20    0.620    0.620    0.620    0.620    1300
1   AGMI  2025-02-25   27.550   27.550   27.550   27.550     100
2   AOMR  2025-03-28    9.560    9.560    9.560    9.560     100
3   ACGL  2025-03-28   95.050   95.070   95.020   95.040    2933
4   ACTG  2025-03-28    3.310    3.310    3.310    3.310     819
5    AER  2025-03-28  102.110  102.120  102.110  102.120    1549
6   ACIW  2025-03-28   54.250   54.250   54.250   54.250     248
7   GATX  2025-03-28  155.335  155.335  155.335  155.335     109
8    ADN  2025-03-26    4.960    4.960    4.960    4.960     100
9    AGD  2025-03-28    9.955    9.955    9.955    9.955     502


# Performance Summary

In [8]:
# Simple diagnostics
def performance_summary(start_time, num_tickers):
    total_seconds = time.time() - start_time
    seconds_per_ticker = total_seconds / num_tickers
    estimated_time_all = (seconds_per_ticker * len(stock_tickers)) / 60  # in minutes

    print(f"\nProcessed {num_tickers} tickers in {total_seconds:.2f} seconds.")
    print(f"Average time per ticker: {seconds_per_ticker:.2f} sec.")
    print(f"Estimated time for all tickers: {estimated_time_all:.2f} mins.")

# Example Usage:
start_time = time.time()
_ = fetch_alpaca_latest_bars(alpaca, subset_tickers[:10])
performance_summary(start_time, 10)


Processed 10 tickers in 0.05 seconds.
Average time per ticker: 0.01 sec.
Estimated time for all tickers: 0.64 mins.
