In [3]:
# /Notebooks/001-Tutorial-Connecting-to-Data-Source.ipynb

# Tutorial: Connecting and Testing Alpaca Connections

#  Setup & Imports

In [4]:
# Standard libraries
from datetime import datetime, timedelta
import time
import sys
from pathlib import Path
import os

# Non-standard libraries
import pandas as pd
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

# Define BASE_DIR and adjust sys.path before importing
BASE_DIR = Path(os.getcwd()).parent  # Points to stat_656_autotrader/ from Notebooks/
sys.path.append(str(BASE_DIR))
print(f"Project root added to sys.path: {BASE_DIR}")

# Import paths from the config module (now in src/)
from src.config import BASE_DIR as CONFIG_BASE_DIR

# Project-specific imports
from src.utils import get_alpaca_client, fetch_alpaca_stock_tickers, fetch_alpaca_historical_data, fetch_alpaca_latest_bars
from credentials import ALPACA_API_KEY, ALPACA_SECRET_KEY, ALPAKA_ENDPOINT_URL

# Initialize Alpaca client
alpaca = get_alpaca_client()

Project root added to sys.path: /home/aaronnhorvitz/dev/school/stat_656_autotrader


# Connect and Fetch Alpaca Tickers

In [5]:
# Fetch Alpaca tickers using existing utility
stock_tickers = fetch_alpaca_stock_tickers(alpaca)

print(f"Total tickers fetched: {len(stock_tickers)}")
print("First 20 tickers:", stock_tickers[:20])

Total tickers fetched: 7465
First 20 tickers: ['CNTX', 'CNTM', 'DGXX', 'GVH', 'GV', 'CNTB', 'DHAI', 'APCX', 'GUT', 'DHAIW', 'CNOBP', 'DHCNL', 'CNL', 'DHF', 'CNFRZ', 'CNFR', 'AP', 'AOUT', 'DISTR', 'GURE']


# Configure Test Parameters

In [6]:
years_back = 5
subset_size = 175
subset_tickers = stock_tickers[:subset_size]

start_date = "2002-01-01"
end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
trading_days_back = years_back * 252  # Approximate number of trading days

print(f"Fetching OHLC data from {start_date} to {end_date} (approx. {trading_days_back} trading days)")

Fetching OHLC data from 2002-01-01 to 2025-04-27 (approx. 1260 trading days)


# Fetch Historical Data (OHLC)

In [7]:
# Use existing fetch utility for historical OHLC
historical_df = fetch_alpaca_historical_data(alpaca, subset_tickers, start_date, end_date, years_back)

print(f"\nFetched {len(historical_df)} rows of historical OHLC data.")
print(historical_df.head(10))

Alpaca Download Progress: 100%|██████████| 175/175 [01:35<00:00,  1.84it/s]


Fetched 139235 rows of stock data in 95.11 seconds.
Processed 175 tickers, missing 108 tickers, 108 tickers did not have enough time.
Processing time per ticker: 0.54 seconds.

Fetched 139235 rows of historical OHLC data.
  ticker        date    open    high     low  close
0    GUT  2016-01-04  5.6800  5.7100  5.6500   5.71
1    GUT  2016-01-05  5.7400  5.7600  5.6898   5.72
2    GUT  2016-01-06  5.7200  5.7600  5.7000   5.71
3    GUT  2016-01-07  5.6700  5.7000  5.6500   5.70
4    GUT  2016-01-08  5.7300  5.7500  5.6900   5.71
5    GUT  2016-01-11  5.7500  5.7597  5.6000   5.63
6    GUT  2016-01-12  5.6500  5.6500  5.5600   5.61
7    GUT  2016-01-13  5.6163  5.6604  5.6111   5.64
8    GUT  2016-01-14  5.6400  5.8400  5.6100   5.83
9    GUT  2016-01-15  5.4600  5.6300  5.4300   5.60





# Fetch Yesterday's OHLC

In [8]:
yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

yesterday_df = fetch_alpaca_historical_data(alpaca, subset_tickers, yesterday, yesterday)

print(f"\nFetched OHLC data for yesterday ({yesterday}):")
print(yesterday_df.head(10))

Alpaca Download Progress: 100%|██████████| 175/175 [03:04<00:00,  1.06s/it]


Fetched 0 rows of stock data in 184.87 seconds.
Processed 175 tickers, missing 175 tickers, 0 tickers did not have enough time.
Processing time per ticker: 1.06 seconds.

Fetched OHLC data for yesterday (2025-04-27):
Empty DataFrame
Columns: []
Index: []





# Fetch Today's Open Prices

In [9]:
today = datetime.now().strftime("%Y-%m-%d")
market_open_dt = pd.Timestamp(f"{today}T09:30:00-05:00")

open_prices_df = pd.DataFrame()

for ticker in tqdm(subset_tickers, desc="Fetching Today's Open Prices"):
    bars = alpaca.get_bars(ticker, "1Min", market_open_dt.isoformat(), (market_open_dt + timedelta(minutes=1)).isoformat()).df
    if not bars.empty:
        bars.reset_index(inplace=True)
        open_prices_df = pd.concat([
            open_prices_df,
            pd.DataFrame({
                'ticker': [ticker],
                'date': [today],
                'open': [bars.iloc[0]['open']]
            })
        ])
    time.sleep(0.3)  # Slightly faster, but avoid hitting rate limits

print(f"\nToday's Open Prices ({today}):")
print(open_prices_df.head(10))

Fetching Today's Open Prices: 100%|██████████| 175/175 [01:02<00:00,  2.80it/s]


Today's Open Prices (2025-04-28):
  ticker        date    open
0   CNTM  2025-04-28  0.6535
0   DGXX  2025-04-28  1.1100
0     GV  2025-04-28  2.1392
0   CNTB  2025-04-28  0.8232
0   APCX  2025-04-28  0.2645
0    GUT  2025-04-28  5.2800
0   CNEY  2025-04-28  0.1270
0   GTEC  2025-04-28  1.6650
0    ANY  2025-04-28  0.5766
0    EAF  2025-04-28  0.6563





# Batch Fetch Real-time latest Bars

In [None]:
real_time_df = fetch_alpaca_latest_bars(alpaca, subset_tickers)

print("\nReal-Time Latest Bars (Market Open):")
print(real_time_dfxsds
      
      dddddddddd(1


Real-Time Latest Bars (Market Open):
  ticker        date     open     high      low    close  volume
0   CLDI  2025-04-28   0.4700   0.4700   0.4700   0.4700     200
1   EICC  2025-04-23  24.9400  24.9400  24.9400  24.9400     150
2   MBIO  2025-04-28   1.1750   1.1750   1.1600   1.1600     300
3   CMSA  2025-04-28  21.3600  21.3600  21.3600  21.3600     387
4   GQQQ  2025-04-23  22.9600  22.9600  22.9600  22.9600     113
5    HQL  2025-04-28  12.4900  12.4900  12.4900  12.4900     200
6  IMPPP  2025-04-25  24.0000  24.0000  24.0000  24.0000     200
7   IMNN  2025-04-28   0.8601   0.8601   0.8601   0.8601     100
8  CNOBP  2025-04-15  21.8500  21.8500  21.8500  21.8500     488
9  HROWL  2025-04-25  25.2900  25.2900  25.2900  25.2900     100


# Performance Summary

In [11]:
# Simple diagnostics
def performance_summary(start_time, num_tickers):
    total_seconds = time.time() - start_time
    seconds_per_ticker = total_seconds / num_tickers
    estimated_time_all = (seconds_per_ticker * len(stock_tickers)) / 60  # in minutes

    print(f"\nProcessed {num_tickers} tickers in {total_seconds:.2f} seconds.")
    print(f"Average time per ticker: {seconds_per_ticker:.2f} sec.")
    print(f"Estimated time for all tickers: {estimated_time_all:.2f} mins.")

# Example Usage:
start_time = time.time()
_ = fetch_alpaca_latest_bars(alpaca, subset_tickers[:10])
performance_summary(start_time, 10)


Processed 10 tickers in 0.05 seconds.
Average time per ticker: 0.00 sec.
Estimated time for all tickers: 0.57 mins.
