# Make sure you Alpaca credentials are being read properly and you are connecting to Alpaca

In [None]:
# Standard 
from datetime import datetime, timedelta  # ~Date handling for time ranges
import time                              # ~Timing downloads for comparison
import sys                               # ~Path manipulation for project imports

# Non-standard Libraries 
import numpy as np                       # ~Numerical operations (~if needed later)
import pandas as pd                      # ~DataFrames for HLOC data
import yfinance as yf                    # ~yfinance for historical HLOC
from tqdm import tqdm                    # ~Progress bar for real-time feedback

# Alpaca API
from alpaca_trade_api.rest import REST   # ~Alpaca REST client for API calls

# Suppress Warnings (~Optional ~Cleaner output)
import warnings
warnings.filterwarnings("ignore")  # ~Ignore non-critical warnings (~e.g., Pandas)

## Read and print your credentials from the .secrets file

In [None]:
# Project-specific Imports (~Custom to stat_656_autotrader)
sys.path.append("d:/dev/stat_656_autotrader")  # ~Add project root to sys.path
from credentials import API_KEY, SECRET_KEY, ENDPOINT_URL  # ~Alpaca API credentials

#print(f"API_KEY: {API_KEY}, SECRET_KEY: {SECRET_KEY}, ENDPOINT_URL: {ENDPOINT_URL}")

## Connect to Alpaca

In [None]:
# Initialize Alpaca REST client
try:
    alpaca = REST(API_KEY, SECRET_KEY, base_url=ENDPOINT_URL)
    print("Connected to Alpaca successfully!")

except Exception as e:
    print(f"Connection failed: {e}")

Connected to Alpaca successfully!


## Get a list of all Alpaca stock tickers traded on the NYSE and NASDAQ

In [None]:
# Get all assets and filter to stocks by exchange (NASDAQ, NYSE)
assets = alpaca.list_assets(status=None)
stock_assets = [asset for asset in assets if asset.exchange in ['NASDAQ', 'NYSE']]
stock_tickers = [asset.symbol for asset in stock_assets]
print(f"Fetched {len(stock_tickers)} stock tickers from Alpaca (NASDAQ/NYSE)!")
print("First 10 stock tickers:", stock_tickers[:10])

Fetched 9717 stock tickers from Alpaca (NASDAQ/NYSE)!
First 10 stock tickers: ['AMGP', '464ESC0G8', 'KNL', 'TVPT', '5326esc96', 'SCACU', 'ETP', 'FTIVU', 'VOSOU', 'CVRR']


## Test latency and data quality between YFinance vs. Alpaca

In [5]:
# Subset (150 tickers test)
subset_tickers = stock_tickers[:150]

# Date range 
years_back = 10  # User can change this
end_date = datetime.now().strftime("%Y-%m-%d")
start_date = (datetime.now() - timedelta(days=years_back * 365)).strftime("%Y-%m-%d")
print(f"Date range: {start_date} to {end_date}")

Date range: 2015-03-24 to 2025-03-21


### Fetch time for YFinance

In [None]:
yfinance_df = pd.DataFrame()
start_time = time.time()
yf_missing_data = 0

for ticker in tqdm
(subset_tickers, desc="yfinance Download Progress"):

    # Fetch stock price data from the Yahoo Finance API
    stock = yf.Ticker(ticker)
    df = stock.history(start=start_date, end=end_date, interval="1d")

    if not df.empty:
        
        # Ensure the pricing data go back far enough. If not, skip the ticker and count it as missing data.
        if len(df) < years_back:
            yf_missing_data += 1
            pass

        else:
            df = df[['Open', 'High', 'Low', 'Close']].reset_index()
            df.columns = ['timestamp', 'open', 'high', 'low', 'close']
            df['ticker'] = ticker
            df = df[['ticker', 'timestamp', 'open', 'high', 'low', 'close']]
            yfinance_df = pd.concat([yfinance_df, df])

yfinance_time = time.time() - start_time

display(df.head(10))
print(f"yfinance: Fetched {len(yfinance_df)} rows in {yfinance_time:.2f} seconds")
print("yfinance Sample:", yfinance_df.head())
print(f"yfinance: Missing data for {yf_missing_data} tickers.")


NameError: name 'pd' is not defined

### Fetch time for Alpaca

In [None]:
alpaca_df = pd.DataFrame()
start_time = time.time()
alp_missing_data = 0

for ticker in tqdm(subset_tickers, desc="Alpaca Download Progress"):
    
    # Fetch stock price data from the Alpaca API
    bars = alpaca.get_bars(ticker, "1Day", start_date, end_date).df

    if not bars.empty:
        df = bars[['open', 'high', 'low', 'close']].reset_index()

        # Ensure the pricing data go back far enough. If not, skip the ticker and count it as missing data.
        if len(df) < years_back:
            alp_missing_data += 1
            pass
        
        else:
            df['ticker'] = ticker
            df = df[['ticker', 'timestamp', 'open', 'high', 'low', 'close']]
            alpaca_df = pd.concat([alpaca_df, df])
    time.sleep(1)  # ~Rate limit (~1 call/sec ~free tier)
alpaca_time = time.time() - start_time
print(f"Alpaca: Fetched {len(alpaca_df)} rows in {alpaca_time:.2f} seconds")
print("Alpaca Sample:", alpaca_df.head())
print(f"Alpaca: Missing data for {alp_missing_data} tickers.")