# Testing Data Fetch Time and Alpaca Connection. 

In [None]:
# Standard libraries
from datetime import datetime, timedelta
import time
import sys

# Non-standard libraries
import pandas as pd
from tqdm import tqdm

# Alpaca API
from alpaca_trade_api.rest import REST
import yf
# Suppress warnings
import warnings
warnings.filterwarnings("ignore")

# Project-specific imports
sys.path.append(r"d:\dev\stat_656_autotrader")
from credentials import ALPACA_API_KEY, ALPACA_SECRET_KEY, ALPAKA_ENDPOINT_URL

## Remove the leading '#' from the line below to print your credentials from the .secrets file

In [None]:
#print(f"API_KEY: {ALPACA_API_KEY}, SECRET_KEY: {ALPACA_SECRET_KEY}, ENDPOINT_URL: {ALPAKA_ENDPOINT_URL}")

## Connect to Alpaca

In [None]:
# Build the Alpaca REST client and confirm that the credentials really work.
# Constructing REST() only stores the keys and the endpoint, so on its own it
# would report success even with invalid credentials; an account lookup is
# used as the actual connectivity check before the success message is shown.
try:
    alpaca = REST(ALPACA_API_KEY, ALPACA_SECRET_KEY, base_url=ALPAKA_ENDPOINT_URL)
    alpaca.get_account()  # Raises when the keys or the endpoint are rejected
    print("Connected to Alpaca successfully!")

except Exception as e:
    # Best effort: report the problem and let the notebook keep running
    print(f"Connection failed: {e}")

## Get a list of all Alpaca stock tickers traded on NASDAQ, NYSE, and AMEX

In [None]:
# Ask Alpaca for every asset whose status is 'active'
assets = alpaca.list_assets(status='active')

# Exchanges whose listings are kept: NASDAQ, NYSE and AMEX
us_exchanges = ['NASDAQ', 'NYSE', 'AMEX']


def _is_plain_us_stock(candidate):
    """Return True for a tradable, active asset on a kept exchange whose
    symbol consists of uppercase letters only (no digits, periods,
    underscores or lowercase characters)."""
    if candidate.exchange not in us_exchanges:
        return False
    if not candidate.tradable:
        return False
    if candidate.status != 'active':
        return False
    return candidate.symbol.isalpha() and candidate.symbol.isupper()


# NOTE(review): the filter checks exchange, tradability, status and symbol
# shape only; nothing here inspects the asset type, so the "no ETFs/SPACs"
# wording in the message below relies on the symbol pattern alone - confirm.
stock_assets = list(filter(_is_plain_us_stock, assets))
stock_tickers = [kept.symbol for kept in stock_assets]

print(f"Fetched {len(stock_tickers)} company stock tickers from Alpaca (U.S. exchanges, no CUSIPs/ETFs/SPACs)!")
print("First 20 company stock tickers:", stock_tickers[:20])

    

## Configure Variables for the Tests

In [None]:
# History requirement and size of this run
years_back = 5  # Years of daily history a ticker must have to be kept
subset_size = 175  # How many tickers this run processes
subset_tickers = stock_tickers[:subset_size]  # The first `subset_size` tickers

# Fetch window: a fixed start date through yesterday (local calendar date)
start_date = "2002-01-01"
end_date = f"{datetime.now() - timedelta(days=1):%Y-%m-%d}"

# Minimum number of daily bars, using ~252 trading days per year
trading_days_back = 252 * years_back
print(f"Fetching OHLC from {start_date} to {end_date} (~threshold: {trading_days_back} days)")

## Test the Alpaca fetch time on a subset of tickers from NASDAQ, NYSE, and AMEX

In [None]:
# Benchmark: download daily OHLC history from Alpaca for every ticker in
# `subset_tickers`, keep only tickers with at least `trading_days_back` daily
# bars, and report how long the download took.
alpaca_frames = []  # Per-ticker frames; concatenated once after the loop
start_time = time.time()
missing_data_count = 0  # Tickers left out of the result (no data OR too little history)
not_enough_time_count = 0  # Tickers with fewer than `trading_days_back` bars
processed_ticker_count = 0  # Tickers attempted, including ones that errored

for ticker in tqdm(subset_tickers, desc="Alpaca Download Progress"):
    try:
        processed_ticker_count += 1
        bars = alpaca.get_bars(ticker, "1Day", start_date, end_date).df  # Daily bars as a DataFrame

        if bars.empty:
            # Nothing came back for this ticker
            missing_data_count += 1
            print(f"No data for {ticker}")
        elif len(bars) < trading_days_back:
            # Some data, but not enough history to be useful
            missing_data_count += 1
            not_enough_time_count += 1
        else:
            # Keep the OHLC columns and turn the index into a regular column
            df = bars[['open', 'high', 'low', 'close']].reset_index()
            df['ticker'] = ticker
            df['date'] = df['timestamp'].dt.strftime("%Y-%m-%d")
            alpaca_frames.append(df[['ticker', 'date', 'open', 'high', 'low', 'close']])

    except Exception as e:
        # Best effort: log the failure and move on to the next ticker
        print(f"Error for {ticker}: {e}")

    # Pause after EVERY request (success, skip or error) so that tickers with
    # too little history no longer bypass the rate-limit delay.
    time.sleep(1)

# One concat at the end avoids re-copying all accumulated rows on every
# iteration; ignore_index gives each row a unique label.
alpaca_df = pd.concat(alpaca_frames, ignore_index=True) if alpaca_frames else pd.DataFrame()

# Timing metrics (guarded so an empty ticker subset does not divide by zero)
total_seconds = time.time() - start_time
ticker_count = len(subset_tickers)
seconds_per_ticker = round(total_seconds / ticker_count, 2) if ticker_count else 0.0
estimated_total_time = round(seconds_per_ticker * len(stock_tickers) / 60)

# Output performance summary and sample data
print(f"\nFetched {len(alpaca_df)} rows of stock data in {total_seconds:.2f} seconds.")
print(f"Processed {processed_ticker_count} tickers, missing {missing_data_count} tickers, {not_enough_time_count} tickers did not have enough time.")
print(f"Processing time per ticker: {seconds_per_ticker:.2f} seconds. Estimated processing time for all stocks in the dataset: {estimated_total_time} minutes.")
print("\nExample Table Data:\n")
# display() returns None, so it is called on its own line instead of being
# passed to print(), which used to emit "Alpaca Sample: None".
print("Alpaca Sample:")
display(alpaca_df.head(30))


## Testing the Fetch Time for Yesterday's OHLC Data (One Day Only)

In [None]:
# Benchmark: fetch a single daily bar (yesterday's OHLC) for every ticker in
# `subset_tickers` and report the time taken.
# NOTE(review): "yesterday" is the previous calendar day; after a weekend or
# holiday there is presumably no bar for that date, so every ticker would be
# reported as "No data" - confirm whether the last trading day is wanted.
yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")  # YYYY-MM-DD, used as both start and end of the request

start_time = time.time()  # Start timer for data fetching
daily_frames = []  # Per-ticker frames; concatenated once after the loop

for ticker in tqdm(subset_tickers, desc="Fetching Yesterday's OHLC"):
    try:
        bars = alpaca.get_bars(ticker, "1Day", yesterday, yesterday).df  # Daily bars for yesterday only

        if bars.empty:
            print(f"No data for {ticker}")  # Log if no data available for ticker
        else:
            # reset_index() returns a new frame, so the columns added below are
            # written to an independent object rather than to a slice of `bars`.
            ohlc = bars[['open', 'high', 'low', 'close']].reset_index()
            ohlc['ticker'] = ticker
            ohlc['date'] = ohlc['timestamp'].dt.strftime("%Y-%m-%d")
            daily_frames.append(ohlc[['ticker', 'date', 'open', 'high', 'low', 'close']])

    except Exception as e:
        # Best effort: log the failure and move on to the next ticker
        print(f"Error fetching {ticker}: {e}")

    time.sleep(1)  # Pause after every request to stay under API rate limits

# One concat at the end instead of one per ticker; ignore_index gives each row
# a unique label (previously every row carried index 0).
daily_df = pd.concat(daily_frames, ignore_index=True) if daily_frames else pd.DataFrame()

# Timing metrics (guarded so an empty ticker subset does not divide by zero)
fetch_time = time.time() - start_time
ticker_count = len(subset_tickers)
seconds_per_ticker = round(fetch_time / ticker_count, 2) if ticker_count else 0.0
estimated_total_time = round(seconds_per_ticker * len(stock_tickers) / 60)  # Minutes for the full ticker list

# Output summary of fetch results
print(f"\nFetched OHLC data for yesterday ({yesterday}) in {fetch_time:.2f} seconds.")
print(f"Average processing time per ticker: {seconds_per_ticker:.2f} seconds.")
print(f"Estimated total processing time for all stocks: {estimated_total_time} minutes.")
print("\nSample fetched data:")
print(daily_df.head())


## Testing the Fetch Time for the Open Prices ONLY in the Morning 

In [None]:
# Benchmark: fetch only today's opening price for every ticker in
# `subset_tickers`, read from the first one-minute bar at the market open.

# The market opens at 09:30 New York time. Localizing with the named time zone
# yields -05:00 in winter and -04:00 during daylight saving time, instead of a
# hard-coded -05:00 that is one hour off for much of the year.
market_tz = "America/New_York"
today = pd.Timestamp.now(tz=market_tz).strftime("%Y-%m-%d")  # Trading date in exchange-local time
market_open_dt = pd.Timestamp(f"{today} 09:30:00", tz=market_tz)
market_open = market_open_dt.isoformat()  # Kept as a string for later cells

# The request window is the same for every ticker, so build it once
window_start = market_open
window_end = (market_open_dt + timedelta(minutes=1)).isoformat()

start_time = time.time()
open_price_rows = []  # One dict per ticker; turned into a DataFrame after the loop

for ticker in tqdm(subset_tickers, desc="Fetching Today's Open Prices"):
    try:
        # One-minute bars starting at the market open
        bars = alpaca.get_bars(ticker, "1Min", window_start, window_end).df

        if not bars.empty:
            # The first bar's open is taken as the opening price
            open_price_rows.append({
                'ticker': ticker,
                'date': today,
                'open': bars['open'].iloc[0],
            })

    except Exception as e:
        # Best effort: log the failure and move on to the next ticker
        print(f"Error fetching {ticker}: {e}")

    time.sleep(1)  # Pause after every request to stay under API rate limits

# Building the frame once avoids a concat per ticker and gives unique row labels
open_prices_df = pd.DataFrame(open_price_rows)

# Performance metrics (guarded so an empty ticker subset does not divide by zero)
fetch_time = time.time() - start_time
ticker_count = len(subset_tickers)
seconds_per_ticker = round(fetch_time / ticker_count, 2) if ticker_count else 0.0
estimated_total_time = round(seconds_per_ticker * len(stock_tickers) / 60)

print(f"\nFetched today's open prices ({today}) in {fetch_time:.2f} seconds.")
print(f"Average processing time per ticker: {seconds_per_ticker:.2f} seconds.")
print(f"Estimated total processing time for all stocks: {estimated_total_time} minutes.\n")

print(open_prices_df.head())

## Batch Fetching for One Day

In [None]:
# Today's date as YYYY-MM-DD, plus the 09:30 (-05:00) timestamp in ISO form.
# `market_open_dt` is not read again inside this cell.
today = f"{datetime.now():%Y-%m-%d}"
market_open_dt = pd.Timestamp(f"{today}T09:30:00-05:00").isoformat()

# One batched request for the whole ticker list instead of one call per ticker.
# NOTE(review): get_latest_bars presumably returns each symbol's most recent
# bar rather than the opening bar, so the "Opening Prices" label below may
# overstate what is shown - confirm.
try:
    bars = alpaca.get_latest_bars(subset_tickers)

    # Flatten the symbol -> bar mapping into one record per ticker
    data = []
    data.extend(
        {
            'ticker': symbol,
            'date': latest.t.strftime("%Y-%m-%d"),
            'open': latest.o,
            'high': latest.h,
            'low': latest.l,
            'close': latest.c,
            'volume': latest.v,
        }
        for symbol, latest in bars.items()
    )

    real_time_df = pd.DataFrame(data)
    print("Real-Time Opening Prices (Market Open):")
    print(real_time_df.head())

except Exception as e:
    print(f"Error fetching real-time data: {e}")