In [3]:
# pip install alpaca-trade-api xgboost pandas numpy scikit-learn joblib

import pandas as pd
import numpy as np
import time
import os
import joblib
from datetime import datetime, timedelta, timezone
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from alpaca_trade_api.rest import REST, TimeFrame

# --- Config ---
API_KEY = 'PKY9SSRUW3SSCB6UQK7K'
SECRET_KEY = 'N8JoPWWLu7jFomZJIjmJHQhPRNmtvG44eoVFYnKJ'
BASE_URL = 'https://paper-api.alpaca.markets'
SYMBOLS = ['AAPL', 'MSFT', 'TSLA', 'NVDA', 'GOOG', 'AMZN', 'META']
TRADE_QTY = 1
STOP_LOSS_PCT = 0.10  # 10% stop loss for training
LOG_FILE = 'trade_log.csv'
DATA_FILE = 'master_data.csv'
MODEL_FILE = 'xgb_model.joblib'

api = REST(API_KEY, SECRET_KEY, BASE_URL)

# --- Check if market is open ---
def is_market_open():
    return api.get_clock().is_open

# --- Get position ---
def get_position(symbol):
    try:
        pos = api.get_position(symbol)
        return int(pos.qty), float(pos.avg_entry_price)
    except:
        return 0, 0.0

# --- Fetch 5-min bars for the past 'days' days ---
def get_data(symbol, days=5):
    now = datetime.now(timezone.utc)
    start = now - timedelta(days=days)
    try:
        bars = api.get_bars(
            symbol,
            '5Min',
            start=start.isoformat(),
            end=now.isoformat()
        ).df
        bars = bars.reset_index()
        bars['symbol'] = symbol
        return bars
    except Exception as e:
        print(f"{symbol} data error: {e}")
        return pd.DataFrame()


# --- Feature engineering ---
def add_features(df):
    df = df.sort_values('timestamp')
    df['return'] = df['close'].pct_change()
    df['volatility'] = df['return'].rolling(window=5).std()
    df['ma'] = df['close'].rolling(window=5).mean()
    df['ma_diff'] = df['close'] - df['ma']
    df['target'] = (df['close'].shift(-1) > df['close']).astype(int)
    df.dropna(inplace=True)
    return df

# --- Load or create master dataset ---
def load_master_data():
    if os.path.exists(DATA_FILE):
        master_df = pd.read_csv(DATA_FILE, parse_dates=['timestamp'])
        return master_df
    else:
        return pd.DataFrame()

# --- Save master dataset ---
def save_master_data(df):
    df.to_csv(DATA_FILE, index=False)

# --- Train or load model ---
def train_or_load_model(master_df):
    if os.path.exists(MODEL_FILE):
        model = joblib.load(MODEL_FILE)
        print("Model loaded from disk.")
        return model
    else:
        print("Training new model...")
        master_df = add_features(master_df)
        X = master_df[['return', 'volatility', 'ma_diff']]
        y = master_df['target']
        X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False, test_size=0.2)
        model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
        model.fit(X_train, y_train)
        print(f"Model trained. Accuracy: {(model.score(X_test, y_test)*100):.2f}%")
        joblib.dump(model, MODEL_FILE)
        print("Model saved to disk.")
        return model

# --- Update master data with latest bars for all symbols ---
def update_master_data(master_df):
    new_data = []
    for symbol in SYMBOLS:
        df = get_data(symbol)
        if not df.empty:
            new_data.append(df)
    if new_data:
        new_df = pd.concat(new_data, ignore_index=True)
        combined = pd.concat([master_df, new_df], ignore_index=True)
        combined.drop_duplicates(subset=['timestamp', 'symbol'], inplace=True)
        combined.sort_values(['symbol', 'timestamp'], inplace=True)
        return combined
    return master_df

# --- Trade logic ---
def trade(symbol, model, master_df):
    df = master_df[master_df['symbol'] == symbol].copy()
    df = add_features(df)
    if df.empty:
        print(f"No data to trade {symbol}")
        return
    features = df[['return', 'volatility', 'ma_diff']].iloc[[-1]]
    prediction = model.predict(features)[0]
    price = df['close'].iloc[-1]
    qty, avg_price = get_position(symbol)

    # Stop-loss check
    if qty > 0 and price < avg_price * (1 - STOP_LOSS_PCT):
        try:
            api.submit_order(symbol=symbol, qty=qty, side='sell', type='market', time_in_force='gtc')
            print(f"STOP-LOSS SELL: {symbol} @ {price:.2f}")
            log_trade(symbol, 'stop-loss', price)
            return
        except Exception as e:
            print(f"Stop-loss error for {symbol}: {e}")
            return

    # Buy or sell logic
    if prediction == 1 and qty == 0:
        try:
            api.submit_order(symbol=symbol, qty=TRADE_QTY, side='buy', type='market', time_in_force='gtc')
            print(f"BUY: {symbol} @ {price:.2f}")
            log_trade(symbol, 'buy', price)
        except Exception as e:
            print(f"Buy error for {symbol}: {e}")
    elif prediction == 0 and qty > 0:
        try:
            api.submit_order(symbol=symbol, qty=qty, side='sell', type='market', time_in_force='gtc')
            print(f"SELL: {symbol} @ {price:.2f}")
            log_trade(symbol, 'sell', price)
        except Exception as e:
            print(f"Sell error for {symbol}: {e}")
    else:
        print(f"HOLD: {symbol} @ {price:.2f}")

# --- Check stop-loss for all open positions every 30 seconds ---
def check_stop_losses():
    positions = api.list_positions()
    for pos in positions:
        symbol = pos.symbol
        qty = int(pos.qty)
        entry = float(pos.avg_entry_price)
        current = float(pos.current_price)
        if current < entry * (1 - STOP_LOSS_PCT):
            try:
                api.submit_order(symbol=symbol, qty=qty, side='sell', type='market', time_in_force='gtc')
                print(f"STOP-LOSS SELL: {symbol} @ {current:.2f}")
                log_trade(symbol, 'stop-loss', current)
            except Exception as e:
                print(f"Stop-loss error for {symbol}: {e}")

# --- Log trades ---
def log_trade(symbol, action, price):
    now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    equity = float(api.get_account().equity)
    cash = float(api.get_account().cash)
    row = {
        'time': now,
        'symbol': symbol,
        'action': action,
        'price': price,
        'equity': equity,
        'cash': cash
    }
    df = pd.DataFrame([row])
    if os.path.exists(LOG_FILE):
        df.to_csv(LOG_FILE, mode='a', header=False, index=False)
    else:
        df.to_csv(LOG_FILE, index=False)

# --- Main run loop ---
def main():
    print("Starting trading bot...")

    master_df = load_master_data()

    last_model_train = None
    model = None

    try:
        while is_market_open():
            now = datetime.now(timezone.utc)

            # Update master data
            master_df = update_master_data(master_df)

            # Train model once per day (e.g. after 3:59pm ET)
            if (last_model_train is None or
                (now - last_model_train).total_seconds() > 24*3600):
                print(f"Training model at {now}")
                model = train_or_load_model(master_df)
                last_model_train = now
                save_master_data(master_df)

            # Trade top 5 symbols by model predicted probability
            if model is not None:
                preds = []
                for symbol in SYMBOLS:
                    df = master_df[master_df['symbol'] == symbol].copy()
                    if df.empty:
                        continue
                    df = add_features(df)
                    features = df[['return', 'volatility', 'ma_diff']].iloc[[-1]]
                    prob = model.predict_proba(features)[0][1]
                    preds.append((symbol, prob))
                top5 = sorted(preds, key=lambda x: x[1], reverse=True)[:5]
                for sym, _ in top5:
                    trade(sym, model, master_df)

            # Check stop losses fast every 30 seconds
            check_stop_losses()

            print("Cycle complete. Sleeping 30 seconds...\n")
            time.sleep(30)

        print("Market closed. Stopping bot.")

    except KeyboardInterrupt:
        print("Bot stopped by user.")

if __name__ == '__main__':
    main()


Starting trading bot...
AAPL data error: subscription does not permit querying recent SIP data
MSFT data error: subscription does not permit querying recent SIP data
TSLA data error: subscription does not permit querying recent SIP data
NVDA data error: subscription does not permit querying recent SIP data
GOOG data error: subscription does not permit querying recent SIP data
AMZN data error: subscription does not permit querying recent SIP data
META data error: subscription does not permit querying recent SIP data
Training model at 2025-06-20 18:08:48.542112+00:00
Training new model...


KeyError: 'timestamp'