# ML-Powered Trader Bot

Imported Libraries:

In [1]:
import json
import os
import pandas as pd
import yfinance as yf
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Getting the stocks in the S&P 500
We simply scrape Wikipedia's list of S&P 500 stocks for the ticker symbols.

In [2]:
def get_sp500_tickers():
    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
    table = pd.read_html(url)[0]
    return [ticker.replace(".","-") for ticker in table['Symbol'].tolist()]

print(get_sp500_tickers())

['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK', 'AMP', 'AME', 'AMGN', 'APH', 'ADI', 'AON', 'APA', 'APO', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BAX', 'BDX', 'BRK-B', 'BBY', 'TECH', 'BIIB', 'BLK', 'BX', 'XYZ', 'BK', 'BA', 'BKNG', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF-B', 'BLDR', 'BG', 'BXP', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CAT', 'CBOE', 'CBRE', 'CDW', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'COIN', 'CL', 'CMCSA', 'CAG', 'COP', 'ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CPAY', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CRWD', 'CCI', 'CSX', 'CMI', 'CVS', 'DHR', 'DRI', 'DDOG', '

## Creating our Trading Engine

The first step to creating our trading bot will be to simplify our variable usage by creating a trading class.

We are using machine learning to calculate two metrics:
1. Expected Return 
2. Volitility

We will use these to calculate the Sharpe ratio for each stock.


In [3]:
class TradingEngine:
    def __init__(self, balance=100_000):
        self.balance = balance
        self.portfolio = {}

    def fetch_data(self, ticker, period="1y", interval="1d"):
        df = yf.download(ticker, period=period, interval=interval, auto_adjust=True, progress=False)
        df.dropna(inplace=True)
        df['Return'] = df['Close'].pct_change()

        # Targets for ML
        df['ReturnTarget'] = df['Return'].shift(-1)                          # Predict tomorrow's return
        df['VolatilityTarget'] = df['Return'].rolling(5).std().shift(-1)    # Predict 5-day forward-looking volatility

        df.dropna(inplace=True)
        return df

    def train_models(self, df):
        features = ['Open', 'High', 'Low', 'Close', 'Volume']
        X = df[features]

        y_return = df['ReturnTarget']
        y_vol = df['VolatilityTarget']

        X_train, X_test, y_train_r, y_test_r = train_test_split(X, y_return, test_size=0.2, shuffle=False)
        _, _, y_train_v, y_test_v = train_test_split(X, y_vol, test_size=0.2, shuffle=False)


        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        model_return = RandomForestRegressor(n_estimators=100, random_state=42)
        model_vol = RandomForestRegressor(n_estimators=100, random_state=42)

        model_return.fit(X_train_scaled, y_train_r)
        model_vol.fit(X_train_scaled, y_train_v)

        return model_return, model_vol, scaler

    def predict_sharpe(self, model_return, model_vol, scaler, df):
        latest = df[['Open', 'High', 'Low', 'Close', 'Volume']].iloc[-1:]
        X_latest = scaler.transform(latest)

        pred_return = model_return.predict(X_latest)[0]
        pred_vol = model_vol.predict(X_latest)[0]

        # Avoid divide-by-zero
        if pred_vol == 0:
            return -999  # Very bad Sharpe score

        sharpe = pred_return / pred_vol
        return sharpe

    def rank_stocks(self, tickers):
        scores = []
        for ticker in tickers:
            try:
                df = self.fetch_data(ticker)
                model_return, model_vol, scaler = self.train_models(df)
                sharpe = self.predict_sharpe(model_return, model_vol, scaler, df)
                scores.append((ticker, sharpe))
            except Exception as e:
                print(f"Error with {ticker}: {e}")
        ranked = sorted(scores, key=lambda x: x[1], reverse=True)
        return ranked
    
    def save_rankings_for_alpaca(self, df_ranked):
        try:
            if not os.path.exists('shared_data'):
                os.makedirs('shared_data')
            
            rankings = []
            for _, row in df_ranked.iterrows():
                rankings.append({
                    'ticker': row['Ticker'],
                    'sharpe_ratio': float(row['SharpeRatio'])
                })
            
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f'shared_data/rankings_{timestamp}.json'
            
            with open(filename, 'w') as f:
                json.dump(rankings, f, indent=2)
            
            print(f"Rankings saved to {filename} for Alpaca trading")
            return filename
            
        except Exception as e:
            print(f"Error saving rankings: {e}")
            return None

## Main File

In [4]:
if __name__ == "__main__":
    tickers = get_sp500_tickers()
    engine = TradingEngine()

    ranked_stocks = engine.rank_stocks(tickers[:50])
    
    df_ranked = pd.DataFrame(ranked_stocks, columns=['Ticker', 'SharpeRatio'])
    df_ranked = df_ranked.sort_values(by='SharpeRatio', ascending=False)
    df_ranked = df_ranked.head(10)
    
    print(df_ranked)

    engine.save_rankings_for_alpaca(df_ranked)

  Ticker  SharpeRatio
0   ALGN     1.548071
1    ACN     0.928442
2    AFL     0.525377
3    APA     0.455604
4    AJG     0.451385
5    AMP     0.289236
6   AMGN     0.285553
7   ACGL     0.276913
8    ADI     0.260045
9   AKAM     0.258204
Rankings saved to shared_data/rankings_20250804_215843.json for Alpaca trading
