In [1]:
!pip install polars-talib
!pip install alpaca-trade-api
!pip install yfinance
!pip install -U polygon-api-client



In [2]:
import os
from datetime import datetime, timedelta

import lumibot
import polars as pl
import polars_talib as plta
from lumibot.backtesting import BacktestingBroker, PolygonDataBacktesting
from lumibot.strategies import Strategy
from lumibot.traders import Trader
from polygon import RESTClient

from indicators.IndicatorFactory import IndicatorFactory



##### Lumibot Implementation Example

In [3]:
class MyStrategy(Strategy):
    """Buy-and-hold demo: on the first trading iteration, spend the whole
    portfolio value on the configured symbol, then do nothing afterwards.
    """

    # "symbol" is the ticker bought on the first iteration.
    parameters = {
        "symbol": "AAPL",
    }

    def initialize(self):
        """Configure the pause between trading iterations."""
        # NOTE(review): "1M" is presumably one minute in Lumibot's sleeptime
        # notation -- confirm against the Lumibot docs.
        self.sleeptime = "1M"

    def on_trading_iteration(self):
        """Submit a single buy order sized to the full portfolio value.

        Only acts when ``self.first_iteration`` is true. If no usable last
        price is available, the buy is skipped and a message is logged
        instead of raising.
        """
        if not self.first_iteration:
            return

        symbol = self.parameters["symbol"]
        price = self.get_last_price(symbol)

        # A missing (None) or non-positive price would make the division below
        # raise TypeError / ZeroDivisionError and abort the run.
        if price is None or price <= 0:
            self.log_message(
                f"No usable last price for {symbol} ({price!r}); skipping the initial buy."
            )
            return

        qty = self.portfolio_value / price
        order = self.create_order(symbol, quantity=qty, side="buy")
        self.submit_order(order)

In [4]:
# Backtest window.
backtesting_start = datetime(2025, 1, 1)
backtesting_end = datetime(2025, 1, 31)

# Never commit API keys to a notebook: read the key from the environment.
# Raises KeyError if POLYGON_API_KEY is not set.
polygon_key = os.environ["POLYGON_API_KEY"]

# Run MyStrategy against Polygon historical data, benchmarked against SPY.
result = MyStrategy.run_backtest(
    PolygonDataBacktesting,
    backtesting_start,
    backtesting_end,
    polygon_api_key=polygon_key,
    benchmark_asset="SPY",
)

### Polars DataFrame Example

In [5]:
import polars as pl

# Column-oriented toy data: each key becomes a column, each list its values.
data = dict(
    name=["Alice", "Bob", "Charlie"],
    age=[25, 30, 35],
    city=["New York", "London", "Paris"],
)

# Build the frame from the dict and show its plain-text table.
df = pl.DataFrame(data)
print(df)

shape: (3, 3)
┌─────────┬─────┬──────────┐
│ name    ┆ age ┆ city     │
│ ---     ┆ --- ┆ ---      │
│ str     ┆ i64 ┆ str      │
╞═════════╪═════╪══════════╡
│ Alice   ┆ 25  ┆ New York │
│ Bob     ┆ 30  ┆ London   │
│ Charlie ┆ 35  ┆ Paris    │
└─────────┴─────┴──────────┘


#### Load stock data into Polars DataFrames

In [6]:
def get_stock_data_polygon(api_key, symbols=("AAPL", "MSFT", "GOOG"), start_date="2024-06-01", end_date="2024-06-30"):
    """
    Fetch 1-minute aggregate bars from Polygon for one or more symbols.

    Args:
        api_key (str): Polygon API key
        symbols (str | iterable of str): Symbol(s) to fetch; a single string
            is treated as one symbol
        start_date (str): Start date in YYYY-MM-DD format
        end_date (str): End date in YYYY-MM-DD format

    Returns:
        dict: Dictionary with symbol as key and Polars DataFrame as value.
            Each frame is sorted by "timestamp" and has the columns
            timestamp, open, high, low, close, volume, vwap, symbol.
            Symbols that returned no bars, or whose request raised, are
            reported via print and left out of the dictionary.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(symbols, str):
        symbols = [symbols]
    symbols = list(symbols)

    stock_data = {}
    if not symbols:
        # Nothing to fetch, so there is no need to create a client.
        return stock_data

    client = RESTClient(api_key)

    for symbol in symbols:
        try:
            # list_aggs follows the API's pagination, so ranges with more bars
            # than a single response can hold are not silently cut off.
            aggs = client.list_aggs(
                ticker=symbol,
                multiplier=1,
                timespan="minute",
                from_=start_date,
                to=end_date,
                limit=50000,
            )

            # One record per bar. agg.timestamp is divided by 1000 to get
            # seconds; fromtimestamp() without tz yields a naive datetime in
            # the machine's local timezone.
            # NOTE(review): results therefore depend on the host timezone.
            data_list = [
                {
                    "timestamp": datetime.fromtimestamp(agg.timestamp / 1000),
                    "open": agg.open,
                    "high": agg.high,
                    "low": agg.low,
                    "close": agg.close,
                    "volume": agg.volume,
                    "vwap": agg.vwap,
                    "symbol": symbol,
                }
                for agg in aggs
            ]

            if data_list:
                # Build the frame once from all records, ordered by time.
                df = pl.DataFrame(data_list).sort("timestamp")
                stock_data[symbol] = df
                print(f"✅ Successfully fetched {len(df)} records for {symbol}")
            else:
                print(f"❌ No data found for {symbol}")

        except Exception as e:
            # Best effort: report the failure and continue with the next symbol.
            print(f"❌ Error fetching data for {symbol}: {str(e)}")

    return stock_data

# Example usage:
# Never commit API keys to a notebook: read the key from the environment.
# Raises KeyError if POLYGON_API_KEY is not set.
api_key = os.environ["POLYGON_API_KEY"]
stock_data = get_stock_data_polygon(api_key)

# Display sample data for each symbol
for symbol, df in stock_data.items():
    print(f"\n📊 {symbol} Data Sample:")
    print(df.head())
    print(f"Shape: {df.shape}")
    print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")

✅ Successfully fetched 15147 records for AAPL
✅ Successfully fetched 11077 records for MSFT
✅ Successfully fetched 10252 records for GOOG

📊 AAPL Data Sample:
shape: (5, 8)
┌─────────────────────┬────────┬────────┬────────┬────────┬────────┬──────────┬────────┐
│ timestamp           ┆ open   ┆ high   ┆ low    ┆ close  ┆ volume ┆ vwap     ┆ symbol │
│ ---                 ┆ ---    ┆ ---    ┆ ---    ┆ ---    ┆ ---    ┆ ---      ┆ ---    │
│ datetime[μs]        ┆ f64    ┆ f64    ┆ f64    ┆ f64    ┆ i64    ┆ f64      ┆ str    │
╞═════════════════════╪════════╪════════╪════════╪════════╪════════╪══════════╪════════╡
│ 2024-06-03 04:00:00 ┆ 192.45 ┆ 193.14 ┆ 192.45 ┆ 192.77 ┆ 3376   ┆ 192.8161 ┆ AAPL   │
│ 2024-06-03 04:01:00 ┆ 192.69 ┆ 192.69 ┆ 192.69 ┆ 192.69 ┆ 1028   ┆ 192.6405 ┆ AAPL   │
│ 2024-06-03 04:02:00 ┆ 192.68 ┆ 192.7  ┆ 192.68 ┆ 192.7  ┆ 6174   ┆ 192.7006 ┆ AAPL   │
│ 2024-06-03 04:03:00 ┆ 192.73 ┆ 192.78 ┆ 192.73 ┆ 192.74 ┆ 2982   ┆ 192.7456 ┆ AAPL   │
│ 2024-06-03 04:04:00 ┆ 19

In [7]:
# Combine all stock data into a single DataFrame
def combine_stock_data(stock_data):
    """
    Stack the per-symbol DataFrames into one DataFrame.

    Args:
        stock_data (dict): Maps each symbol to its DataFrame

    Returns:
        pl.DataFrame: The frames concatenated in the dict's iteration order,
            or an empty DataFrame when stock_data has no entries
    """
    frames = [frame for frame in stock_data.values()]

    # pl.concat is only called when there is at least one frame.
    if not frames:
        return pl.DataFrame()
    return pl.concat(frames)

# Merge the per-symbol frames, then report the result's shape and symbols
combined_df = combine_stock_data(stock_data)
print(f"📈 Combined DataFrame shape: {combined_df.shape}")
symbols_included = combined_df.get_column("symbol").unique().to_list()
print(f"Symbols included: {symbols_included}")


📈 Combined DataFrame shape: (36476, 8)
Symbols included: ['GOOG', 'MSFT', 'AAPL']


In [8]:
# Indicator expressions on "close", each evaluated per symbol via .over("symbol").
ema5_expr = pl.col("close").ta.ema(5).over("symbol").alias("ema5")
macd_expr = pl.col("close").ta.macd(12, 26, 9).over("symbol")

# Add the EMA plus the "macd" and "macdsignal" fields of the MACD struct.
combined_df.with_columns(
    ema5_expr,
    macd_expr.struct.field("macd"),
    macd_expr.struct.field("macdsignal"),
)

timestamp,open,high,low,close,volume,vwap,symbol,ema5,macd,macdsignal
datetime[μs],f64,f64,f64,f64,i64,f64,str,f64,f64,f64
2024-06-03 04:00:00,192.45,193.14,192.45,192.77,3376,192.8161,"""AAPL""",,,
2024-06-03 04:01:00,192.69,192.69,192.69,192.69,1028,192.6405,"""AAPL""",,,
2024-06-03 04:02:00,192.68,192.7,192.68,192.7,6174,192.7006,"""AAPL""",,,
2024-06-03 04:03:00,192.73,192.78,192.73,192.74,2982,192.7456,"""AAPL""",,,
2024-06-03 04:04:00,192.77,193.0,192.77,193.0,13024,192.8495,"""AAPL""",192.78,,
…,…,…,…,…,…,…,…,…,…,…
2024-06-28 19:54:00,183.95,183.95,183.95,183.95,745,183.9502,"""GOOG""",183.937732,0.025627,0.021646
2024-06-28 19:56:00,183.91,183.91,183.91,183.91,247,183.9203,"""GOOG""",183.928488,0.023129,0.021942
2024-06-28 19:57:00,183.95,183.96,183.95,183.96,636,183.9394,"""GOOG""",183.938992,0.024897,0.022533
2024-06-28 19:58:00,183.95,183.95,183.95,183.95,325,183.9524,"""GOOG""",183.942661,0.0252,0.023067


In [9]:
# Bollinger bands on "close" (kept as one struct column), computed per symbol.
bbands_expr = (
    plta.bbands(pl.col("close"), timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
    .over("symbol")
    .alias("bbands")
)

# RSI on "close", computed per symbol.
rsi_expr = plta.rsi(pl.col("close"), timeperiod=14).over("symbol").alias("rsi")

combined_df.with_columns(bbands_expr, rsi_expr)

timestamp,open,high,low,close,volume,vwap,symbol,bbands,rsi
datetime[μs],f64,f64,f64,f64,i64,f64,str,struct[3],f64
2024-06-03 04:00:00,192.45,193.14,192.45,192.77,3376,192.8161,"""AAPL""","{NaN,NaN,NaN}",
2024-06-03 04:01:00,192.69,192.69,192.69,192.69,1028,192.6405,"""AAPL""","{NaN,NaN,NaN}",
2024-06-03 04:02:00,192.68,192.7,192.68,192.7,6174,192.7006,"""AAPL""","{NaN,NaN,NaN}",
2024-06-03 04:03:00,192.73,192.78,192.73,192.74,2982,192.7456,"""AAPL""","{NaN,NaN,NaN}",
2024-06-03 04:04:00,192.77,193.0,192.77,193.0,13024,192.8495,"""AAPL""","{NaN,NaN,NaN}",
…,…,…,…,…,…,…,…,…,…
2024-06-28 19:54:00,183.95,183.95,183.95,183.95,745,183.9502,"""GOOG""","{184.010061,183.891995,183.773929}",54.42429
2024-06-28 19:56:00,183.91,183.91,183.91,183.91,247,183.9203,"""GOOG""","{184.003072,183.898995,183.794918}",51.370453
2024-06-28 19:57:00,183.95,183.96,183.95,183.96,636,183.9394,"""GOOG""","{184.003744,183.906995,183.810246}",54.785711
2024-06-28 19:58:00,183.95,183.95,183.95,183.95,325,183.9524,"""GOOG""","{184.00791,183.910995,183.81408}",53.969345


In [10]:
# Synthetic 100-row OHLCV frame for a single symbol.
# NOTE(review): this sample frame is not passed to the factory below, which
# is built from combined_df instead -- confirm which input is intended.
df = pl.DataFrame({
    'symbol': ['AAPL'] * 100,
    'open': list(range(100, 200)),
    'high': list(range(101, 201)),
    'low': list(range(99, 199)),
    'close': list(range(100, 200)),
    'volume': list(range(1000000, 1100000, 1000)),
})

# Build the factory from the combined multi-symbol frame
factory = IndicatorFactory(combined_df)

# Ask the factory for the indicator-enriched DataFrame
result_df = factory.get_indicators()

In [11]:
# Show the frame returned by factory.get_indicators() as the cell's output.
result_df

timestamp,open,high,low,close,volume,vwap,symbol,sma_20,ema_20,rsi,bb_upper,bb_middle,bb_lower,atr,adx,obv,mfi,cci,vwap_calc,timestamp_prev,close_prev,symbol_prev,sma_20_prev,ema_20_prev,rsi_prev,bb_upper_prev,bb_middle_prev,bb_lower_prev,atr_prev,adx_prev,obv_prev,mfi_prev,cci_prev,vwap_calc_prev
datetime[μs],f64,f64,f64,f64,i64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,datetime[μs],f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2024-06-03 04:00:00,192.45,193.14,192.45,192.77,3376,192.8161,"""AAPL""",,,,,,,,,3376.0,,,192.77,,,,,,,,,,,,,,,
2024-06-03 04:01:00,192.69,192.69,192.69,192.69,1028,192.6405,"""AAPL""",,,,,,,,,2348.0,,,192.751326,2024-06-03 04:00:00,192.77,"""AAPL""",,,,,,,,,3376.0,,,192.77
2024-06-03 04:02:00,192.68,192.7,192.68,192.7,6174,192.7006,"""AAPL""",,,,,,,,,8522.0,,,192.721369,2024-06-03 04:01:00,192.69,"""AAPL""",,,,,,,,,2348.0,,,192.751326
2024-06-03 04:03:00,192.73,192.78,192.73,192.74,2982,192.7456,"""AAPL""",,,,,,,,,11504.0,,,192.725466,2024-06-03 04:02:00,192.7,"""AAPL""",,,,,,,,,8522.0,,,192.721369
2024-06-03 04:04:00,192.77,193.0,192.77,193.0,13024,192.8495,"""AAPL""",,,,,,,,,24528.0,,,192.859965,2024-06-03 04:03:00,192.74,"""AAPL""",,,,,,,,,11504.0,,,192.725466
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2024-06-28 19:54:00,183.95,183.95,183.95,183.95,745,183.9502,"""GOOG""",183.891995,183.901757,54.42429,184.010061,183.891995,183.773929,0.054271,12.211181,9.284436e6,46.675952,86.302971,183.944876,2024-06-28 19:52:00,183.95,"""GOOG""",183.897985,183.896679,54.42429,184.037435,183.897985,183.758535,0.058445,11.860619,9.284436e6,51.319611,72.250208,183.938358
2024-06-28 19:56:00,183.91,183.91,183.91,183.91,247,183.9203,"""GOOG""",183.898995,183.902542,51.370453,184.003072,183.898995,183.794918,0.053251,12.018601,9.284189e6,43.264424,21.150001,183.948494,2024-06-28 19:54:00,183.95,"""GOOG""",183.891995,183.901757,54.42429,184.010061,183.891995,183.773929,0.054271,12.211181,9.284436e6,46.675952,86.302971,183.944876
2024-06-28 19:57:00,183.95,183.96,183.95,183.96,636,183.9394,"""GOOG""",183.906995,183.908014,54.785711,184.003744,183.906995,183.810246,0.053019,12.338527,9.284825e6,50.871007,86.392168,183.951277,2024-06-28 19:56:00,183.91,"""GOOG""",183.898995,183.902542,51.370453,184.003072,183.898995,183.794918,0.053251,12.018601,9.284189e6,43.264424,21.150001,183.948494
2024-06-28 19:58:00,183.95,183.95,183.95,183.95,325,183.9524,"""GOOG""",183.910995,183.912013,53.969345,184.00791,183.910995,183.81408,0.049946,12.635601,9.2845e6,38.385134,67.770582,183.948669,2024-06-28 19:57:00,183.96,"""GOOG""",183.906995,183.908014,54.785711,184.003744,183.906995,183.810246,0.053019,12.338527,9.284825e6,50.871007,86.392168,183.951277


In [None]:
yaml_strategy = 