This script sets up and runs a backtest using the Zipline library, leveraging both built-in and custom data bundles. The trading strategy is based on moving-average crossovers for a single asset, AAPL. Finally, the script runs the backtest over a specified time period and outputs performance metrics and visualizations.

In [50]:
import warnings
import pandas as pd
import matplotlib.pyplot as plt
from openbb import obb
from zipline.data.bundles import register
from zipline.data.bundles.csvdir import csvdir_equities 
from zipline.data.bundles import load
from zipline.data.bundles.core import ingest
from zipline.utils.paths import data_root
from zipline import run_algorithm
from zipline.data import bundles
from zipline.api import order_target, order_target_percent, symbol
from zipline.finance import commission, slippage
# Ignore every Python warning from here on (presumably to keep the notebook
# output readable).
warnings.filterwarnings("ignore")
# Set OpenBB's output-type preference to "dataframe"; the price query later in
# this notebook calls DataFrame.to_csv on its result.
obb.user.preferences.output_type = "dataframe"

In [51]:
# Download the daily AAPL price history through OpenBB and save it as a CSV.
# The provider is pinned to yfinance so the data really comes from Yahoo
# Finance, rather than from whichever default provider the local OpenBB
# installation happens to be configured with.
stock = "AAPL"
data = obb.equity.price.historical(
    stock,
    start_date="2020-01-01",
    end_date="2024-12-31",
    provider="yfinance",
)
# Name the file after the ticker so that changing `stock` cannot leave a CSV
# whose name disagrees with its contents. The index is written under the
# column name "date" and floats are formatted with six decimal places.
data.to_csv(f"{stock}.csv", index_label="date", float_format="%.6f")
# UTC-aware timestamps for the same calendar range as the download.
start_session = pd.Timestamp("2020-01-01", tz='utc')
end_session = pd.Timestamp("2024-12-31", tz='utc')

## Backtesting

In [52]:
# Build the ingest function for a csvdir bundle that reads 'daily' data from
# the 'route_to_csv' directory, then register it under the name
# 'custom-csvdir-bundle' on the NYSE calendar.
# NOTE(review): 'route_to_csv' looks like a placeholder path -- confirm it
# points at the directory that actually holds the CSV files.
csv_ingest = csvdir_equities(['daily'], 'route_to_csv')
# No start/end session is passed: zipline is left to infer the date range.
register('custom-csvdir-bundle', csv_ingest, calendar_name='NYSE')

<bound method CSVDIRBundle.ingest of <zipline.data.bundles.csvdir.CSVDIRBundle object at 0x0000017630505E10>>

In [53]:
# Ingest the bundle registered under this name; the backtest later refers to
# the same bundle name when it runs.
bundle_name = "custom-csvdir-bundle"
ingest(bundle_name)

In [54]:
def initialize(context):
    """Prepare the algorithm state before the backtest starts.

    Sets a bar counter ``context.i`` to zero, stores the AAPL asset looked up
    via ``symbol`` on ``context.asset``, and installs the trading-cost models:
    a per-share commission with ``cost=0.01`` and a fixed slippage model with
    ``spread=0.01``.
    """
    # Counter that the bar handler advances; starts at zero.
    context.i = 0
    # The single asset this strategy trades.
    context.asset = symbol("AAPL")

    # Build the cost models first, then hand them to the algorithm.
    per_share_fee = commission.PerShare(cost=0.01)
    fixed_spread = slippage.FixedSlippage(spread=0.01)
    context.set_commission(per_share_fee)
    context.set_slippage(fixed_spread)

In [76]:
def handle_data(context, data):
    """Trade a 21/63-bar moving-average crossover on ``context.asset``.

    Every call advances the ``context.i`` counter. The first 99 calls do
    nothing else. From the 100th call on, the mean "price" over the last 21
    daily bars is compared with the mean over the last 63 daily bars:

    * short mean above long mean -> ``order_target_percent(asset, 0.15)``;
    * short mean below long mean -> ``order_target_percent(asset, -0.10)``;
    * neither (equal, or not comparable) -> no order is placed.
    """
    context.i += 1
    # Warm-up guard: nothing is computed until 100 bars have been counted,
    # which is more than the 63 bars the longest window needs.
    if context.i < 100:
        return

    def mean_price(window):
        # Average "price" of the asset over the trailing `window` daily bars.
        prices = data.history(
            context.asset,
            "price",
            bar_count=window,
            frequency="1d",
        )
        return prices.mean()

    fast_avg = mean_price(21)
    slow_avg = mean_price(63)

    # Pick the target weight from the crossover direction; leave the position
    # untouched when neither average is strictly above the other.
    if fast_avg > slow_avg:
        target_weight = 0.15
    elif fast_avg < slow_avg:
        target_weight = -0.10
    else:
        return

    order_target_percent(context.asset, target_weight)

In [82]:
# Run the backtest on the custom csvdir bundle with 100,000 of starting
# capital, from 2020-01-02 through 2024-12-31, using the initialize and
# handle_data callbacks defined earlier in this notebook. The returned
# result is kept in `perf`.
backtest_start = pd.Timestamp("2020-01-02")
backtest_end = pd.Timestamp("2024-12-31")
perf = run_algorithm(
    bundle="custom-csvdir-bundle",
    capital_base=100_000,
    start=backtest_start,
    end=backtest_end,
    initialize=initialize,
    handle_data=handle_data,
)

In [83]:
# Print a summary of the backtest result frame: its index range, the column
# names, the non-null counts and the dtypes.
perf.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1258 entries, 2020-01-02 21:00:00+00:00 to 2024-12-31 21:00:00+00:00
Data columns (total 37 columns):
 #   Column                   Non-Null Count  Dtype              
---  ------                   --------------  -----              
 0   period_open              1258 non-null   datetime64[ns, UTC]
 1   period_close             1258 non-null   datetime64[ns, UTC]
 2   pnl                      1258 non-null   float64            
 3   returns                  1258 non-null   float64            
 4   ending_cash              1258 non-null   float64            
 5   starting_value           1258 non-null   float64            
 6   transactions             1258 non-null   object             
 7   net_leverage             1258 non-null   float64            
 8   capital_used             1258 non-null   float64            
 9   short_exposure           1258 non-null   float64            
 10  orders                   1258 non-null   object 