In [None]:
# Cell 2: Import required libraries
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from zipline.api import attach_pipeline, pipeline_output, order_target_percent, record
from zipline.pipeline import Pipeline
from zipline.pipeline.factors import AverageDollarVolume, CustomFactor
from zipline import run_algorithm
from zipline.data.data_portal import DataPortal
from zipline.utils.calendars import get_calendar
from datetime import datetime
import pytz
from trading_calendars import get_calendar



In [1]:
# Cell 3: Download stock data using yfinance
# Define the tickers and download their historical data
tickers = ['AAPL', 'MSFT', 'GOOG', 'AMZN']
data = yf.download(tickers, start='2014-01-01', end='2014-12-31')

# Display the data to ensure it's properly downloaded
data.head()


NameError: name 'yf' is not defined

In [None]:
# Cell 4: Prepare Yahoo Finance data for Zipline
# Adjust the column structure to match Zipline's expected OHLCV format
ohlcv_data = data['Adj Close']
volume_data = data['Volume']

# Ensure columns match the symbol names Zipline expects (lowercase)
ohlcv_data.columns = [symbol.lower() for symbol in ohlcv_data.columns]

# Convert index to DatetimeIndex if needed
ohlcv_data.index = pd.to_datetime(ohlcv_data.index)
volume_data.index = pd.to_datetime(volume_data.index)

# Display the first few rows to confirm the format
ohlcv_data.head()


In [2]:
# Cell 5: Register Yahoo Finance bundle in Zipline
# Custom bundle function to register the Yahoo Finance data
from zipline.data.bundles import register

def yahoo_finance_bundle(environ, asset_db_writer, minute_bar_writer, daily_bar_writer, adjustment_writer, calendar, start_session, end_session, cache, show_progress, output_dir):
    assets = pd.DataFrame(index=pd.Index(tickers, name='symbol'))
    
    # Write daily OHLCV data to the bundle
    daily_bar_writer.write(
        ohlcv_data, 
        assets.index, 
        calendar.sessions_in_range(start_session, end_session)
    )

# Register the custom bundle under the name 'yahoo-finance'
register('yahoo-finance', yahoo_finance_bundle)


<function __main__.yahoo_finance_bundle(environ, asset_db_writer, minute_bar_writer, daily_bar_writer, adjustment_writer, calendar, start_session, end_session, cache, show_progress, output_dir)>

In [3]:
# Cell 6: Define factors and pipeline
class MeanReversion(CustomFactor):
    inputs = [USEquityPricing.close]
    window_length = 10

    def compute(self, today, assets, out, close_prices):
        short_ma = close_prices[-5:].mean(axis=0)
        long_ma = close_prices.mean(axis=0)
        out[:] = long_ma - short_ma

# Define constants for the strategy
N_LONGS = 10
N_SHORTS = 10
VOL_SCREEN = 500

def compute_factors():
    """Create the factor pipeline for mean reversion and filter by 30-day Dollar Volume"""
    mean_reversion = MeanReversion()
    dollar_volume = AverageDollarVolume(window_length=30)
    return Pipeline(
        columns={
            'longs': mean_reversion.bottom(N_LONGS),
            'shorts': mean_reversion.top(N_SHORTS),
            'ranking': mean_reversion.rank(ascending=False)
        },
        screen=dollar_volume.top(VOL_SCREEN)
    )


NameError: name 'USEquityPricing' is not defined

In [4]:
# Cell 7: Initialize the backtest
def initialize(context):
    """Initialize the algorithm by attaching the factor pipeline"""
    pipeline = compute_factors()
    attach_pipeline(pipeline, 'factor_pipeline')


In [5]:
# Cell 8: Define before_trading_start function
def before_trading_start(context, data):
    """Run the factor pipeline before trading starts"""
    context.factor_data = pipeline_output('factor_pipeline')
    
    # Record the factor ranking and asset prices
    record(factor_data=context.factor_data['ranking'])
    assets = context.factor_data.index
    record(prices=data.current(assets, 'price'))


In [6]:
# Cell 9: Run the backtest
start = pd.Timestamp('2014-01-02', tz='UTC')
end = pd.Timestamp('2014-12-31', tz='UTC')

results = run_algorithm(
    start=start,
    end=end,
    initialize=initialize,
    before_trading_start=before_trading_start,
    capital_base=10000,
    bundle='yahoo-finance',  # Use the custom Yahoo Finance bundle
    trading_calendar=get_calendar('XNYS')  # NYSE calendar
)


NameError: name 'get_calendar' is not defined

In [None]:
# Cell 10: Plot the portfolio value
results.portfolio_value.plot(figsize=(10, 6))
plt.title('Portfolio Value Over Time')
plt.show()
