In [1]:
import pandas as pd
import numpy as np
import backtest

In [2]:
# Pin NumPy's global RNG so the mock data generated below is reproducible
np.random.seed(seed=123)

In [3]:
# Daily calendar for 2024 (the same range is used for prices_df)
datetime_index = pd.date_range(start='2024-01-01', end='2024-12-31', freq='D')

# Placeholder asset universe: Asset_1 ... Asset_5
assets = [f'Asset_{k}' for k in range(1, 6)]

# One uniform random signal in [0, 1) per (datetime, asset) pair,
# laid out as a (dates x assets) matrix
n_dates, n_assets = len(datetime_index), len(assets)
signal_data = np.random.rand(n_dates, n_assets)

# Long-format signals frame keyed by a (datetime, asset) MultiIndex;
# row-major flattening matches the ordering produced by from_product
index = pd.MultiIndex.from_product([datetime_index, assets], names=['datetime', 'asset'])
signals_df = pd.DataFrame({'signal': signal_data.ravel()}, index=index)

print(signals_df.head())

                      signal
datetime   asset            
2024-01-01 Asset_1  0.696469
           Asset_2  0.286139
           Asset_3  0.226851
           Asset_4  0.551315
           Asset_5  0.719469


In [4]:
def generate_price_series(start_date, end_date, num_assets, mu, sigma, initial_price=100):
    """Simulate daily price paths for ``num_assets`` mock assets.

    Every path starts at ``initial_price`` and evolves with the recursion
    ``P[t+1] = P[t] * (1 + mu*dt + sigma*sqrt(dt)*eps[t])`` with ``dt = 1`` and
    ``eps`` drawn from a standard normal via NumPy's global RNG. This is an
    arithmetic-return (Euler-style) step rather than an exact log-normal GBM
    solution, so a sufficiently negative shock can produce a negative price.

    Parameters
    ----------
    start_date, end_date : anything accepted by ``pd.date_range``
        Inclusive bounds of the daily calendar.
    num_assets : int
        Number of columns to simulate; columns are named ``Asset_1`` ...
        ``Asset_<num_assets>``.
    mu : float
        Drift applied at every step.
    sigma : float
        Volatility applied at every step.
    initial_price : float, default 100
        Price of every asset on the first date.

    Returns
    -------
    pd.DataFrame
        Float prices indexed by date, one column per asset. An empty date
        range yields an empty frame that still carries the asset columns.
    """
    # Generate dates
    dates = pd.date_range(start=start_date, end=end_date, freq='D')
    asset_names = [f'Asset_{i + 1}' for i in range(num_assets)]

    # Generate random shocks. One row per date is drawn (the last row is never
    # used) so the amount of randomness consumed from the global RNG is stable.
    dt = 1  # Time step
    epsilon = np.random.normal(0, 1, size=(len(dates), num_assets))

    if len(dates) == 0:
        # Nothing to simulate: return an empty, correctly shaped frame
        return pd.DataFrame(index=dates, columns=asset_names, dtype=float)

    # Row 0 holds the starting price and rows 1.. hold the per-step gross
    # returns, so a cumulative product down the rows multiplies in exactly the
    # order P0 * g0 * g1 * ... for every asset at once.
    steps = np.empty((len(dates), num_assets), dtype=float)
    steps[0] = initial_price
    steps[1:] = 1 + mu * dt + sigma * np.sqrt(dt) * epsilon[:-1]
    prices = np.cumprod(steps, axis=0)

    return pd.DataFrame(prices, index=dates, columns=asset_names)

# Simulation settings
start_date, end_date = '2024-01-01', '2024-12-31'
num_assets = 5
mu = 0.01    # Drift (expected return)
sigma = 0.2  # Volatility

# Simulate the mock price panel
prices_df = generate_price_series(
    start_date=start_date,
    end_date=end_date,
    num_assets=num_assets,
    mu=mu,
    sigma=sigma,
)

# Preview the first rows of the price DataFrame
print(prices_df.head())


               Asset_1     Asset_2     Asset_3     Asset_4     Asset_5
2024-01-01  100.000000  100.000000  100.000000  100.000000  100.000000
2024-01-02   97.117114   96.975679  114.046106  102.744247  120.748569
2024-01-03  103.477291  112.706864  125.298588  117.031939  120.925998
2024-01-04  109.117378  107.724420  141.691891   86.696355  139.697009
2024-01-05  131.988934  124.782836  140.158850   96.323625  119.396835


In [5]:
def vectorial_backtest(signals_df, prices_df, initial_cash, c, n):
    """Backtest an equally weighted long/short strategy driven by ranked signals.

    On every date the ``n`` assets with the largest signal receive a weight of
    ``+1/(2n)`` and the ``n`` assets with the smallest signal a weight of
    ``-1/(2n)``; every other asset gets 0. The portfolio is rebalanced to those
    weights on each date of ``prices_df`` except the last, pays proportional
    costs on the absolute change in weights, and then earns the close-to-close
    return up to the next date.

    Parameters
    ----------
    signals_df : pd.DataFrame
        Frame with a ``signal`` column and a MultiIndex whose levels are named
        ``datetime`` and ``asset``. It must contain every date of
        ``prices_df`` except the last one.
    prices_df : pd.DataFrame
        Prices indexed by date, one column per asset.
    initial_cash : float
        Starting portfolio value.
    c : float
        Transaction cost in basis points, charged on traded weight.
    n : int
        Number of assets in each of the long and short legs.

    Returns
    -------
    trades_df : pd.DataFrame
        Target weights, dates as rows and assets as columns.
    equity_line_df : pd.DataFrame
        Single ``portfolio_value`` column with one row per date of
        ``prices_df``: the value before trading on that date, and for the last
        date the value after the final return has been applied.

    Notes
    -----
    If the long and short selections overlap on some date (possible when
    ``2 * n`` exceeds the number of assets), the concatenated weights contain
    duplicate (datetime, asset) entries and ``unstack`` is expected to raise.
    """
    # Pick the top n and bottom n signals per date
    top_n = pd.DataFrame(signals_df['signal'].groupby(level='datetime').nlargest(n).droplevel(0))
    bottom_n = pd.DataFrame(signals_df.groupby(level='datetime')['signal'].nsmallest(n).droplevel(0))

    # Equally weighted legs: the signal values are replaced by target weights
    top_n['signal'] = 1 / (2 * n)
    bottom_n['signal'] = -1 / (2 * n)

    # Wide (date x asset) weights; assets in neither leg get a zero weight
    trades_df = pd.concat([top_n, bottom_n])
    trades_df = trades_df['signal'].unstack()
    trades_df = trades_df.fillna(0)

    # Convert basis points to decimal
    transaction_costs = c / 10000

    # Return earned from each date to the next one (NaN on the last date)
    fwd_returns_df = prices_df.pct_change().shift(-1)

    portfolio_value = initial_cash
    equity_values = []

    # The book starts flat, so the first rebalance pays costs on the full weights
    previous_weights = pd.Series(0.0, index=trades_df.columns)

    # The last date only marks the final value; there is no forward return to earn
    for timestamp in prices_df.index[:-1]:
        # Value recorded before trading on this date
        equity_values.append(portfolio_value)

        weights = trades_df.loc[timestamp]

        # Costs are proportional to the total absolute change in weights
        turnover = (weights - previous_weights).abs().sum()
        portfolio_value -= turnover * transaction_costs * portfolio_value

        # Apply the portfolio return after accounting for costs
        total_return = (weights * fwd_returns_df.loc[timestamp]).sum()
        portfolio_value *= 1 + total_return

        previous_weights = weights

    # Closing value belongs to the LAST price date, not to the last rebalance
    # date; with an empty price index there is nothing to record at all.
    if len(prices_df.index) > 0:
        equity_values.append(portfolio_value)

    # Build the equity line in one go, aligned one-to-one with the price dates
    equity_line_df = pd.DataFrame(
        {'portfolio_value': equity_values},
        index=prices_df.index.rename('datetime'),
    )

    return trades_df, equity_line_df


In [6]:
# Example usage.
# Prerequisites (assumed to be built already):
#   - signals_df: MultiIndex with 'datetime' and 'asset' levels
#   - prices_df:  DateTimeIndex

# Backtest configuration
backtest_settings = dict(
    signals=signals_df,
    prices=prices_df,
    initial_cash=1000000,
    commissions=10,
    number_of_instruments_long_leg=2,
    number_of_instruments_short_leg=2,
)
backtest_obj = backtest.VectorialBacktest(**backtest_settings)

# Run backtest
trades_df, equity_line_df = backtest_obj.do_backtest()


In [7]:
# Small hand-written two-asset example: five signal dates, six price dates
signal_dates = ['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05']

# Signals in (date, asset) order: A, B for each date in turn
signals_df = pd.DataFrame(
    {'signal': [1, 0, 1, 0, 0, 1, 0, 1, 1, 0]},
    index=pd.MultiIndex.from_product(
        [signal_dates, ['A', 'B']],
        names=['datetime', 'asset'],
    ),
)

# One row per date, columns A and B
prices_df = pd.DataFrame(
    data=[
        [100, 100],
        [101, 95],
        [100, 100],
        [99, 105],
        [100, 100],
        [101, 90],
    ],
    columns=['A', 'B'],
    index=['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05', '2020-01-06'],
)



In [8]:
# Scenario: no commissions, one instrument per leg
backtest_settings = dict(
    signals=signals_df,
    prices=prices_df,
    initial_cash=1000000,
    commissions=0,
    number_of_instruments_long_leg=1,
    number_of_instruments_short_leg=1,
)
backtest_obj = backtest.VectorialBacktest(**backtest_settings)

# Run backtest
trades_df, equity_line_df = backtest_obj.do_backtest()


In [9]:
# Scenario: with commissions, one instrument per leg
backtest_settings = dict(
    signals=signals_df,
    prices=prices_df,
    initial_cash=1000000,
    commissions=10,
    number_of_instruments_long_leg=1,
    number_of_instruments_short_leg=1,
)
backtest_obj = backtest.VectorialBacktest(**backtest_settings)

# Run backtest
trades_df, equity_line_df = backtest_obj.do_backtest()


In [10]:
# Scenario: long only (empty short leg), no commissions
backtest_settings = dict(
    signals=signals_df,
    prices=prices_df,
    initial_cash=1000000,
    commissions=0,
    number_of_instruments_long_leg=2,
    number_of_instruments_short_leg=0,
)
backtest_obj = backtest.VectorialBacktest(**backtest_settings)

# Run backtest
trades_df, equity_line_df = backtest_obj.do_backtest()
