In [1]:
import os
from typing import Optional


def set_project_root_dir(project_root_name: str, cwd: Optional[str] = None):
    """Change the working directory to the project root named ``project_root_name``.

    The path in ``cwd`` is normalised, split into its components, and searched
    for the first (outermost) component equal to ``project_root_name``. The
    process then changes into the directory ending at that component and the
    new working directory is printed.

    Args:
        project_root_name (str): The name of the project root directory.
            Must be a non-empty directory name.
        cwd (str, optional): Path to start from. Defaults to None, in which
            case ``os.getcwd()`` is used.

    Raises:
        ValueError: If ``project_root_name`` is empty, or if it is not one of
            the components of ``cwd``.

    Returns:
        None
    """
    # An empty name would match the empty leading component of a POSIX absolute
    # path ("/a/b" -> ["", "a", "b"]) and end in os.chdir(""); reject it up front.
    if not project_root_name:
        raise ValueError("project_root_name must be a non-empty directory name.")

    # If no starting path is provided, use the process's current working directory
    if cwd is None:
        cwd = os.getcwd()

    # Normalise before splitting: collapses duplicate/trailing separators and,
    # on Windows, converts forward slashes to os.sep so every component is split out.
    cwd_components = os.path.normpath(cwd).split(os.sep)

    # Find the index of the first occurrence of the project root directory
    try:
        root_index = cwd_components.index(project_root_name)
    except ValueError:
        # `from None` hides the uninformative "x is not in list" context in tracebacks.
        raise ValueError(
            f"Project root directory '{project_root_name}' not found in directory hierarchy."
        ) from None

    # Re-assemble every component up to and including the project root
    root_dir = os.sep.join(cwd_components[: root_index + 1])

    # Change the working directory to the project root directory
    os.chdir(root_dir)

    # Print new CWD
    print("New CWD is: " + os.getcwd())


# Switch to the repository root before the project imports in the next cell;
# presumably required so that the `src.` package resolves — confirm.
set_project_root_dir("backtestbuddy")

New CWD is: i:\Coding\00_Projects\00_packages\backtestbuddy


In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import TimeSeriesSplit
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from src.backtest.sport_backtest import ModelBacktest, PredictionBacktest
from src.strategies.sport_strategies import FixedStake

# Lift pandas' display limits (columns, total width, cell width) so the wide
# backtest result frames are rendered without elided columns.
for _display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

In [3]:
# Build a reproducible dummy dataset: one row per calendar day of 2023.
np.random.seed(42)  # seed the global NumPy RNG so every run yields the same frame
dates = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
n_samples = len(dates)

# The keyword arguments are evaluated left to right, so the random draws happen
# in the order odds_team_a, odds_team_b, actual_winner, model_predictions.
data = pd.DataFrame({'date': dates}).assign(
    odds_team_a=np.random.uniform(1.5, 3, n_samples),
    odds_team_b=np.random.uniform(1.5, 3, n_samples),
    actual_winner=np.random.randint(0, 2, n_samples),
    model_predictions=np.random.randint(0, 2, n_samples),
)

# Show the generated frame as the cell output
data

Unnamed: 0,date,odds_team_a,odds_team_b,actual_winner,model_predictions
0,2023-01-01,2.061810,2.079154,0,0
1,2023-01-02,2.926071,2.941786,0,1
2,2023-01-03,2.597991,2.858026,0,1
3,2023-01-04,2.397988,1.793687,0,1
4,2023-01-05,1.734028,1.604042,0,0
...,...,...,...,...,...
360,2023-12-27,2.082255,2.762743,0,0
361,2023-12-28,2.464932,1.709659,0,0
362,2023-12-29,2.187379,2.692901,1,1
363,2023-12-30,2.318425,1.802441,1,1


In [4]:
# Staking strategy shared by the backtests in this notebook (stake of 10 per bet).
strategy = FixedStake(stake=10)

# Backtest the pre-computed predictions stored in `data`:
# the column names below tell the backtester where to find dates, odds,
# the realised outcome and the prediction to bet on.
backtest = PredictionBacktest(
    data=data,
    strategy=strategy,
    initial_bankroll=1000,
    date_column='date',
    odds_columns=['odds_team_a', 'odds_team_b'],
    outcome_column='actual_winner',
    prediction_column='model_predictions',
)

In [5]:
# Execute the prediction backtest configured in the previous cell.
backtest.run()

In [6]:
# Per-bet results read from the `detailed_results` attribute; the stored output
# shows the bt_* bookkeeping columns followed by the original input columns.
detailed_results = backtest.detailed_results
detailed_results

Unnamed: 0,bt_index,bt_fold,bt_predicted_outcome,bt_actual_outcome,bt_starting_bankroll,bt_ending_bankroll,bt_odds,bt_date_column,bt_stake,bt_potential_return,bt_win,bt_profit,bt_roi,date,odds_team_a,odds_team_b,actual_winner,model_predictions
0,0,0,0,0,1000.000000,1010.618102,2.061810,2023-01-01,10,20.618102,True,10.618102,106.181018,2023-01-01,2.061810,2.079154,0,0
1,1,0,1,0,1010.618102,1000.618102,2.941786,2023-01-02,10,29.417858,False,-10.000000,-100.000000,2023-01-02,2.926071,2.941786,0,1
2,2,0,1,0,1000.618102,990.618102,2.858026,2023-01-03,10,28.580260,False,-10.000000,-100.000000,2023-01-03,2.597991,2.858026,0,1
3,3,0,1,0,990.618102,980.618102,1.793687,2023-01-04,10,17.936867,False,-10.000000,-100.000000,2023-01-04,2.397988,1.793687,0,1
4,4,0,0,0,980.618102,987.958381,1.734028,2023-01-05,10,17.340280,True,7.340280,73.402796,2023-01-05,1.734028,1.604042,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,360,0,0,0,1236.142029,1246.964578,2.082255,2023-12-27,10,20.822549,True,10.822549,108.225489,2023-12-27,2.082255,2.762743,0,0
361,361,0,0,0,1246.964578,1261.613901,2.464932,2023-12-28,10,24.649323,True,14.649323,146.493233,2023-12-28,2.464932,1.709659,0,0
362,362,0,1,1,1261.613901,1278.542911,2.692901,2023-12-29,10,26.929010,True,16.929010,169.290097,2023-12-29,2.187379,2.692901,1,1
363,363,0,1,1,1278.542911,1286.567320,1.802441,2023-12-30,10,18.024410,True,8.024410,80.244098,2023-12-30,2.318425,1.802441,1,1


In [7]:
# Per-bet results obtained through the getter method instead of the attribute.
# Bound to its own name: the previous `bookie_results` label was misleading,
# because this table holds the strategy's detailed results, not the bookie baseline.
detailed_results_via_getter = backtest.get_detailed_results()
detailed_results_via_getter

Unnamed: 0,bt_index,bt_fold,bt_predicted_outcome,bt_actual_outcome,bt_starting_bankroll,bt_ending_bankroll,bt_odds,bt_date_column,bt_stake,bt_potential_return,bt_win,bt_profit,bt_roi,date,odds_team_a,odds_team_b,actual_winner,model_predictions
0,0,0,0,0,1000.000000,1010.618102,2.061810,2023-01-01,10,20.618102,True,10.618102,106.181018,2023-01-01,2.061810,2.079154,0,0
1,1,0,1,0,1010.618102,1000.618102,2.941786,2023-01-02,10,29.417858,False,-10.000000,-100.000000,2023-01-02,2.926071,2.941786,0,1
2,2,0,1,0,1000.618102,990.618102,2.858026,2023-01-03,10,28.580260,False,-10.000000,-100.000000,2023-01-03,2.597991,2.858026,0,1
3,3,0,1,0,990.618102,980.618102,1.793687,2023-01-04,10,17.936867,False,-10.000000,-100.000000,2023-01-04,2.397988,1.793687,0,1
4,4,0,0,0,980.618102,987.958381,1.734028,2023-01-05,10,17.340280,True,7.340280,73.402796,2023-01-05,1.734028,1.604042,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,360,0,0,0,1236.142029,1246.964578,2.082255,2023-12-27,10,20.822549,True,10.822549,108.225489,2023-12-27,2.082255,2.762743,0,0
361,361,0,0,0,1246.964578,1261.613901,2.464932,2023-12-28,10,24.649323,True,14.649323,146.493233,2023-12-28,2.464932,1.709659,0,0
362,362,0,1,1,1261.613901,1278.542911,2.692901,2023-12-29,10,26.929010,True,16.929010,169.290097,2023-12-29,2.187379,2.692901,1,1
363,363,0,1,1,1278.542911,1286.567320,1.802441,2023-12-30,10,18.024410,True,8.024410,80.244098,2023-12-30,2.318425,1.802441,1,1


In [8]:
# Bookie baseline results. Judging by the stored output, bt_predicted_outcome here is
# the side with the lower odds rather than `model_predictions` — presumably a
# "bet on the bookmaker's favourite" benchmark; confirm against get_bookie_results().
bookie_results = backtest.get_bookie_results()
bookie_results

Unnamed: 0,bt_index,bt_fold,bt_predicted_outcome,bt_actual_outcome,bt_starting_bankroll,bt_ending_bankroll,bt_stake,bt_potential_return,bt_win,bt_profit,bt_roi,bt_odds,bt_date_column,date,odds_team_a,odds_team_b,actual_winner,model_predictions
0,0,0,0,0,1000.000000,1010.618102,10,20.618102,True,10.618102,106.181018,2.061810,2023-01-01,2023-01-01,2.061810,2.079154,0,0
1,1,0,0,0,1010.618102,1029.878816,10,29.260715,True,19.260715,192.607146,2.926071,2023-01-02,2023-01-02,2.926071,2.941786,0,1
2,2,0,0,0,1029.878816,1045.858726,10,25.979909,True,15.979909,159.799091,2.597991,2023-01-03,2023-01-03,2.597991,2.858026,0,1
3,3,0,1,0,1045.858726,1035.858726,10,17.936867,False,-10.000000,-100.000000,1.793687,2023-01-04,2023-01-04,2.397988,1.793687,0,1
4,4,0,1,0,1035.858726,1025.858726,10,16.040420,False,-10.000000,-100.000000,1.604042,2023-01-05,2023-01-05,1.734028,1.604042,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,360,0,0,0,1079.234754,1090.057303,10,20.822549,True,10.822549,108.225489,2.082255,2023-12-27,2023-12-27,2.082255,2.762743,0,0
361,361,0,1,0,1090.057303,1080.057303,10,17.096586,False,-10.000000,-100.000000,1.709659,2023-12-28,2023-12-28,2.464932,1.709659,0,0
362,362,0,0,1,1080.057303,1070.057303,10,21.873793,False,-10.000000,-100.000000,2.187379,2023-12-29,2023-12-29,2.187379,2.692901,1,1
363,363,0,1,1,1070.057303,1078.081713,10,18.024410,True,8.024410,80.244098,1.802441,2023-12-30,2023-12-30,2.318425,1.802441,1,1


In [9]:
# Compute the summary metrics; the call returns them as a dict, shown as the cell output.
backtest.calculate_metrics()

{'Backtest Start Date': Timestamp('2023-01-01 00:00:00'),
 'Backtest End Date': Timestamp('2023-12-31 00:00:00'),
 'Backtest Duration': Timedelta('364 days 00:00:00'),
 'ROI [%]': np.float64(30.56892924637293),
 'Total Profit [$]': np.float64(305.6892924637296),
 'Bankroll Final [$]': np.float64(1305.6892924637293),
 'Bankroll Peak [$]': np.float64(1305.6892924637293),
 'Bankroll Valley [$]': np.float64(875.1124844618936),
 'Sharpe Ratio [-]': np.float64(1.0152856752619315),
 'Sortino Ratio [-]': np.float64(14.130205356189823),
 'Calmar Ratio [-]': np.float64(1.3003556137214483),
 'Max Drawdown [%]': np.float64(15.366568563360786),
 'Avg. Drawdown [%]': np.float64(3.1928923892260146),
 'Max. Drawdown Duration [bets]': 158,
 'Avg. Drawdown Duration [bets]': np.float64(16.35),
 'Median Drawdown Duration [bets]': np.float64(5.0),
 'Win Rate [%]': np.float64(48.76712328767123),
 'Average Odds [-]': np.float64(2.2156884264822603),
 'Highest Winning Odds [-]': np.float64(2.9894471941789504),

In [10]:
# The metrics dict exposed as an attribute; the stored output matches the previous
# cell's return value (presumably populated by calculate_metrics() — confirm).
backtest.metrics

{'Backtest Start Date': Timestamp('2023-01-01 00:00:00'),
 'Backtest End Date': Timestamp('2023-12-31 00:00:00'),
 'Backtest Duration': Timedelta('364 days 00:00:00'),
 'ROI [%]': np.float64(30.56892924637293),
 'Total Profit [$]': np.float64(305.6892924637296),
 'Bankroll Final [$]': np.float64(1305.6892924637293),
 'Bankroll Peak [$]': np.float64(1305.6892924637293),
 'Bankroll Valley [$]': np.float64(875.1124844618936),
 'Sharpe Ratio [-]': np.float64(1.0152856752619315),
 'Sortino Ratio [-]': np.float64(14.130205356189823),
 'Calmar Ratio [-]': np.float64(1.3003556137214483),
 'Max Drawdown [%]': np.float64(15.366568563360786),
 'Avg. Drawdown [%]': np.float64(3.1928923892260146),
 'Max. Drawdown Duration [bets]': 158,
 'Avg. Drawdown Duration [bets]': np.float64(16.35),
 'Median Drawdown Duration [bets]': np.float64(5.0),
 'Win Rate [%]': np.float64(48.76712328767123),
 'Average Odds [-]': np.float64(2.2156884264822603),
 'Highest Winning Odds [-]': np.float64(2.9894471941789504),

In [11]:
# Render the plot for the prediction backtest.
backtest.plot()

In [12]:
# Dummy model: standardise the inputs, then fit a logistic regression.
pipeline_steps = (StandardScaler(), LogisticRegression())
model = make_pipeline(*pipeline_steps)

# Time-ordered cross-validation with five splits.
cv_splitter = TimeSeriesSplit(n_splits=5)

# Backtest driven by the model and CV schema instead of a prediction column.
# NOTE(review): no feature columns are passed explicitly; how ModelBacktest selects
# its features from `data` is not visible here — confirm.
model_backtest = ModelBacktest(
    model=model,
    cv_schema=cv_splitter,
    data=data,
    strategy=strategy,
    initial_bankroll=1000,
    date_column='date',
    odds_columns=['odds_team_a', 'odds_team_b'],
    outcome_column='actual_winner',
)



In [13]:
# Run the model backtest configured in the previous cell
# (presumably fits `model` on each cv_schema split — confirm).
model_backtest.run()

In [16]:
# NOTE(review): this cell was executed out of order (In [16] between In [13] and
# In [14]); re-run the notebook with Restart & Run All before sharing.
# NOTE(review): the return value of get_bookie_results() is discarded. Unless the call
# has a side effect on model_backtest (not visible in this notebook — confirm),
# either remove it or assign and display its result.
model_backtest.get_bookie_results()
# Per-bet results of the model backtest. In the stored output the rows start at
# bt_index 65 and bt_fold goes from 0 to 4, i.e. the initial rows have no bets.
model_backtest.detailed_results

Unnamed: 0,bt_index,bt_fold,bt_predicted_outcome,bt_actual_outcome,bt_starting_bankroll,bt_ending_bankroll,bt_odds,bt_date_column,bt_stake,bt_potential_return,bt_win,bt_profit,bt_roi,date,odds_team_a,odds_team_b,actual_winner,model_predictions
0,65,0,0,0,1000.000000,1013.140441,2.314044,2023-03-07,10,23.140441,True,13.140441,131.404412,2023-03-07,2.314044,1.714488,0,1
1,66,0,1,0,1013.140441,1003.140441,2.642266,2023-03-08,10,26.422659,False,-10.000000,-100.000000,2023-03-08,1.711386,2.642266,0,1
2,67,0,0,0,1003.140441,1020.173396,2.703295,2023-03-09,10,27.032955,True,17.032955,170.329547,2023-03-09,2.703295,2.427327,0,0
3,68,0,0,1,1020.173396,1010.173396,1.611826,2023-03-10,10,16.118260,False,-10.000000,-100.000000,2023-03-10,1.611826,1.651684,1,0
4,69,0,0,1,1010.173396,1000.173396,2.980330,2023-03-11,10,29.803304,False,-10.000000,-100.000000,2023-03-11,2.980330,1.626160,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,360,4,0,0,1384.902240,1395.724789,2.082255,2023-12-27,10,20.822549,True,10.822549,108.225489,2023-12-27,2.082255,2.762743,0,0
296,361,4,1,0,1395.724789,1385.724789,1.709659,2023-12-28,10,17.096586,False,-10.000000,-100.000000,2023-12-28,2.464932,1.709659,0,0
297,362,4,0,1,1385.724789,1375.724789,2.187379,2023-12-29,10,21.873793,False,-10.000000,-100.000000,2023-12-29,2.187379,2.692901,1,1
298,363,4,0,1,1375.724789,1365.724789,2.318425,2023-12-30,10,23.184252,False,-10.000000,-100.000000,2023-12-30,2.318425,1.802441,1,1


In [14]:
# Compute the summary metrics for the model backtest; returned as a dict and shown as output.
model_backtest.calculate_metrics()

{'Backtest Start Date': Timestamp('2023-03-07 00:00:00'),
 'Backtest End Date': Timestamp('2023-12-31 00:00:00'),
 'Backtest Duration': Timedelta('299 days 00:00:00'),
 'ROI [%]': np.float64(35.57247886718092),
 'Total Profit [$]': np.float64(355.72478867180985),
 'Bankroll Final [$]': np.float64(1355.7247886718092),
 'Bankroll Peak [$]': np.float64(1463.722726368339),
 'Bankroll Valley [$]': np.float64(980.1733959586843),
 'Sharpe Ratio [-]': np.float64(1.6458488275890577),
 'Sortino Ratio [-]': np.float64(15.47447202518303),
 'Calmar Ratio [-]': np.float64(2.815072242779461),
 'Max Drawdown [%]': np.float64(9.483353619172444),
 'Avg. Drawdown [%]': np.float64(2.4564892774944433),
 'Max. Drawdown Duration [bets]': 64,
 'Avg. Drawdown Duration [bets]': np.float64(9.296296296296296),
 'Median Drawdown Duration [bets]': np.float64(4.0),
 'Win Rate [%]': np.float64(50.33333333333333),
 'Average Odds [-]': np.float64(2.2349962234426966),
 'Highest Winning Odds [-]': np.float64(2.9894471941

In [15]:
# Render the plot for the model backtest.
model_backtest.plot()