In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import skellam
from bettools import (
    get_data,
    generate_seasons,
    calculate_poisson_match_outcomes,
    calculate_ev_from_odds,
    kelly_criterion,
)
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
from dixon_coles import (
    solve_parameters_decay,
    get_1x2_probs,
    dixon_coles_simulate_match,
)

# Suppress RuntimeWarnings
# NOTE: this filter is process-wide, so every RuntimeWarning raised by any library
# for the rest of the session is hidden, not only the ones from the model fit.
warnings.filterwarnings("ignore", category=RuntimeWarning)

# Show every column and do not wrap wide frames when displaying DataFrames.
pd.set_option("display.max_columns", None)
pd.set_option("display.width", None)
# Disable pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

In [2]:
# Load the selected league/seasons, asking get_data for the extra shot and
# full-time-result columns.
leagues = ["E3"]
season_list = generate_seasons(2023, 2024)
df_ls = get_data(season_list, leagues, additional_cols=["HS", "AS", "FTR"])

# Stack the per-season frames, keep only the last 500 rows and renumber them 0..n-1.
main_df = pd.concat(df_ls).iloc[-500:].reset_index(drop=True)

# Parse the day/month/two-digit-year strings, then express every match as
# "days before the latest match in the frame".
main_df["Date"] = pd.to_datetime(main_df["Date"], format="%d/%m/%y")
latest_match_date = max(main_df["Date"])
main_df["time_diff"] = (latest_match_date - main_df["Date"]).dt.days

# Keep just the columns used from here on.
model_columns = ["HomeTeam", "AwayTeam", "FTHG", "FTAG", "FTR", "time_diff"]
main_df = main_df[model_columns]

In [3]:
# Fit the model parameters on the prepared frame (which carries the `time_diff`
# column built above). `xi` is forwarded unchanged to solve_parameters_decay —
# presumably the time-decay rate applied to `time_diff`; confirm in dixon_coles.
params = solve_parameters_decay(main_df, xi=0.00325)

Optimization terminated successfully    (Exit mode 0)
            Current function value: 1058.909108635088
            Iterations: 41
            Function evaluations: 2154
            Gradient evaluations: 41


In [7]:
def make_betting_prediction(
    home_odds,
    draw_odds,
    away_odds,
    params,
    home_team,
    away_team,
    bankroll,
    kelly_fraction=0.05,
):
    """Pick the 1X2 outcome with the highest expected value and size its stake.

    The outcome probabilities come from
    ``get_1x2_probs(dixon_coles_simulate_match(params, home_team, away_team, max_goals=10))``,
    read through the keys ``"H"``, ``"D"`` and ``"A"``. Each outcome's expected
    value is computed with ``calculate_ev_from_odds(odds, probability)`` and the
    outcome with the largest value is selected.

    Exactly one outcome is chosen and ``kelly_criterion`` is called exactly once.
    When expected values tie, the first of Home, Away, Draw wins. (Previously a
    stray ``if`` in place of ``elif`` let a later branch overwrite the home
    selection on ties and evaluated the Kelly stake twice.)

    Args:
        home_odds: Odds offered on the home win.
        draw_odds: Odds offered on the draw.
        away_odds: Odds offered on the away win.
        params: Fitted model parameters, forwarded to ``dixon_coles_simulate_match``.
        home_team: Home team identifier understood by ``params``.
        away_team: Away team identifier understood by ``params``.
        bankroll: Bankroll forwarded to ``kelly_criterion``.
        kelly_fraction: Fraction forwarded to ``kelly_criterion``; defaults to the
            previously hard-coded 0.05.

    Returns:
        tuple: ``(bet_selection, bet_amount)`` where ``bet_selection`` is
        ``"Home"``, ``"Away"`` or ``"Draw"`` and ``bet_amount`` is whatever
        ``kelly_criterion`` returns for that outcome.

    NOTE(review): a selection is returned even when the best expected value is
    negative; whether the stake is then zero depends on ``kelly_criterion`` —
    confirm in bettools.
    """
    predicted_probs = get_1x2_probs(
        dixon_coles_simulate_match(params, home_team, away_team, max_goals=10)
    )

    # Listed in tie-break priority order: max() returns the first maximal item.
    candidates = [
        ("Home", home_odds, predicted_probs["H"]),
        ("Away", away_odds, predicted_probs["A"]),
        ("Draw", draw_odds, predicted_probs["D"]),
    ]
    bet_selection, best_odds, best_prob = max(
        candidates,
        key=lambda candidate: calculate_ev_from_odds(candidate[1], candidate[2]),
    )

    bet_amount = kelly_criterion(
        best_prob, best_odds, bankroll, kelly_fraction=kelly_fraction
    )
    return bet_selection, bet_amount


# Example: home odds 3.2, draw odds 3.6, away odds 3 for Walsall (home) vs
# Bradford (away), using the parameters fitted above and a bankroll of 110.
make_betting_prediction(3.2, 3.6, 3, params, "Walsall", "Bradford", 110)

('Home', 0.8269579154991229)

In [None]:
# Reload data for the walk-forward experiment. This rebinds `leagues`,
# `season_list`, `df_ls` and `main_df` from the earlier cells.
leagues = ["E0"]

season_list = generate_seasons(2014, 2024)

# No additional_cols here: only get_data's default columns are requested.
df_ls = get_data(season_list, leagues)

# Stack the per-season frames into one frame (original per-frame index labels are kept).
main_df = pd.concat(df_ls)

In [None]:
# The earlier cell parses the Date strings from get_data as day/month/year
# ("%d/%m/%y"). Without dayfirst=True pandas may read ambiguous dates such as
# 05/08/14 month-first, which would silently scramble the chronological order
# that the walk-forward split relies on.
main_df["Date"] = pd.to_datetime(main_df["Date"], dayfirst=True)

# Order matches chronologically and index them by date (the validation code
# reads dates from the index).
main_df = main_df.sort_values("Date").set_index("Date")

In [None]:
def _fit_and_predict_fold(data, train_start, train_end, test_size, xi, max_goals):
    """Fit on rows [train_start, train_end) and predict the next ``test_size`` rows."""
    train_slice = data.iloc[train_start:train_end]

    # Age of every training match in days, measured from the latest date in
    # THIS training window so the values are never negative.
    max_train_date = train_slice.index.max()
    train_data = train_slice[["HomeTeam", "AwayTeam", "FTHG", "FTAG"]].assign(
        time_diff=(max_train_date - train_slice.index).days
    )
    params = solve_parameters_decay(train_data, xi=xi)

    # The test window always starts where the training window ends, so the two
    # never overlap. reset_index() turns the date index into a regular column.
    test_data = data.iloc[train_end : train_end + test_size].reset_index()

    home_probs, away_probs, draw_probs = [], [], []
    for home_team, away_team in zip(test_data["HomeTeam"], test_data["AwayTeam"]):
        probs_1x2 = get_1x2_probs(
            dixon_coles_simulate_match(
                params, home_team, away_team, max_goals=max_goals
            )
        )
        home_probs.append(probs_1x2["H"])
        away_probs.append(probs_1x2["A"])
        draw_probs.append(probs_1x2["D"])

    # Columns are only attached once every prediction succeeded, so a failure
    # part-way through never leaves a half-filled frame behind.
    test_data["home_win_prob"] = home_probs
    test_data["away_win_prob"] = away_probs
    test_data["draw_win_prob"] = draw_probs
    return test_data


def walk_forward_validation(
    data,
    initial_train_size,
    test_size,
    num_iterations,
    xi=0.00325,
    max_goals=10,
):
    """Expanding-window walk-forward evaluation of the fitted match model.

    For each fold the model is fitted with ``solve_parameters_decay`` on rows
    ``[0, train_end)`` of ``data`` and 1X2 probabilities are predicted for the
    following ``test_size`` rows. After a successful fold the training window
    grows by ``test_size`` rows.

    If fitting or predicting raises, the error is printed, the training window
    is enlarged by ``test_size`` rows and the fold is retried. The test window
    moves with it, so a retry is never evaluated on rows it was trained on.
    Retries continue until a fold succeeds or there are not enough rows left
    for another test window, in which case the folds collected so far are
    returned.

    Args:
        data: DataFrame positioned in chronological order whose index holds the
            match dates (the index must support ``.max()`` and subtraction
            yielding ``.days``) and which has the columns ``HomeTeam``,
            ``AwayTeam``, ``FTHG`` and ``FTAG``.
        initial_train_size: Number of leading rows in the first training window.
        test_size: Number of rows per test window; must be at least 1.
        num_iterations: Maximum number of successful folds to produce.
        xi: Value forwarded to ``solve_parameters_decay`` (default 0.00325).
        max_goals: Value forwarded to ``dixon_coles_simulate_match`` (default 10).

    Returns:
        list: One DataFrame per successful fold — the test rows with the former
        index as a column plus ``home_win_prob``, ``away_win_prob`` and
        ``draw_win_prob``.

    Raises:
        ValueError: If ``test_size`` is smaller than 1 (the window could never advance).
    """
    if test_size < 1:
        raise ValueError("test_size must be a positive integer")

    total_rows = len(data)
    train_start = 0
    train_end = initial_train_size

    results = []
    iteration = 0  # number of successful folds so far
    attempt = 0  # consecutive failures for the fold currently being fitted
    while train_end + test_size <= total_rows and iteration < num_iterations:
        try:
            fold = _fit_and_predict_fold(
                data, train_start, train_end, test_size, xi, max_goals
            )
        except Exception as e:
            # Deliberately broad: any failure in fitting or prediction is
            # treated as "not enough data yet" and answered with a larger
            # training set.
            attempt += 1
            print(
                f"Parameter fitting failed on attempt {attempt}: {e}. Trying with a larger training set."
            )
            train_end += test_size  # Expanding the training set window
            if train_end + test_size > total_rows:
                print(
                    "Not enough data to expand the training set and perform another test. Stopping."
                )
                return results
            continue

        results.append(fold)
        # Move the window forward: the rows just tested join the training set.
        train_end += test_size
        iteration += 1
        attempt = 0
    return results


# Run the walk-forward evaluation on the date-indexed frame: 500 initial
# training rows, 10-row test windows, at most 200 folds.
res = walk_forward_validation(main_df, 500, 10, 200)