In [17]:
import pandas as pd
from footix.models.bayesian import Bayesian
import footix.implied_odds as odds
import collections
from footix.data_io.data_scrapper import ScrapFootballData
import pymc as pm

In [18]:
# Load the Ligue 1 results table for season code "2425" through footix's scraper.
# NOTE(review): force_reload=True presumably bypasses any copy already stored under
# ./data — confirm against ScrapFootballData.
dataset = ScrapFootballData(
    competition="Ligue 1",
    season="2425",
    path="./data",
    force_reload=True,
).get_data()

In [19]:
# With a negative n, DataFrame.head returns every row except the last |n| ones,
# so this displays the whole table minus its final 5 rows.
dataset.head(-5)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,BFECAHH,BFECAHA
0,F1,16/08/2024,19:45,Le Havre,Paris SG,1,4,A,0,1,...,1.93,2.00,1.95,1.97,1.95,2.02,1.89,1.95,1.95,2.02
1,F1,17/08/2024,16:00,Brest,Marseille,1,5,A,1,3,...,1.89,2.04,1.91,2.02,1.91,2.08,1.85,2.01,1.90,2.09
2,F1,17/08/2024,18:00,Reims,Lille,0,2,A,0,1,...,2.10,1.70,2.14,1.81,2.14,1.85,2.07,1.80,2.13,1.86
3,F1,17/08/2024,20:00,Monaco,St Etienne,1,0,H,1,0,...,2.00,1.93,2.00,1.93,2.00,2.01,1.95,1.91,1.95,2.00
4,F1,18/08/2024,14:00,Auxerre,Nice,2,1,H,1,1,...,1.82,2.11,1.82,2.13,1.83,2.17,1.77,2.10,1.84,2.16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,F1,19/01/2025,19:45,Marseille,Strasbourg,1,1,D,0,1,...,2.04,1.89,2.02,1.91,2.07,1.91,1.98,1.86,2.03,1.96
162,F1,24/01/2025,19:45,Auxerre,St Etienne,1,1,D,1,1,...,1.95,1.95,1.98,1.94,2.00,1.96,1.96,1.89,2.03,1.96
163,F1,25/01/2025,16:00,Monaco,Rennes,3,2,H,1,1,...,1.93,2.00,1.94,1.98,1.94,2.02,1.88,1.97,1.92,2.06
164,F1,25/01/2025,18:00,Strasbourg,Lille,2,1,H,0,1,...,1.70,2.10,1.76,2.19,1.82,2.20,1.77,2.08,1.85,2.14


In [20]:
# Derive the number of teams from the loaded results instead of hard-coding 18, so the
# model stays consistent with the data if the competition or season is changed.
# Home and away columns are combined so a team is counted even if it has only played
# on one side so far; nunique() ignores missing values.
n_teams = pd.concat([dataset["HomeTeam"], dataset["AwayTeam"]]).nunique()

# n_goals=15 is kept from the original call.
# NOTE(review): presumably the size of the goal grid used for predictions — confirm
# against footix's Bayesian class.
model = Bayesian(n_teams=n_teams, n_goals=15)

In [21]:
# Fit the model on the scraped results.
# NOTE(review): the output recorded for this cell reports 491 divergences after tuning,
# rhat > 1.01 for some parameters and a per-chain effective sample size below 100.
# Every probability derived from this fit further down should be treated with caution
# until those sampler warnings are resolved.
model.fit(X_train=dataset)

Multiprocess sampling (4 chains in 4 jobs)
CompoundStep
>BinaryGibbsMetropolis: [grp_att, grp_def]
>NUTS: [home, intercept, sigma_att, sigma_def, attack, defense]


Output()

Sampling 4 chains for 2_000 tune and 3_000 draw iterations (8_000 + 12_000 draws total) took 170 seconds.
There were 491 divergences after tuning. Increase `target_accept` or reparameterize.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


In [8]:
# Upcoming fixtures with bookmaker prices; the loop reads the columns
# home_team, away_team, H, D and A.
fixture = pd.read_csv("odds_info.csv")

# For every fixture, print the model's outcome probabilities next to the probabilities
# implied by the bookmaker's prices.
for idx, match in fixture.iterrows():
    print("#" * 50)
    print(f"{match['home_team']:<30} {match['away_team']:<30}")

    # Outcome probabilities from the fitted model, indexed 0/1/2 = home/draw/away.
    probas = model.predict(home_team=match["home_team"], away_team=match["away_team"]).return_probas()
    home_prob, draw_prob, away_prob = probas[0], probas[1], probas[2]
    print(f"  Probabilities: Home: {home_prob:.2f}, Draw: {draw_prob:.2f}, Away: {away_prob:.2f}")

    # odds.shin(...)[0] holds implied probabilities derived from the prices, not odds,
    # so the line is labelled accordingly (it used to be printed as "Odds").
    odds_list = [match["H"], match["D"], match["A"]]
    implied_probs = odds.shin(odds=odds_list)[0]
    print(
        f"  Implied probabilities: Home: {implied_probs[0]:.2f}, "
        f"Draw: {implied_probs[1]:.2f}, Away: {implied_probs[2]:.2f}"
    )
    print("#" * 50)

##################################################
Brest                          Paris SG                      
  Probabilities: Home: 0.25, Draw: 0.21, Away: 0.54
  Odds: Home: 0.14, Draw: 0.19, Away: 0.68
##################################################
##################################################
Monaco                         Auxerre                       
  Probabilities: Home: 0.49, Draw: 0.25, Away: 0.27
  Odds: Home: 0.72, Draw: 0.16, Away: 0.12
##################################################
##################################################
Lille                          St Etienne                    
  Probabilities: Home: 0.57, Draw: 0.23, Away: 0.20
  Odds: Home: 0.70, Draw: 0.19, Away: 0.11
##################################################
##################################################
Toulouse                       Nice                          
  Probabilities: Home: 0.33, Draw: 0.26, Away: 0.42
  Odds: Home: 0.38, Draw: 0.28, Away: 0.34
###############

In [12]:
# Disabled diagnostic: uncomment to display the summary table for model.trace.
# Worth running here because the fit cell's recorded output reports rhat / ESS warnings.
# pm.summary(model.trace)

In [22]:
# Build one candidate bet per fixture: the outcome whose model probability exceeds the
# bookmaker's implied probability by the widest margin.
fixture = pd.read_csv("odds_info.csv")
selections = []
outcome_labels = ("home", "draw", "away")

for row_no, row in fixture.iterrows():
    home_team, away_team = row["home_team"], row["away_team"]

    # Model probabilities, indexed 0/1/2 = home/draw/away.
    model_probs = model.predict(home_team=home_team, away_team=away_team).return_probas()

    # Bookmaker prices in the same order, and the implied probabilities that
    # odds.shin derives from them.
    bookie_odds = [row["H"], row["D"], row["A"]]
    implied_probs = odds.shin(odds=bookie_odds)[0]

    # Edge per outcome = model probability minus implied probability.
    edges = [model_probs[k] - implied_probs[k] for k in range(3)]

    # Explicit comparison chain: home wins ties, then draw, otherwise away.
    if edges[0] >= edges[1] and edges[0] >= edges[2]:
        pick = 0
    elif edges[1] >= edges[0] and edges[1] >= edges[2]:
        pick = 1
    else:
        pick = 2

    # Record the chosen outcome with the model probability and the quoted price.
    selections.append(
        {
            "name": f"{home_team} vs {away_team} - {outcome_labels[pick]}",
            "probability": model_probs[pick],
            "odds_bookie": bookie_odds[pick],
        }
    )

In [23]:
# Dump the raw list of candidate bets (one dict per fixture with the keys
# "name", "probability" and "odds_bookie") as plain text.
print(selections)

[{'name': 'Brest vs Paris SG - home', 'probability': 0.24692785590412847, 'odds_bookie': 6.75}, {'name': 'Monaco vs Auxerre - away', 'probability': 0.2623079433842603, 'odds_bookie': 7.5}, {'name': 'Lille vs St Etienne - away', 'probability': 0.18941476035803936, 'odds_bookie': 7.7}, {'name': 'Toulouse vs Nice - away', 'probability': 0.40553428523918567, 'odds_bookie': 2.75}, {'name': 'Reims vs Nantes - away', 'probability': 0.29045091634671144, 'odds_bookie': 3.35}, {'name': 'Angers vs Le Havre - home', 'probability': 0.5344677324380644, 'odds_bookie': 1.95}, {'name': 'Rennes vs Strasbourg - away', 'probability': 0.3510963060421753, 'odds_bookie': 3.5}, {'name': 'Marseille vs Lyon - home', 'probability': 0.5223626364943785, 'odds_bookie': 2.0}, {'name': 'Nantes vs Brest - away', 'probability': 0.33697465778995617, 'odds_bookie': 2.95}, {'name': 'Paris SG vs Monaco - away', 'probability': 0.18047053813660602, 'odds_bookie': 5.4}, {'name': 'Nice vs Lens - home', 'probability': 0.4884823

In [24]:
from footix.strategy.strategies import realKelly

In [None]:
# Size the stakes with footix's realKelly over the candidate bets built above.
# NOTE(review): only the first 9 selections are passed and the bankroll is hard-coded
# to 53; the reason for either value is not visible in this notebook — TODO confirm
# and consider moving both into named constants.
realKelly(selections[:9], bankroll=53, max_multiple=1, device="cpu")


2025-02-01 00:29:35 - Optimization finished. Runtime --- 9.081 seconds ---

Objective: -4.09755
Certainty Equivalent: 60.193

Brest vs Paris SG - home @6.75 - € 6
Monaco vs Auxerre - away @7.5 - € 8
Lille vs St Etienne - away @7.7 - € 4
Toulouse vs Nice - away @2.75 - € 4
Angers vs Le Havre - home @1.95 - € 1
Rennes vs Strasbourg - away @3.5 - € 5
Marseille vs Lyon - home @2.0 - € 3
Bankroll used 28.24153878752672 €
