In [1]:
from footix.models.elo import EloDavidson
from footix.data_io.footballdata import ScrapFootballData
from footix.metrics import rps, incertity, zscore
import numpy as np

# Loading the dataset:

Here, we are going to play with the 2024-2025 season of the French Ligue 1, using the scraper for [football-data](https://www.football-data.co.uk).

In [2]:
# Configure the football-data scraper for the chosen competition and season,
# then fetch its fixtures.
scraper = ScrapFootballData(
    competition="FRA Ligue 1",
    season="2024-2025",
    path="./data",
    force_reload=True,
)
dataset = scraper.get_fixtures()

In [3]:
# Number of trailing rows of `dataset` held out for evaluation.
# NOTE(review): 9 presumably corresponds to one full matchday of an
# 18-team league — confirm against the fixture ordering.
N_TEST_MATCHES = 9

# Positional split: the last N_TEST_MATCHES rows are the evaluation set,
# every row before them is used for fitting.
train_dataset = dataset.iloc[:-N_TEST_MATCHES]
test_dataset = dataset.iloc[-N_TEST_MATCHES:]

# Initialization of the model and fitting


In [4]:
# Elo-Davidson model instance; every hyperparameter is passed explicitly,
# one keyword per line.
model = EloDavidson(
    n_teams=18,
    k0=75,
    lambd=0.1,
    sigma=400,
    agnostic_probs=(0.45, 0.25, 0.30),
)

In [5]:
# Fit on the training rows only; the held-out rows in test_dataset are not passed here.
model.fit(X_train=train_dataset)

We can check the ranking according to the train_dataset:

In [6]:
# The string form of the fitted model is the team ranking shown in the output below.
print(model)

1. Paris SG : 320.75432743144427 
2. Strasbourg : 206.56101286218592 
3. Monaco : 169.70566980118522 
4. Marseille : 109.71826104119576 
5. Nice : 108.91279977408382 
6. Lille : 90.16240716024019 
7. Lyon : 72.45619709436357 
8. Brest : 48.08972569559302 
9. Lens : 1.4986338932876653 
10. Rennes : -19.20293361188967 
11. Auxerre : -30.69221220449368 
12. Toulouse : -80.37453488130947 
13. Angers : -89.44275629782884 
14. St Etienne : -101.58357285588156 
15. Nantes : -109.20780114730539 
16. Le Havre : -125.9909846570242 
17. Reims : -185.3726409569987 
18. Montpellier : -385.99159814084874 



# Let's compute some metrics

In [7]:
# Per-match metric values; each list is appended to once per evaluated fixture.
rps_list, incertity_list, zscore_list = [], [], []

def _helper_result(result_str: str)-> int:
    if result_str=="H":
        return 0
    if result_str=="D":
        return 1
    return 2

# Score every held-out fixture: predict it, then record RPS, z-score and incertity.
for _, fixture in test_dataset.iterrows():
    probability = model.predict(
        home_team=fixture["home_team"],
        away_team=fixture["away_team"],
    )
    outcome_idx = _helper_result(fixture["ftr"])

    match_rps = rps(probas=probability, outcome_idx=outcome_idx)
    rps_list.append(match_rps)
    # The z-score is computed from the RPS just obtained for this same fixture.
    zscore_list.append(zscore(probas=probability, rps_observed=match_rps))
    incertity_list.append(incertity(probas=probability, outcome_idx=outcome_idx))

In [8]:
# Report the mean and standard deviation of each per-match metric,
# in the order incertity, RPS, z-score.
for label, values in (
    ("Incertity", incertity_list),
    ("RPS", rps_list),
    ("Z-score", zscore_list),
):
    print(f"{label} metric : {np.mean(values)} +/- {np.std(values)}")

Incertity metric : 0.76869436955589 +/- 0.6429007823413724
RPS metric : 0.20753950552245645 +/- 0.21584757488213818
Z-score metric : 0.20843916792124062 +/- 0.752432546817333


According to these metrics, we can say that for this particular matchday:
- The Elo-Davidson model is somewhat uncertain, but not by much.
- It delivers reasonable but not outstanding probabilistic accuracy (RPS).
- The Elo-Davidson model is not well calibrated (large value of z-score).

# Game focus

In [9]:
# Predict a single fixture; the result exposes .proba_home, .proba_draw and
# .proba_away, which the next cell prints.
probability = model.predict(home_team="St Etienne", away_team="Toulouse")

In [10]:
print("St Etienne - Toulouse outcome probabilities")

# Scale each outcome probability to a percentage before formatting it
# with two decimals.
home_pct = 100 * probability.proba_home
print(f"Probability of home victory : {home_pct:.2f} %")

draw_pct = 100 * probability.proba_draw
print(f"Probability of a draw : {draw_pct:.2f}")

away_pct = 100 * probability.proba_away
print(f"Probability of away victory : {away_pct:.2f}")

St Etienne - Toulouse outcome probabilities
Probability of home victory : 42.67 %
Probability of a draw : 25.20
Probability of away victory : 32.14
