In [1]:
from footix.models.elo import EloDavidson
from footix.data_io.footballdata import ScrapFootballData
from footix.metrics import rps, incertity, zscore
import numpy as np

# Loading the dataset:

Here, we are going to work with the 2024-2025 season of the French Ligue 1, using the scraper for [football-data](https://www.football-data.co.uk).

In [2]:
# Configure the football-data scraper for Ligue 1, season 2024-2025, then pull
# the fixtures table that the rest of the notebook slices and iterates over.
# NOTE(review): force_reload=True presumably bypasses any copy already stored
# under ./data — confirm against the ScrapFootballData documentation.
scraper = ScrapFootballData(
    competition="FRA Ligue 1",
    season="2024-2025",
    path="./data",
    force_reload=True,
)
dataset = scraper.get_fixtures()

In [6]:
# Hold out the final rows of the fixtures table as the evaluation set and train
# on everything before them. The hold-out size lives in one constant so the two
# slices below cannot drift apart.
# 9 matches is one matchday in an 18-team league — this assumes the rows are in
# chronological order; TODO confirm against the scraper output.
N_TEST_MATCHES = 9
train_dataset = dataset.iloc[:-N_TEST_MATCHES]
test_dataset = dataset.iloc[-N_TEST_MATCHES:]

# Initialization of the model and fitting


In [45]:
# Build the Elo-Davidson model with fixed hyper-parameters.
# NOTE(review): agnostic_probs looks like a (home, draw, away) prior and
# n_teams=18 matches the 18 clubs in the ranking — confirm both against the
# EloDavidson documentation.
model = EloDavidson(
    n_teams=18,
    k0=75,
    lambd=0.1,
    sigma=400,
    agnostic_probs=(0.47, 0.3, 0.23),
)

In [55]:
# Fit the model on the training fixtures only; the held-out rows in
# test_dataset are not passed in here.
model.fit(X_train=train_dataset)

We can check the team ranking obtained after fitting on `train_dataset`:

In [56]:
# Printing the model relies on its string representation, which lists the
# teams ranked by rating (see the output below).
print(model)

1. Paris SG : 353.31697960831787 
2. Strasbourg : 214.35677247717092 
3. Monaco : 177.41819800707972 
4. Marseille : 124.6070916532047 
5. Nice : 121.10957770439222 
6. Lille : 106.04991192648706 
7. Lyon : 84.66246915610814 
8. Brest : 49.861676901004486 
9. Lens : 4.298891521425102 
10. Rennes : -23.640634591924638 
11. Auxerre : -31.997492579646117 
12. Toulouse : -84.54657642994815 
13. Angers : -103.9699802064673 
14. St Etienne : -114.5912481140742 
15. Nantes : -114.77613337819021 
16. Le Havre : -149.17233346884598 
17. Reims : -199.83202409496553 
18. Montpellier : -413.15514609112785 



# Let's compute some metrics

In [47]:
# One accumulator per metric; each gets one entry per held-out match.
rps_list, incertity_list, zscore_list = [], [], []

def _helper_result(result_str: str)-> int:
    if result_str=="H":
        return 0
    if result_str=="D":
        return 1
    return 2

# Score every held-out match: predict the outcome probabilities, then record
# the RPS, the z-score derived from that RPS, and the incertity metric.
for _row_label, fixture in test_dataset.iterrows():
    probability = model.predict(
        home_team=fixture["home_team"],
        away_team=fixture["away_team"],
    )
    outcome_idx = _helper_result(fixture["ftr"])

    # Keep the RPS in a local so the z-score reuses exactly the same value.
    match_rps = rps(probas=probability, outcome_idx=outcome_idx)
    rps_list.append(match_rps)
    zscore_list.append(zscore(probas=probability, rps_observed=match_rps))
    incertity_list.append(incertity(probas=probability, outcome_idx=outcome_idx))

In [48]:
# Summarise each metric over the held-out matches as "mean +/- standard deviation".
for metric_label, metric_values in (
    ("Incertity", incertity_list),
    ("RPS", rps_list),
    ("Z-score", zscore_list),
):
    print(f"{metric_label} metric : {np.mean(metric_values)} +/- {np.std(metric_values)}")

Incertity metric : 0.8097040553292228 +/- 0.7797334025105871
RPS metric : 0.20217860243094035 +/- 0.23074536018746047
Z-score metric : 0.22743251894424735 +/- 0.816334551077693


According to these metrics, we can say that for this particular matchday:
- The Elo model is only moderately uncertain.
- It delivers reasonable, but not outstanding, probabilistic accuracy (RPS).
- The Elo-Davidson model is not well calibrated (large value of z-score).

# Game focus

In [59]:
# Outcome probabilities for a single fixture, using the model fitted above.
probability = model.predict(home_team="St Etienne", away_team="Toulouse")

In [60]:
# Report the three outcome probabilities as percentages.
# Every line carries the "%" unit so the three figures read consistently
# (previously only the home-victory line had it).
print("St Etienne - Toulouse outcome probabilities")
print(f"Probability of home victory : {100*probability.proba_home:.2f} %")
print(f"Probability of a draw : {100*probability.proba_draw:.2f} %")
print(f"Probability of away victory : {100*probability.proba_away:.2f} %")

St Etienne - Toulouse outcome probabilities
Probability of home victory : 43.90 %
Probability of a draw : 30.55
Probability of away victory : 25.54
