In [1]:
import pandas as pd

In [2]:
# Columns shown in the ranking tables, in display order.
cols = (
    # Player identity.
    ["name", "team"]
    # Per-player summary statistics; show_stats labels "50%" as the median and
    # "2.5%" / "97.5%" as the lower / upper interval bounds.
    + ["mean", "std", "50%", "2.5%", "97.5%"]
    # Raw playing-time and result totals.
    + ["total_minutes", "total_games", "total_wins", "total_draws"]
    # Weighted aggregates that load_df correlates against the median.
    + ["weighted_score", "weighted_goals", "weighted_goals_difference"]
)

In [None]:
def load_df(file_name, data_dir="../real_data/player_level_results"):
    """Load a player-level summary CSV, print diagnostics, and return the player rows.

    Side effects: prints the number of distinct player names and the Pearson
    correlation between the "50%" column and each weighted aggregate, then
    displays the rows holding the global parameters.

    Args:
        file_name: Name of the CSV file inside ``data_dir``.
        data_dir: Directory containing the summary files. Defaults to the
            location the notebook has always read from, so existing calls
            behave exactly as before.

    Returns:
        A new DataFrame containing only the rows without any missing value,
        with the ``total_*`` count columns cast to ``int``. The original row
        index from the CSV is kept.
    """
    full_path = f"{data_dir}/{file_name}"
    summary = pd.read_csv(full_path)

    # nunique() skips NaN, so rows without a name are not counted as players.
    num_players = summary["name"].nunique()
    print(f"Total number of players: {num_players}")

    # Rows whose player_id marks a model-wide parameter rather than a player.
    global_params = summary.loc[
        summary["player_id"].isin(["None", "home_effect", "correlation_strength"]),
        ["index", "50%", "mean", "std", "2.5%", "95%", "97.5%"],
    ]

    # Drop every row with a missing value in any column (presumably this is
    # what removes the global-parameter rows -- verify against the CSV), then
    # cast the counters, which cannot be int while NaN is present.
    players = summary.dropna().astype(
        {
            "total_minutes": int,
            "total_games": int,
            "total_wins": int,
            "total_draws": int,
        }
    )

    for column in ("weighted_score", "weighted_goals", "weighted_goals_difference"):
        corr = players["50%"].corr(players[column])
        print(f"Correlation between median and {column}: {corr:.4f}")

    # `display` is provided by the IPython/Jupyter session.
    display(global_params)

    return players

In [4]:
def show_stats(df, top=True, n=20, show_latex=False):
    """Show the best or worst ``n`` rows of ``df`` ranked by the "50%" column.

    Args:
        df: DataFrame containing at least the columns listed in the
            module-level ``cols`` (which includes "50%").
        top: If True show the ``n`` highest-ranked rows, otherwise the ``n``
            lowest-ranked rows.
        n: Maximum number of rows to show.
        show_latex: If True print one ``&``-separated, ``\\\\``-terminated line
            per row (preceded by a header line); otherwise display the
            selected rows as a DataFrame.

    Returns:
        None. Output is printed or displayed.
    """
    ranked = df.sort_values(by="50%", ascending=False)
    stats = (ranked.head(n) if top else ranked.tail(n))[cols]

    if not show_latex:
        # `display` is provided by the IPython/Jupyter session.
        display(stats)
        return

    # Rank of the first printed row. For the bottom table this is derived from
    # the number of rows actually ranked and actually selected, so it stays
    # correct when n exceeds len(df) or when two rows share the same name.
    first_rank = 1 if top else len(ranked) - len(stats) + 1

    print(
        "# & name & team & mean & std & median & [ci_low, ci_high] & "
        "total_minutes & total_games & total_wins & total_draws & weighted_score & "
        "weighted_goals & weighted_goals_difference \\\\"
    )
    for idx, row in enumerate(
        stats.itertuples(index=False, name=None), start=first_rank
    ):
        (
            name,
            team,
            mean,
            std,
            median,
            ci_low,
            ci_high,
            total_minutes,
            total_games,
            total_wins,
            total_draws,
            weighted_score,
            weighted_goals,
            weighted_goals_difference,
        ) = row
        # Keep only the text before the first "/" in the team field.
        team = team.split("/")[0].strip()
        print(
            f"{idx} & {name} & {team} & {mean:.3f} & {std:.2f} & {median:.3f} & "
            f"[{ci_low:.2f}, {ci_high:.2f}] & {total_minutes:,} & {total_games} & "
            f"{total_wins} & {total_draws} & {weighted_score:.1f} & "
            f"{weighted_goals:.1f} & {weighted_goals_difference:.1f} \\\\"
        )

# 2025

In [5]:
# Load the 2025 summary file; load_df prints the player count and the
# median-vs-weighted-metric correlations and displays the global parameters.
df = load_df("players_summary_2025_poisson_7.csv")

Total number of players: 730
Correlation between median and weighted_score: 0.4981
Correlation between median and weighted_goals: 0.4466
Correlation between median and weighted_goals_difference: 0.9924


Unnamed: 0,index,50%,mean,std,2.5%,95%,97.5%
0,home_effect,0.421856,0.421532,0.065699,0.293964,0.529428,0.549279
1,correlation_strength,-0.002071,-0.002178,0.050918,-0.104341,0.080294,0.096083
2,,-2.275175,-2.509474,1.68757,-6.412668,-0.160076,0.120551


In [6]:
# Print the 20 (default n) highest-ranked rows by "50%" as LaTeX table lines.
show_stats(df, top=True, show_latex=True)

# & name & team & mean & std & median & [ci_low, ci_high] & total_minutes & total_games & total_wins & total_draws & weighted_score & weighted_goals & weighted_goals_difference \\
1 & 1ROSSI Agustin Daniel Rossi T(g)P815100 & Flamengo & 0.049 & 0.10 & 0.049 & [-0.15, 0.25] & 3,330 & 37 & 23 & 9 & 78.0 & 75.0 & 51.0 \\
2 & 4Leo Pereira Leonardo Pereira TP310373 & Flamengo & 0.045 & 0.10 & 0.046 & [-0.16, 0.25] & 2,954 & 33 & 22 & 8 & 73.5 & 68.5 & 47.5 \\
3 & 10De Arrascaeta Giorgian Daniel de A ... TP521990 & Flamengo & 0.036 & 0.10 & 0.037 & [-0.16, 0.23] & 2,402 & 33 & 22 & 7 & 58.5 & 52.2 & 35.8 \\
4 & 3Léo Ortiz Leonardo Rech Ortiz TP422469 & Flamengo & 0.031 & 0.10 & 0.030 & [-0.17, 0.23] & 2,248 & 26 & 16 & 7 & 52.0 & 45.0 & 31.7 \\
5 & 7Luiz Luiz de Araujo Guima ... TP424455 & Flamengo & 0.029 & 0.10 & 0.029 & [-0.17, 0.23] & 1,712 & 33 & 22 & 7 & 44.0 & 40.9 & 29.3 \\
6 & 25Villalba Lucas Hernan Villalba RP837958 & Cruzeiro & 0.027 & 0.10 & 0.027 & [-0.17, 0.23] & 2,864 & 32 & 

In [7]:
# Print the 20 (default n) lowest-ranked rows by "50%" as LaTeX table lines.
show_stats(df, top=False, show_latex=True)

# & name & team & mean & std & median & [ci_low, ci_high] & total_minutes & total_games & total_wins & total_draws & weighted_score & weighted_goals & weighted_goals_difference \\
711 & 2Ewerthon Ewerthon Diogenes da ... TP500506 & Juventude & -0.012 & 0.10 & -0.012 & [-0.21, 0.18] & 1,110 & 17 & 3 & 5 & 8.0 & 10.1 & -16.7 \\
712 & 79Renato Kayzer Renato Kayzer de Souza RP337840 & Vitória & -0.013 & 0.10 & -0.013 & [-0.21, 0.18] & 2,042 & 28 & 7 & 9 & 21.9 & 18.4 & -18.3 \\
713 & 95Caique Caique de Jesus Goncalves RP502736 & Juventude & -0.014 & 0.10 & -0.014 & [-0.21, 0.18] & 2,054 & 28 & 7 & 6 & 18.8 & 19.5 & -20.1 \\
714 & 2Ramon Ramon Ramos Lima RP526090 & Vitória & -0.014 & 0.10 & -0.014 & [-0.21, 0.18] & 1,949 & 23 & 7 & 5 & 25.1 & 15.1 & -20.1 \\
715 & 30Chrystian Chrystian Amaral Bar ... RP546794 & Sport & -0.014 & 0.10 & -0.015 & [-0.21, 0.18] & 1,546 & 33 & 2 & 11 & 6.8 & 10.7 & -18.7 \\
716 & 40Ramon Menezes Ramon Menezes Roma RP402095 & Sport & -0.016 & 0.10 & -0.015 & [-0.

# 2020-2025

In [8]:
# Load the 2020-2025 summary file. NOTE: this rebinds `df`, so the cells below
# operate on this frame rather than the one loaded earlier in the notebook.
df = load_df("players_summary_2020-2025_poisson_7.csv")

Total number of players: 1975
Correlation between median and weighted_score: 0.5193
Correlation between median and weighted_goals: 0.4972
Correlation between median and weighted_goals_difference: 0.9948


Unnamed: 0,index,50%,mean,std,2.5%,95%,97.5%
0,home_effect,0.323674,0.323569,0.027451,0.269809,0.368555,0.377417
1,correlation_strength,0.015772,0.015726,0.020901,-0.025402,0.04999,0.056741
2,,-4.813255,-5.235895,2.415083,-10.946535,-2.069762,-1.739932


In [9]:
# Print the 20 (default n) highest-ranked rows by "50%" as LaTeX table lines.
show_stats(df, top=True, show_latex=True)

# & name & team & mean & std & median & [ci_low, ci_high] & total_minutes & total_games & total_wins & total_draws & weighted_score & weighted_goals & weighted_goals_difference \\
1 & 1Weverton Weverton Pereira da Silva T(g)P169050 & Palmeiras & 0.132 & 0.10 & 0.132 & [-0.07, 0.33] & 16,448 & 183 & 105 & 44 & 359.0 & 308.0 & 153.5 \\
2 & 14de Arrasca ... Giorgian Daniel de A ... TP521990 & Flamengo & 0.094 & 0.10 & 0.093 & [-0.10, 0.30] & 9,864 & 142 & 87 & 29 & 225.9 & 205.5 & 106.7 \\
3 & 14Fabricio B ... Fabricio Bruno Soare ... TP389228 & Flamengo & 0.083 & 0.10 & 0.083 & [-0.12, 0.28] & 13,391 & 155 & 81 & 43 & 275.7 & 235.6 & 94.2 \\
4 & 15GUSTAVO GOMEZ Gustavo Raul Gomez P ... TP633571 & Palmeiras & 0.083 & 0.10 & 0.083 & [-0.12, 0.29] & 13,786 & 157 & 86 & 39 & 290.7 & 236.2 & 105.5 \\
5 & 26Murilo Murilo Cerqueira Paim TP392224 & Palmeiras & 0.082 & 0.10 & 0.082 & [-0.12, 0.28] & 8,930 & 102 & 60 & 27 & 202.5 & 167.3 & 96.4 \\
6 & 22Joaquin Pi ... Joaquin Piquerez Moreira RP72

In [10]:
# Print the 20 (default n) lowest-ranked rows by "50%" as LaTeX table lines.
show_stats(df, top=False, show_latex=True)

# & name & team & mean & std & median & [ci_low, ci_high] & total_minutes & total_games & total_wins & total_draws & weighted_score & weighted_goals & weighted_goals_difference \\
1956 & 6Victor Victor Luis Chuab Za ... TP309431 & Botafogo & -0.025 & 0.10 & -0.026 & [-0.22, 0.17] & 6,632 & 104 & 23 & 31 & 67.1 & 74.1 & -32.7 \\
1957 & 16Lucas Lucas Kal Schenfeld  ... RP337242 & América & -0.025 & 0.10 & -0.026 & [-0.22, 0.17] & 7,119 & 94 & 27 & 20 & 89.6 & 78.8 & -34.7 \\
1958 & 2Rodrigo Rodrigo Alves Soares TP342224 & Juventude & -0.026 & 0.10 & -0.026 & [-0.22, 0.16] & 2,788 & 35 & 3 & 11 & 17.9 & 23.0 & -34.4 \\
1959 & 9Pedro Raul Pedro Raul Garay da Silva TP502361 & Ceará & -0.026 & 0.10 & -0.026 & [-0.22, 0.16] & 8,898 & 123 & 27 & 39 & 101.1 & 93.5 & -37.9 \\
1960 & 3Victor Victor Hugo Soares d ... TP355418 & Bahia & -0.028 & 0.10 & -0.027 & [-0.22, 0.16] & 10,710 & 124 & 37 & 27 & 132.6 & 128.9 & -36.8 \\
1961 & 21Shaylon Shaylon Kallyson Cardozo RP421138 & Atlético Goianiense 