## Load Packages & Set Paths

In [6]:
import pandas as pd
import numpy as np
from pathlib import Path
import duckdb

# The repository root is taken to be the directory directly above the
# current working directory; the DuckDB file sits under dbt_project/ in it.
working_dir = Path().resolve()
REPO_ROOT = working_dir.parents[0]
DB_PATH = REPO_ROOT.joinpath("dbt_project", "dev.duckdb")

# Echo both locations so a wrong working directory is spotted immediately.
for location in (REPO_ROOT, DB_PATH):
    print(location)

/Users/samharrison/Documents/data_sci/gk_performance_tracker/gk_performance_tracker
/Users/samharrison/Documents/data_sci/gk_performance_tracker/gk_performance_tracker/dbt_project/dev.duckdb


## Connect to `duckdb` DB & Create Query-Function

In [7]:
# Connect to duckdb
# Fail fast when the database file is missing: a read-write connect would
# otherwise create a brand-new empty database at DB_PATH, and every query
# below would then fail with a misleading "table does not exist" error.
if not DB_PATH.exists():
    raise FileNotFoundError(f"DuckDB database not found: {DB_PATH}")

con = duckdb.connect(str(DB_PATH), read_only=False)
# Show a progress bar for long-running queries.
con.execute("PRAGMA enable_progress_bar=true;")

# Query helper used by every cell below
def q(sql: str, *params):
    """Execute `sql` on the notebook connection and return the result as a DataFrame.

    Any extra positional arguments are forwarded together, as one tuple, as the
    statement's bound parameters.
    """
    result = con.execute(sql, params)
    return result.df()

# Lift pandas' display limits so wide query results are shown untruncated.
for _display_option in ("display.max_columns", "display.width", "display.max_colwidth"):
    pd.set_option(_display_option, None)

# Demo of the query helper: preview up to 100 Premier League match-log rows
# in which the goalkeeper actually played.
example_sql = """
select *
from stg_matchlogs__parsed
where minutes_played > 0
and competition = 'Premier League' 
limit 100
"""
q(example_sql)

Unnamed: 0,source_table,goalkeeper,season,match_date,competition,round,venue,result,team,opponent,game_started,minutes_played,gk_shots_on_target_against,gk_goals_against,gk_saves,gk_clean_sheets,gk_psxg,gk_pens_att,gk_pens_allowed,gk_pens_saved,gk_pens_missed,gk_passes_launched,gk_passes_completed_launched,gk_passes,gk_passes_throws,gk_passes_length_avg,gk_goal_kicks,gk_goal_kicks_launched,gk_goal_kick_length_avg,gk_crosses,gk_crosses_stopped,gk_def_actions_outside_pen_area,gk_avg_distance_def_actions
0,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-11-09,Premier League,Matchweek 11,Away,L 1–3,Newcastle Utd,Brentford,N,14,3.0,2.0,1.0,0.0,1.4,1.0,1.0,0.0,0.0,2.0,0.0,3.0,0.0,37.0,3.0,0.999,39.3,6.0,0.0,0.0,
1,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-11-29,Premier League,Matchweek 13,Away,W 4–1,Newcastle Utd,Everton,Y,90,2.0,1.0,1.0,0.0,0.9,0.0,0.0,0.0,0.0,15.0,5.0,31.0,5.0,32.6,5.0,4.000,62.4,18.0,1.0,0.0,4.0
2,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-12-02,Premier League,Matchweek 14,Home,D 2–2,Newcastle Utd,Tottenham,Y,90,2.0,2.0,0.0,0.0,1.4,0.0,0.0,0.0,0.0,8.0,1.0,24.0,3.0,29.3,3.0,3.000,62.3,24.0,1.0,0.0,5.3
3,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-12-06,Premier League,Matchweek 15,Home,W 2–1,Newcastle Utd,Burnley,Y,90,3.0,1.0,2.0,0.0,1.2,1.0,1.0,0.0,0.0,4.0,1.0,32.0,8.0,26.1,5.0,1.000,31.2,14.0,1.0,5.0,17.9
4,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-12-14,Premier League,Matchweek 16,Away,L 0–1,Newcastle Utd,Sunderland,Y,90,1.0,1.0,1.0,0.0,0.2,0.0,0.0,0.0,0.0,14.0,6.0,36.0,7.0,31.7,7.0,3.997,51.3,12.0,1.0,2.0,19.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,caoimhin_kelleher_2025_2026,caoimhin_kelleher,2025_2026,2025-12-27,Premier League,Matchweek 18,Home,W 4–1,Brentford,Bournemouth,Y,90,9.0,1.0,8.0,0.0,1.2,0.0,0.0,0.0,0.0,22.0,10.0,39.0,5.0,37.6,3.0,3.000,61.3,30.0,0.0,0.0,11.0
96,david_raya_2025_2026,david_raya,2025_2026,2025-08-17,Premier League,Matchweek 1,Away,W 1–0,Arsenal,Manchester Utd,Y,90,7.0,0.0,7.0,1.0,0.7,0.0,0.0,0.0,0.0,20.0,5.0,28.0,9.0,34.4,9.0,9.000,64.8,11.0,2.0,0.0,9.2
97,david_raya_2025_2026,david_raya,2025_2026,2025-08-23,Premier League,Matchweek 2,Home,W 5–0,Arsenal,Leeds United,Y,90,1.0,0.0,1.0,1.0,0.1,0.0,0.0,0.0,0.0,7.0,4.0,32.0,1.0,28.8,0.0,,,3.0,1.0,0.0,3.5
98,david_raya_2025_2026,david_raya,2025_2026,2025-08-31,Premier League,Matchweek 3,Away,L 0–1,Arsenal,Liverpool,Y,90,3.0,1.0,2.0,0.0,0.3,0.0,0.0,0.0,0.0,26.0,12.0,49.0,7.0,40.3,2.0,2.000,69.0,6.0,2.0,2.0,17.6


## Data Marts

### a) Goalkeeper Performance Summary Table: `fct_goalkeeper_performance`

In [8]:
# Summarise goalkeeper performance: show the whole summary table
performance_sql = """
select *
from fct_goalkeeper_performance
"""
q(performance_sql)

Unnamed: 0,goalkeeper,team,goalkeeper_1,matches_played,clean_sheets,ga,saves,shots_on_target_against,save_pct,psxg_minus_ga,crosses_faced_p90,crosses_stopped_pct,pass_att_p90,long_kick_pass_completion_pct,def_actions_outside_pen_area_p90,avg_distance_def_actions
0,david_raya,Arsenal,david_raya,19,9.0,12.0,31.0,43.0,72.1,-1.1,9.8,11.3,35.7,33.2,2.0,21.511429
1,jordan_pickford,Everton,jordan_pickford,19,8.0,20.0,50.0,71.0,70.4,2.8,16.2,3.2,38.9,36.8,2.1,18.2
2,robert_sanchez,Chelsea,robert_sanchez,19,8.0,19.0,44.0,63.0,69.8,1.2,12.9,13.8,42.9,30.0,1.3,17.17619
3,djordje_petrovic,Bournemouth,djordje_petrovic,19,5.0,35.0,53.0,87.0,60.9,-5.1,14.0,8.3,25.6,31.1,1.9,16.41875
4,bart_verbruggen,Brighton,bart_verbruggen,19,4.0,27.0,54.0,79.0,68.4,0.0,14.0,4.6,45.4,25.0,1.4,16.845833
5,martin_dubravka,Burnley,martin_dubravka,19,3.0,37.0,67.0,101.0,66.3,0.1,20.1,3.5,29.5,31.9,0.8,12.446154
6,dean_henderson,Crystal Palace,dean_henderson,18,7.0,20.0,44.0,64.0,68.8,1.9,16.4,4.5,31.1,24.3,0.7,15.390909
7,robin_roefs,Sunderland,robin_roefs,18,6.0,18.0,61.0,79.0,77.2,2.1,19.8,10.3,40.7,24.8,1.4,17.963636
8,guglielmo_vicario,Tottenham,guglielmo_vicario,18,6.0,23.0,51.0,74.0,68.9,1.1,15.4,4.8,39.0,29.8,2.1,18.923529
9,bernd_leno,Fulham,bernd_leno,18,5.0,26.0,48.0,72.0,66.7,-4.3,15.0,5.3,39.1,25.1,0.6,17.07


### b) Goalkeeper Relative Performance vs. League: `mart_goalkeeper_league_ratings`

**TODO:** add a comment on how these ratings validate against the Fozcast Top-10.

In [9]:
# Show the league-relative ratings: raw metrics, z-scores, percentiles and rank
league_ratings_sql = """
select *
from mart_goalkeeper_league_ratings
"""
q(league_ratings_sql)

Unnamed: 0,goalkeeper,team,save_pct,psxg_minus_ga,crosses_stopped_pct,pass_att_p90,long_kick_pass_completion_pct,def_actions_outside_pen_area_p90,z_save_pct,z_psxg_minus_ga,z_crosses_stopped_pct,z_pass_att_p90,z_long_kick_pass_completion_pct,z_def_actions_outside_pen_area_p90,pct_save_pct,pct_psxg_minus_ga,pct_crosses_stopped_pct,pct_pass_att_p90,pct_long_kick_pass_completion_pct,pct_def_actions_outside_pen_area_p90,overall_score,overall_rank
0,nick_pope,Newcastle Utd,75.9,-0.4,14.3,25.0,24.7,3.1,1.579903,0.32928,2.421737,-1.61672,-1.360826,2.658583,95.833333,58.333333,100.0,0.0,8.333333,100.0,0.983681,1
1,robert_sanchez,Chelsea,69.8,1.2,13.8,42.9,30.0,1.3,0.678537,1.008209,2.266577,1.349587,-0.135898,0.0,79.166667,79.166667,95.833333,95.833333,50.0,41.666667,0.962444,2
2,robin_roefs,Sunderland,77.2,2.1,10.3,40.7,24.8,1.4,1.771997,1.390106,1.180457,0.985013,-1.337714,0.147699,100.0,95.833333,83.333333,87.5,12.5,58.333333,0.924902,3
3,emiliano_martinez,Aston Villa,72.4,1.2,11.0,42.2,37.1,0.7,1.062725,1.008209,1.397681,1.233586,1.505044,-0.886194,91.666667,79.166667,87.5,91.666667,95.833333,16.666667,0.885067,4
4,jordan_pickford,Everton,70.4,2.8,3.2,38.9,36.8,2.1,0.767195,1.687137,-1.022814,0.686725,1.435709,1.181592,83.333333,100.0,0.0,62.5,87.5,91.666667,0.760143,5
5,david_raya,Arsenal,72.1,-1.1,11.3,35.7,33.2,2.0,1.018396,0.032249,1.490777,0.156435,0.603682,1.033893,87.5,54.166667,91.666667,45.833333,75.0,83.333333,0.740993,6
6,guglielmo_vicario,Tottenham,68.9,1.1,4.8,39.0,29.8,2.1,0.545548,0.965776,-0.526303,0.703296,-0.182121,1.181592,75.0,75.0,45.833333,66.666667,45.833333,91.666667,0.474649,7
7,bart_verbruggen,Brighton,68.4,0.0,4.6,45.4,25.0,1.4,0.471666,0.499012,-0.588367,1.763875,-1.291491,0.147699,66.666667,62.5,37.5,100.0,16.666667,58.333333,0.170806,8
8,dean_henderson,Crystal Palace,68.8,1.9,4.5,31.1,24.3,0.7,0.530772,1.30524,-0.619399,-0.605856,-1.453274,-0.886194,70.833333,91.666667,29.166667,29.166667,0.0,16.666667,-0.030258,9
9,senne_lammens,Manchester Utd,63.3,1.3,4.9,31.3,29.6,0.9,-0.281935,1.050642,-0.495271,-0.572713,-0.228345,-0.590796,29.166667,87.5,50.0,33.333333,41.666667,29.166667,-0.061506,10


## Radar Chart: Example

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Radar metrics, in plotting order: column name -> (axis label, str.format template).
# An empty template means the raw value is displayed without formatting.
METRIC_CONFIG = dict(
    save_pct=("Save %", "{:.1f}%"),
    psxg_minus_ga=("PSxG - GA", "{:.1f}"),
    crosses_stopped_pct=("Cross Stop %", "{:.1f}%"),
    pass_att_p90=("Passes Att. (per 90)", ""),
    long_kick_pass_completion_pct=("Long Pass Completion %", "{:.1f}%"),
    def_actions_outside_pen_area_p90=("Def. Actions OPA (per 90)", "{:.1f}"),
)

# Column names, their axis labels, and the label sequence used as the angular axis.
METRIC_COLS = [*METRIC_CONFIG]
METRIC_LABELS = {col: label for col, (label, _template) in METRIC_CONFIG.items()}
THETA = [METRIC_LABELS[col] for col in METRIC_COLS]


def get_goalkeeper_data(goalkeeper: str, df: pd.DataFrame) -> tuple[pd.Series, pd.Series, pd.Series]:
    """Extract one goalkeeper's radar-chart inputs from a ratings table.

    Args:
        goalkeeper: Value to match exactly against ``df["goalkeeper"]``.
        df: Table containing a ``goalkeeper`` column plus, for every column
            ``c`` in ``METRIC_COLS``, the columns ``c``, ``z_<c>`` and ``pct_<c>``.

    Returns:
        Three Series, each indexed by the metric display labels:
        ``(raw_values, z_scores, percentiles)``. Raw values are formatted with
        the metric's template from ``METRIC_CONFIG``; metrics with an empty
        template keep their unformatted value.

    Raises:
        ValueError: If ``df`` does not contain exactly one row for ``goalkeeper``.
    """
    matches = df.loc[df["goalkeeper"] == goalkeeper]
    # Zero or several matching rows cannot be reduced to a single record;
    # report that directly instead of failing later inside str.format.
    if len(matches) != 1:
        raise ValueError(
            f"Expected exactly one row for goalkeeper {goalkeeper!r}, found {len(matches)}."
        )
    gk_row = matches.iloc[0]

    raw_values = pd.Series({
        METRIC_LABELS[c]: METRIC_CONFIG[c][1].format(gk_row[c]) if METRIC_CONFIG[c][1] else gk_row[c]
        for c in METRIC_COLS
    })

    # Relabel the prefixed columns with the same display labels as raw_values.
    z_scores = gk_row[[f"z_{c}" for c in METRIC_COLS]].rename({f"z_{c}": METRIC_LABELS[c] for c in METRIC_COLS})
    percentiles = gk_row[[f"pct_{c}" for c in METRIC_COLS]].rename({f"pct_{c}": METRIC_LABELS[c] for c in METRIC_COLS})

    return raw_values, z_scores, percentiles


def plot_goalkeeper_radar(goalkeepers: str | list[str], df: pd.DataFrame) -> go.Figure:
    """Build a radar (polar) chart of metric z-scores, one filled trace per goalkeeper.

    Args:
        goalkeepers: A single goalkeeper key or a list of keys; each is looked
            up in ``df`` via ``get_goalkeeper_data``.
        df: Ratings table passed through to ``get_goalkeeper_data``.

    Returns:
        A 700x700 plotly Figure. The radial axis is symmetric around zero and
        sized to the largest absolute z-score plotted. Hover text shows the
        formatted raw value, the percentile and the z-score for each metric.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(goalkeepers, str):
        goalkeepers = [goalkeepers]

    fig = go.Figure()
    max_z_score = 0.0

    for gk in goalkeepers:
        raw_values, z_scores, percentiles = get_goalkeeper_data(gk, df)

        # Ignore an all-missing set of z-scores so NaN cannot poison the axis range.
        largest = abs(z_scores).max()
        if pd.notna(largest):
            max_z_score = max(float(largest), max_z_score)

        # Per-point hover payload: column 0 = formatted raw value, column 1 = percentile.
        customdata = np.column_stack([
            raw_values.reindex(THETA).to_numpy(),
            percentiles.reindex(THETA).to_numpy(),
        ])

        fig.add_trace(
            go.Scatterpolar(
                r = z_scores.reindex(THETA).to_numpy(),
                theta = THETA,
                fill = "toself",
                name = gk.replace("_", " ").title(),
                customdata = customdata,
                hovertemplate = (
                    "<b>%{theta}</b><br>"
                    "Value: %{customdata[0]}<br>"
                    "Percentile: %{customdata[1]:.0f}th<br>"
                    "Z-score: %{r:.2f}<br>"
                    "<extra></extra>"
                ),
            )
        )

    # Fall back to a unit range when nothing was plotted or every z-score is 0/NaN,
    # so the radial axis never collapses to [0, 0].
    axis_limit = max_z_score or 1.0

    fig.update_layout(
        polar = dict(radialaxis = dict(visible = True, range = [-axis_limit, axis_limit])),
        showlegend = True,
        title = "Goalkeeper Performance",
        width=700,
        height=700
    )
    return fig


# Example: compare three goalkeepers on one radar chart
df = q("select * from mart_goalkeeper_league_ratings")
example_goalkeepers = ["nick_pope", "alisson", "jose_sa"]
fig = plot_goalkeeper_radar(example_goalkeepers, df)
fig.show()