## Load Packages & Set Paths

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import duckdb

# The repository root is taken to be one level above the (resolved, absolute)
# current working directory.
REPO_ROOT = Path.cwd().resolve().parents[0]

# DuckDB file path: <REPO_ROOT>/dbt_project/dev.duckdb
DB_PATH = REPO_ROOT.joinpath("dbt_project", "dev.duckdb")

# Echo both paths, one per line, so a wrong working directory is visible right away.
print(REPO_ROOT, DB_PATH, sep="\n")

/Users/samharrison/Documents/data_sci/gk_performance_tracker/gk_performance_tracker
/Users/samharrison/Documents/data_sci/gk_performance_tracker/gk_performance_tracker/dbt_project/dev.duckdb


## Connect to `duckdb` DB & Create Query-Function

In [2]:
# Open the DuckDB database file located at DB_PATH (read_only=False, i.e. a writable handle).
# NOTE(review): a writable handle may prevent other processes from opening the same
# file while this kernel is alive — confirm whether read_only=True would be enough here.
con = duckdb.connect(database=str(DB_PATH), read_only=False)

# Turn on DuckDB's progress bar for this connection.
con.execute("PRAGMA enable_progress_bar=true;")

# Query helper built on the module-level `con` connection
def q(sql: str, *params):
    """Execute ``sql`` on the shared ``con`` connection and return a pandas DataFrame.

    Args:
        sql: SQL text to execute.
        *params: Positional values, forwarded as a single tuple to
            ``con.execute`` together with ``sql``.

    Returns:
        The query result converted with ``.df()``.
    """
    result = con.execute(sql, params)
    return result.df()

# Set the column-count, display-width and cell-width display options to None
# so that wide query results are not cut off by the default limits.
for option_name in ("display.max_columns", "display.width", "display.max_colwidth"):
    pd.set_option(option_name, None)

# Example usage of query-function.
# - The competition name is passed as a bound parameter (`?`) to show how the
#   `*params` argument of `q` is used instead of inlining literals in the SQL.
# - `order by` makes the 100-row sample deterministic: a `limit` without an
#   ordering may return a different set of rows from one run to the next.
q(
    """
    select *
    from stg_matchlogs__parsed
    where minutes_played > 0
      and competition = ?
    order by goalkeeper, match_date
    limit 100
    """,
    "Premier League",
)

Unnamed: 0,source_table,goalkeeper,season,match_date,competition,round,venue,result,team,opponent,game_started,minutes_played,gk_shots_on_target_against,gk_goals_against,gk_saves,gk_clean_sheets,gk_psxg,gk_pens_att,gk_pens_allowed,gk_pens_saved,gk_pens_missed,gk_passes_launched,gk_passes_completed_launched,gk_passes,gk_passes_throws,gk_passes_length_avg,gk_goal_kicks,gk_goal_kicks_launched,gk_goal_kick_length_avg,gk_crosses,gk_crosses_stopped,gk_def_actions_outside_pen_area,gk_avg_distance_def_actions
0,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-11-09,Premier League,Matchweek 11,Away,L 1–3,Newcastle Utd,Brentford,N,14,3.0,2.0,1.0,0.0,1.4,1.0,1.0,0.0,0.0,2.0,0.0,3.0,0.0,37.0,3.0,0.999,39.3,6.0,0.0,0.0,
1,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-11-29,Premier League,Matchweek 13,Away,W 4–1,Newcastle Utd,Everton,Y,90,2.0,1.0,1.0,0.0,0.9,0.0,0.0,0.0,0.0,15.0,5.0,31.0,5.0,32.6,5.0,4.000,62.4,18.0,1.0,0.0,4.0
2,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-12-02,Premier League,Matchweek 14,Home,D 2–2,Newcastle Utd,Tottenham,Y,90,2.0,2.0,0.0,0.0,1.4,0.0,0.0,0.0,0.0,8.0,1.0,24.0,3.0,29.3,3.0,3.000,62.3,24.0,1.0,0.0,5.3
3,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-12-06,Premier League,Matchweek 15,Home,W 2–1,Newcastle Utd,Burnley,Y,90,3.0,1.0,2.0,0.0,1.2,1.0,1.0,0.0,0.0,4.0,1.0,32.0,8.0,26.1,5.0,1.000,31.2,14.0,1.0,5.0,17.9
4,aaron_ramsdale_2025_2026,aaron_ramsdale,2025_2026,2025-12-14,Premier League,Matchweek 16,Away,L 0–1,Newcastle Utd,Sunderland,Y,90,1.0,1.0,1.0,0.0,0.2,0.0,0.0,0.0,0.0,14.0,6.0,36.0,7.0,31.7,7.0,3.997,51.3,12.0,1.0,2.0,19.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,david_raya_2025_2026,david_raya,2025_2026,2025-10-26,Premier League,Matchweek 9,Home,W 1–0,Arsenal,Crystal Palace,Y,90,1.0,0.0,1.0,1.0,0.5,0.0,0.0,0.0,0.0,21.0,7.0,37.0,3.0,40.8,5.0,2.000,41.2,9.0,1.0,4.0,20.1
96,david_raya_2025_2026,david_raya,2025_2026,2025-11-01,Premier League,Matchweek 10,Away,W 2–0,Arsenal,Burnley,Y,90,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,14.0,1.0,31.0,4.0,38.2,0.0,,,6.0,1.0,1.0,18.0
97,david_raya_2025_2026,david_raya,2025_2026,2025-11-08,Premier League,Matchweek 11,Away,D 2–2,Arsenal,Sunderland,Y,90,2.0,2.0,0.0,0.0,0.8,0.0,0.0,0.0,0.0,14.0,4.0,22.0,2.0,41.5,2.0,2.000,72.0,10.0,0.0,0.0,
98,david_raya_2025_2026,david_raya,2025_2026,2025-11-23,Premier League,Matchweek 12,Home,W 4–1,Arsenal,Tottenham,Y,90,2.0,1.0,1.0,0.0,0.1,0.0,0.0,0.0,0.0,14.0,8.0,19.0,2.0,35.9,6.0,4.998,59.0,6.0,2.0,0.0,7.7


## Template Dashboard Views

### a) Goalkeeper Performance Summary Table: `fct_goalkeeper_performance`

In [3]:
# Summarise goalkeeper performance: show every row and column of
# the fct_goalkeeper_performance table.
performance_sql = """
select *
from fct_goalkeeper_performance
"""
q(performance_sql)

Unnamed: 0,goalkeeper,team,matches_played,clean_sheets,ga,saves,shots_on_target_against,save_pct,pxsg_minus_ga,crosses_faced_p90,crosses_stopped_pct,pass_att_p90,long_kick_pass_completion_pct,def_actions_outside_pen_area_p90,avg_distance_def_actions
0,david_raya,Arsenal,17,9.0,10.0,27.0,37.0,73.0,-1.6,9.8,11.3,36.0,34.2,2.0,22.066667
1,robert_sanchez,Chelsea,17,8.0,15.0,34.0,49.0,69.4,0.6,13.3,14.6,42.5,29.6,1.4,17.395
2,dean_henderson,Crystal Palace,17,7.0,19.0,41.0,60.0,68.3,1.9,16.8,4.3,31.1,24.1,0.7,15.43
3,jordan_pickford,Everton,17,6.0,20.0,44.0,65.0,67.7,1.9,14.3,3.2,38.0,35.8,2.2,18.405882
4,robin_roefs,Sunderland,17,6.0,17.0,57.0,74.0,77.0,1.9,19.5,11.0,40.8,24.9,1.4,17.963636
5,djordje_petrovic,Bournemouth,17,5.0,29.0,49.0,78.0,62.8,-4.5,13.8,8.5,26.5,29.7,1.8,16.977778
6,guglielmo_vicario,Tottenham,17,5.0,23.0,49.0,72.0,68.1,1.0,14.8,4.0,39.4,27.5,2.2,19.187879
7,bart_verbruggen,Brighton,17,4.0,23.0,47.0,69.0,68.1,0.5,13.9,3.8,44.9,24.0,1.3,17.515
8,caoimhin_kelleher,Brentford,17,3.0,25.0,41.0,66.0,62.1,-3.2,16.3,4.8,35.8,36.0,1.5,17.347826
9,martin_dubravka,Burnley,17,2.0,34.0,57.0,88.0,64.8,-0.6,20.7,3.5,29.3,31.2,0.5,12.085714


### b) Goalkeeper Relative Performance vs. League: `mart_goalkeeper_zscores`

In [4]:
# Show every row and column of the mart_goalkeeper_zscores table.
zscores_sql = """
select *
from mart_goalkeeper_zscores
"""
q(zscores_sql)

Unnamed: 0,goalkeeper,team,save_pct,pxsg_minus_ga,crosses_stopped_pct,pass_att_p90,long_kick_pass_completion_pct,def_actions_outside_pen_area_p90,z_save_pct,z_pxsg_minus_ga,z_crosses_stopped_pct,z_pass_att_p90,z_long_kick_pass_completion_pct,z_def_actions_outside_pen_area_p90,overall_score,rank
0,emiliano_martinez,Aston Villa,77.1,2.1,11.3,42.1,34.9,0.8,1.521447,1.484107,1.359459,1.265838,1.005274,-0.764241,1.097895,1
1,robert_sanchez,Chelsea,69.4,0.6,14.6,42.5,29.6,1.4,0.611038,0.817592,2.310735,1.335313,-0.193635,0.066456,0.909566,2
2,robin_roefs,Sunderland,77.0,1.9,11.0,40.8,24.9,1.4,1.509623,1.395239,1.27298,1.040043,-1.256819,0.066456,0.890298,3
3,nick_pope,Newcastle Utd,76.5,0.0,15.1,25.4,23.8,3.4,1.450506,0.550986,2.454867,-1.634751,-1.505649,2.835445,0.888326,4
4,david_raya,Arsenal,73.0,-1.6,11.3,36.0,34.2,2.0,1.036683,-0.159964,1.359459,0.206341,0.846927,0.897152,0.63428,5
5,jordan_pickford,Everton,67.7,1.9,3.2,38.0,35.8,2.2,0.410039,1.395239,-0.97549,0.553717,1.208862,1.174051,0.529382,6
6,guglielmo_vicario,Tottenham,68.1,1.0,4.0,39.4,27.5,2.2,0.457333,0.995329,-0.744878,0.79688,-0.668675,1.174051,0.321549,7
7,bart_verbruggen,Brighton,68.1,0.5,3.8,44.9,24.0,1.3,0.457333,0.773158,-0.802531,1.752164,-1.460407,-0.071994,0.146226,8
8,gianluigi_donnarumma,Manchester City,66.7,1.2,4.4,27.6,36.4,0.4,0.291804,1.084198,-0.629571,-1.252637,1.344588,-1.318039,0.080887,9
9,dean_henderson,Crystal Palace,68.3,1.9,4.3,31.1,24.1,0.7,0.48098,1.395239,-0.658398,-0.64473,-1.437786,-0.90269,0.014805,10
