In [1]:
#Load packages
import pandas as pd
import numpy as np
from mplsoccer import Pitch, Sbopen
from statsbombpy import sb


In [11]:
#Create list of match IDs
df = sb.matches(competition_id=55, season_id=282)
euros_id = df['match_id'].tolist()

In [5]:
# Load the xT grid data
xT = pd.read_csv('xT_grid.csv')
xT = np.array(xT)
xT_rows, xT_cols = xT.shape

In [9]:
# Initialize the parser
parser = Sbopen()

# List to store xT per player per match
all_match_xT = []

# Loop over each match
for match_id in euros_id:
    # Fetch event data
    df, related, freeze, tactics = parser.event(match_id)

    # Select relevant columns
    df = df[['period', 'minute', 'second', 'team_name', 'x', 'y', 'player_name', 'end_x', 'end_y', 'type_name', 'outcome_name']]

    # Filter for successful passes
    df_pass = df[(df['type_name'] == 'Pass') & (df['outcome_name'].isna())].copy()

    # Create bins
    df_pass['x1_bin'] = pd.cut(df_pass['x'], bins=xT_cols, labels=False)
    df_pass['y1_bin'] = pd.cut(df_pass['y'], bins=xT_rows, labels=False)
    df_pass['x2_bin'] = pd.cut(df_pass['end_x'], bins=xT_cols, labels=False)
    df_pass['y2_bin'] = pd.cut(df_pass['end_y'], bins=xT_rows, labels=False)

    # Calculate xT values
    df_pass['start_zone_value'] = df_pass[['x1_bin', 'y1_bin']].apply(lambda x: xT[x.iloc[1]][x.iloc[0]], axis=1)
    df_pass['end_zone_value'] = df_pass[['x2_bin', 'y2_bin']].apply(lambda x: xT[x.iloc[1]][x.iloc[0]], axis=1)
    df_pass['xT'] = df_pass['end_zone_value'] - df_pass['start_zone_value']

    # Aggregate xT per player
    player_xT = df_pass.groupby('player_name')['xT'].sum().reset_index()

    # Count passes and fouls per player
    pass_counts = df[df['type_name'] == 'Pass'].groupby('player_name').size().reset_index(name='pass_count')
    foul_counts = df[df['type_name'] == 'Foul Won'].groupby('player_name').size().reset_index(name='foul_count')

    # Merge all into one DataFrame
    player_xT = player_xT.merge(pass_counts, on='player_name', how='left')
    player_xT = player_xT.merge(foul_counts, on='player_name', how='left')

    # Append to the list
    all_match_xT.append(player_xT)

# Combine all matches into one DataFrame
final_xT_results = pd.concat(all_match_xT, ignore_index=True)

# Aggregate across all matches
total_xT_per_player = final_xT_results.groupby('player_name', as_index=False)[['xT', 'pass_count']].sum()
total_xT_per_player['xT_per_pass'] = total_xT_per_player['xT'] / total_xT_per_player['pass_count']
total_xT_per_player.head()

Unnamed: 0,player_name,xT,pass_count,xT_per_pass
0,Abdülkerim Bardakcı,0.71418,216,0.003306
1,Adam Buksa,0.041449,10,0.004145
2,Adam Gnezda Čerin,0.433566,165,0.002628
3,Adam Hložek,0.064569,16,0.004036
4,Adam Obert,0.340692,27,0.012618
