In [2]:
import matplotlib.pyplot as plt
from mplsoccer import Pitch, Sbopen, VerticalPitch
import pandas as pd

In [3]:
# Open the dataset: fetch the match list for the chosen competition/season.
parser = Sbopen()
matches_df = parser.match(competition_id=72, season_id=30)

team = "England Women's"

# Match ids of every game the team played, whether listed as home or away.
# A single .loc[rows, column] lookup avoids chained indexing.
team_played = (matches_df["home_team_name"] == team) | (matches_df["away_team_name"] == team)
match_ids = matches_df.loc[team_played, "match_id"].tolist()
no_games = len(match_ids)
print(f"Number of games for {team}: {no_games}")

Number of games for England Women's: 7


In [15]:
"""
The context for this heat map is that we'll look at dangerous passes.
Those are passes that lead to a shot within a certain time window.
"""

# Collect, for every match and each half, the team's accurate open-play passes
# that happened at most `shot_window` seconds before one of the team's shots.

shot_window = 15  # seconds before a shot in which a pass counts as dangerous
pass_columns = ["x", "y", "end_x", "end_y", "minute", "second", "player_name"]

# One frame per (match, period); concatenated once after the loop instead of
# re-copying a growing DataFrame on every iteration.
dangerous_passes_parts = []

for match_id in match_ids:
    df = parser.event(match_id)[0]  # event data is at index 0
    for period in [1, 2]:
        team_in_period = (df.team_name == team) & (df.period == period)

        # only keep accurate passes that were not set pieces in this period
        # (a null outcome marks a completed pass, a null sub type an open-play one)
        pass_mask = (
            team_in_period
            & (df.type_name == "Pass")
            & df.outcome_name.isnull()
            & df.sub_type_name.isnull()
        )
        passes_df = df.loc[pass_mask, pass_columns]

        # keep shots in this period
        shot_mask = team_in_period & (df.type_name == "Shot")
        shots_df = df.loc[shot_mask, ["minute", "second"]]

        # pass and shot timestamps converted to seconds
        pass_times = passes_df["minute"] * 60 + passes_df["second"]
        pass_window_end = shots_df["minute"] * 60 + shots_df["second"]

        # Each window opens `shot_window` seconds before its shot, but never
        # before the period kicked off. The bound is expressed in seconds like
        # every other time here.
        # NOTE(review): assumes `minute` is the running match clock, so the
        # second half starts at minute 45 - confirm against the event data spec.
        period_start = (period - 1) * 45 * 60
        pass_window_start = (pass_window_end - shot_window).clip(lower=period_start)

        # Compare every pass time with every shot window at once:
        # rows are passes, columns are shots. A pass is dangerous when it
        # falls inside at least one window. With no shots the matrix has zero
        # columns, so no pass is selected.
        times = pass_times.to_numpy()[:, None]
        in_window = (times >= pass_window_start.to_numpy()) & (times <= pass_window_end.to_numpy())
        pass_to_shot = in_window.any(axis=1)

        # get passes from df
        dangerous_passes_period = passes_df.loc[pass_to_shot]

        # skip empty results so they cannot influence dtypes when concatenating
        if not dangerous_passes_period.empty:
            dangerous_passes_parts.append(dangerous_passes_period)

# Build the final frame once; fall back to an empty frame with the expected
# columns when nothing was collected (pd.concat raises on an empty list).
if dangerous_passes_parts:
    dangerous_passes_df = pd.concat(dangerous_passes_parts, ignore_index=True)
else:
    dangerous_passes_df = pd.DataFrame(columns=pass_columns)

dangerous_passes_df.head()

Unnamed: 0,x,y,end_x,end_y,minute,second,player_name
0,114.6,16.4,111.3,7.1,22,40,Ellen White
1,32.9,70.0,42.0,68.5,30,26,Stephanie Houghton
2,41.7,71.0,45.1,76.5,30,27,Jill Scott
3,53.9,76.2,67.0,62.0,30,30,Nikita Parris
4,70.4,54.9,85.1,66.2,30,32,Jill Scott
