In [37]:
import requests
import pandas as pd
import json
import numpy as np
from scipy.spatial.distance import cdist
import os
from datetime import timedelta
from kloppy import skillcorner
from kloppy.domain import Team
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import PercentFormatter
from urllib.request import urlopen
from highlight_text import fig_text
from matplotlib.animation import FuncAnimation

from PIL import Image

from mplsoccer import PyPizza, add_image, FontManager


# Setup pitch and plot
from mplsoccer import Pitch
#from mplsoccer.pitch import Pitch ,VerticalPitch

# username = "XXX"
# password = "XXX"


# from skillcorner.client import SkillcornerClient
# client=SkillcornerClient(username=username,password=password)

def time_to_seconds(time_str):
    if time_str is None:
        return 90 * 60  # 120 minutes = 7200 seconds
    h, m, s = map(int, time_str.split(':'))
    return h * 3600 + m * 60 + s

font_normal = FontManager('https://raw.githubusercontent.com/googlefonts/roboto/main/'
                          'src/hinted/Roboto-Regular.ttf')
font_italic = FontManager('https://raw.githubusercontent.com/googlefonts/roboto/main/'
                          'src/hinted/Roboto-Italic.ttf')
font_bold = FontManager('https://raw.githubusercontent.com/google/fonts/main/apache/robotoslab/'
                        'RobotoSlab[wght].ttf')

In [3]:
def load_matches(matches_json_path):

    with open(matches_json_path, "r") as f:
        matches_json = json.load(f)

    match_ids = [match["id"] for match in matches_json]

    all_tracking = []

    for match_id in match_ids:
        tracking_data_github_url = f'https://media.githubusercontent.com/media/SkillCorner/opendata/741bdb798b0c1835057e3fa77244c1571a00e4aa/data/matches/{match_id}/{match_id}_tracking_extrapolated.jsonl'
        
        dataset = skillcorner.load(
            meta_data=f'https://raw.githubusercontent.com/SkillCorner/opendata/741bdb798b0c1835057e3fa77244c1571a00e4aa/data/matches/{match_id}/{match_id}_match.json',
            raw_data=tracking_data_github_url,
            # Optional arguments,
            coordinates="skillcorner",
            include_empty_frames=False)

        all_tracking.append(dataset)

    all_de_dfs = []

    for match_id in match_ids:
        url = f"https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches/{match_id}/{match_id}_dynamic_events.csv"
        try:
            de_match = pd.read_csv(url)
            all_de_dfs.append(de_match)
        except Exception as e:
            print(f"Failed to load dynamic events for match {match_id}: {e}")

    de_all_matches = pd.concat(all_de_dfs, ignore_index=True)
    
    return all_tracking, de_all_matches
        

In [4]:
# Load matches

matches_json_path = os.path.join(os.path.dirname(os.getcwd()), "data/matches.json")
all_tracking, de_all_matches = load_matches(matches_json_path)

  de_match = pd.read_csv(url)
  de_match = pd.read_csv(url)


How can a midfielder create space?
- He moves to another location and his origin location is free.
- Moving to another location creates a pass line.
- Dragging defenders
- pushing defensive line with in behind runs


In [42]:
def midfielders_obe(de_all_matches):
    # Get off-ball events
    off_ball_events = de_all_matches[de_all_matches["event_type_id"] == 1]

    # Get only off ball events from midfielders
    positions_mid = [9,10,11,12,13,14,15]
    mid_obe = off_ball_events[off_ball_events["player_position_id"].isin(positions_mid)].copy()

    # For every obe, column id equals event_id_match_id
    mid_obe["id"] = mid_obe["event_id"].astype(str) + "_" + mid_obe["match_id"].astype(str)
    mid_obe = mid_obe.reset_index(drop=True)

    # Data matching
    mid_obe = mid_obe[
            (mid_obe["is_player_possession_start_matched"] == True) &
            (mid_obe["is_player_possession_end_matched"] == True)
        ]
    return mid_obe

def space_creation(mid_obe, all_tracking):

    for row in mid_obe.itertuples():
        # Find the match in all_tracking
        match = next((m for m in all_tracking if int(m.metadata.game_id) == int(row.match_id)), None)
        if not match:
                raise ValueError(f"Match {row.match_id} not found in all_tracking")
        
        # Find the start frame in the match frames
        start_frame = next((f for f in match.frames if int(f.frame_id) == int(row.frame_start)), None)
        if not start_frame:
            print(f"Start frame {row.frame_start} not found in match {row.match_id}")
            continue  # Skip if start frame not found
        
        end_frame = next((f for f in match.frames if int(f.frame_id) == int(row.frame_end)), None)
        if not end_frame:
            print(f"End frame {row.frame_end} not found in match {row.match_id}")
            continue  # Skip if end frame not found
        
        # Get player coordinates
        player_coord = None
        player_team = None
        for player, coord in start_frame.players_coordinates.items():
            if player.player_id == str(row.player_id):
                player_coord = np.array([coord.x, coord.y])
                player_team = player.team.team_id
                break

        if player_coord is None:
            continue

        # --- Start frame distances ---
        opponents_start = np.array([[c.x, c.y] for p, c in start_frame.players_coordinates.items()
                                    if p.team.team_id != player_team])
        if len(opponents_start) > 0:
            distances_start = cdist([player_coord], opponents_start).flatten()
            close_start = distances_start[distances_start <= 5]
            avg_distance_start = close_start.mean() if len(close_start) > 0 else np.nan
        else:
            avg_distance_start = np.nan

        # --- End frame distances ---
        opponents_end = np.array([[c.x, c.y] for p, c in end_frame.players_coordinates.items()
                                if p.team.team_id != player_team])
        if len(opponents_end) > 0:
            distances_end = cdist([player_coord], opponents_end).flatten()
            close_end = distances_end[distances_end <= 5]
            avg_distance_end = close_end.mean() if len(close_end) > 0 else np.nan
        else:
            avg_distance_end = np.nan

        # Store results in mid_obe DataFrame
        mid_obe.at[row.Index, 'def_density_change'] = avg_distance_end - avg_distance_start
        
    return mid_obe

In [43]:
mid_obe = midfielders_obe(de_all_matches)
mid_obe = space_creation(mid_obe, all_tracking)

Start frame 2523 not found in match 2017461
Start frame 2529 not found in match 2017461
End frame 54489 not found in match 2015213
End frame 30255 not found in match 2013725
End frame 55652 not found in match 2013725
End frame 18801 not found in match 2011166
End frame 44696 not found in match 2011166
End frame 44694 not found in match 2011166
End frame 930 not found in match 2006229
End frame 32272 not found in match 2006229
End frame 34131 not found in match 2006229
Start frame 52029 not found in match 2006229
End frame 35966 not found in match 1996435
End frame 43171 not found in match 1996435
End frame 53595 not found in match 1996435
End frame 5481 not found in match 1953632
End frame 8632 not found in match 1953632
End frame 15468 not found in match 1953632
End frame 30795 not found in match 1953632
End frame 40608 not found in match 1953632
End frame 11177 not found in match 1925299
End frame 30128 not found in match 1925299
End frame 30136 not found in match 1925299
End frame 8

In [None]:
# NEXT STEP : CALCULATE AVERAGE DEFENSIVE DENSITY CHANGE PER PLAYER PER90MIN