In [1]:
import requests
import pandas as pd
import json
import numpy as np
from scipy.spatial.distance import cdist
import os
from datetime import timedelta
from scipy.spatial import Voronoi
from shapely.geometry import Polygon, LineString,Point, box
from kloppy import skillcorner
from kloppy.domain import Team
from kloppy.domain import Orientation
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.ticker import PercentFormatter
from urllib.request import urlopen
from highlight_text import fig_text
from matplotlib.animation import FuncAnimation
import re
import random

from PIL import Image

from mplsoccer import PyPizza, add_image, FontManager


# Setup pitch and plot
from mplsoccer import Pitch
#from mplsoccer.pitch import Pitch ,VerticalPitch

# username = "XXX"
# password = "XXX"


# from skillcorner.client import SkillcornerClient
# client=SkillcornerClient(username=username,password=password)

def time_to_seconds(time_str):
    """Convert an ``"H:M:S"`` timestamp string into a number of seconds.

    Parameters
    ----------
    time_str : str or None
        Colon-separated hours, minutes and seconds (for example
        ``"01:02:03"``). Each of the three parts must parse as an integer.

    Returns
    -------
    int
        Total seconds represented by ``time_str``. When ``time_str`` is
        ``None`` the fallback ``90 * 60`` (5400 seconds, i.e. 90 minutes)
        is returned.

    Raises
    ------
    ValueError
        If the string does not split into exactly three integer parts.
    """
    if time_str is None:
        # Fallback value: 90 minutes expressed in seconds (5400).
        return 90 * 60

    hours, minutes, seconds = (int(part) for part in time_str.split(':'))
    return (hours * 60 + minutes) * 60 + seconds

# Fonts used for figure text. Each FontManager is given the URL of a .ttf
# file, so running this cell presumably needs network access — TODO confirm
# whether mplsoccer caches the download.
font_normal = FontManager('https://raw.githubusercontent.com/googlefonts/roboto/main/'
                          'src/hinted/Roboto-Regular.ttf')
font_italic = FontManager('https://raw.githubusercontent.com/googlefonts/roboto/main/'
                          'src/hinted/Roboto-Italic.ttf')
# NOTE(review): despite its name, font_bold points at the Roboto Slab
# variable-weight file ('RobotoSlab[wght].ttf'), not at a Roboto bold face.
font_bold = FontManager('https://raw.githubusercontent.com/google/fonts/main/apache/robotoslab/'
                        'RobotoSlab[wght].ttf')

In [2]:
def load_matches(matches_json_path):
    """Load tracking data, dynamic events and raw metadata for every listed match.

    The match ids are read from the local JSON file at ``matches_json_path``
    (a list of objects that each carry an ``"id"`` key). For every id the
    function downloads, from the SkillCorner open-data GitHub repository:

    * the extrapolated tracking data, parsed with ``kloppy.skillcorner.load``
      and re-oriented to ``Orientation.STATIC_HOME_AWAY``;
    * the dynamic-events CSV (best effort: a failed download is reported with
      ``print`` and that match is skipped);
    * the raw ``*_match.json`` metadata document.

    Parameters
    ----------
    matches_json_path : str or path-like
        Path of the JSON file listing the matches to load.

    Returns
    -------
    tuple
        ``(all_tracking, de_all_matches, all_metadata)`` where
        ``all_tracking`` is a list of kloppy tracking datasets (one per
        match, in file order), ``de_all_matches`` is one DataFrame with the
        dynamic events of all successfully loaded matches concatenated, and
        ``all_metadata`` is a list of the decoded metadata JSON documents.

    Raises
    ------
    ValueError
        If no dynamic-events file could be loaded for any match.
    requests.HTTPError
        If a metadata download answers with an HTTP error status.
    """
    # Tracking and metadata are pinned to one commit of the open-data repo.
    # NOTE(review): the dynamic events are read from ``master`` instead, so the
    # two sources can drift apart — confirm whether they should share the pin.
    pinned_commit = '741bdb798b0c1835057e3fa77244c1571a00e4aa'

    with open(matches_json_path, "r", encoding="utf-8") as f:
        matches_json = json.load(f)

    match_ids = [match["id"] for match in matches_json]

    all_tracking = []

    for match_id in match_ids:
        tracking_data_github_url = (
            f'https://media.githubusercontent.com/media/SkillCorner/opendata/{pinned_commit}'
            f'/data/matches/{match_id}/{match_id}_tracking_extrapolated.jsonl'
        )
        metadata_url = (
            f'https://raw.githubusercontent.com/SkillCorner/opendata/{pinned_commit}'
            f'/data/matches/{match_id}/{match_id}_match.json'
        )

        dataset = skillcorner.load(
            meta_data=metadata_url,
            raw_data=tracking_data_github_url,
            # Optional arguments,
            coordinates="skillcorner",
            include_empty_frames=False)

        # kloppy's transform() returns a new dataset rather than modifying the
        # existing one, so the result has to be kept or the re-orientation is
        # silently dropped.
        dataset = dataset.transform(to_orientation=Orientation.STATIC_HOME_AWAY)

        all_tracking.append(dataset)

    all_de_dfs = []

    for match_id in match_ids:
        url = f"https://raw.githubusercontent.com/SkillCorner/opendata/master/data/matches/{match_id}/{match_id}_dynamic_events.csv"
        try:
            # low_memory=False makes pandas infer each column's dtype from the
            # whole file; this presumably addresses the warning pandas raised
            # on this read_csv call — TODO confirm it was a DtypeWarning.
            de_match = pd.read_csv(url, low_memory=False)
            all_de_dfs.append(de_match)
        except Exception as e:
            # Deliberate best effort: report the failure and keep going.
            print(f"Failed to load dynamic events for match {match_id}: {e}")

    if not all_de_dfs:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError(
            f"No dynamic events could be loaded for any of the {len(match_ids)} matches "
            f"listed in {matches_json_path}"
        )

    de_all_matches = pd.concat(all_de_dfs, ignore_index=True)

    all_metadata = []

    for match_id in match_ids:
        metadata_match = (
            f'https://raw.githubusercontent.com/SkillCorner/opendata/{pinned_commit}'
            f'/data/matches/{match_id}/{match_id}_match.json'
        )
        # A timeout stops a stalled connection from hanging the notebook, and
        # raise_for_status() surfaces HTTP errors instead of a JSON decode
        # failure on the error page.
        response = requests.get(metadata_match, timeout=30)
        response.raise_for_status()
        raw_match_data = response.json()
        all_metadata.append(raw_match_data)

    return all_tracking, de_all_matches, all_metadata
        

In [3]:
# Load matches
# The match list is expected at "<parent of the current working directory>/data/matches.json",
# so the result depends on the directory the kernel was started from.

matches_json_path = os.path.join(os.path.dirname(os.getcwd()), "data/matches.json")
# Unpacks the three values returned by load_matches for use in later cells.
all_tracking, de_all_matches, all_metadata = load_matches(matches_json_path)

  de_match = pd.read_csv(url)
  de_match = pd.read_csv(url)


In [4]:
def midfielders_obe(de_all_matches):
    """Select the matched off-ball events performed by midfielders.

    Keeps the rows of ``de_all_matches`` whose ``event_type_id`` equals 1 and
    whose ``player_position_id`` is one of 9-15, adds an ``"id"`` column built
    as ``"<event_id>_<match_id>"``, renumbers the index from 0, and finally
    keeps only the rows where both ``is_player_possession_start_matched`` and
    ``is_player_possession_end_matched`` equal ``True``.

    Parameters
    ----------
    de_all_matches : pandas.DataFrame
        Dynamic events table. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The filtered events. The index is renumbered *before* the final
        "matched" filter, so it can contain gaps.
    """
    midfielder_position_ids = [9, 10, 11, 12, 13, 14, 15]

    # Off-ball events (event_type_id == 1) performed by a midfield position.
    is_off_ball = de_all_matches["event_type_id"] == 1
    is_midfielder = de_all_matches["player_position_id"].isin(midfielder_position_ids)
    candidates = de_all_matches.loc[is_off_ball & is_midfielder].copy()

    # Identifier that stays unique across matches: "<event_id>_<match_id>".
    event_part = candidates["event_id"].astype(str)
    match_part = candidates["match_id"].astype(str)
    candidates["id"] = event_part + "_" + match_part
    candidates = candidates.reset_index(drop=True)

    # Keep only events whose possession start AND end were both matched.
    start_matched = candidates["is_player_possession_start_matched"].eq(True)
    end_matched = candidates["is_player_possession_end_matched"].eq(True)
    return candidates[start_matched & end_matched]

In [5]:
# Matched off-ball events performed by midfielders, across all loaded matches.
mid_obe = midfielders_obe(de_all_matches)

In [6]:
def _normalize_player_id(player_id):
    """Return ``player_id`` as the string used to compare against tracking player ids."""
    # pandas stores an integer id column as float once it contains a missing
    # value; a plain str() would then give "123.0", which never equals the
    # "123" carried by the tracking data.
    if isinstance(player_id, (float, np.floating)) and float(player_id).is_integer():
        return str(int(player_id))
    return str(player_id)


def _as_frame_id(value):
    """Return ``value`` as an integer frame id, or None when it is missing."""
    if value is None or pd.isna(value):
        return None
    return int(value)


def _locate_frames(match, frame_ids):
    """Return ``{frame_id: first matching frame}`` from one pass over ``match.frames``."""
    wanted = {frame_id for frame_id in frame_ids if frame_id is not None}
    found = {}
    if not wanted:
        return found
    for frame in match.frames:
        frame_id = int(frame.frame_id)
        if frame_id in wanted and frame_id not in found:
            found[frame_id] = frame
            if len(found) == len(wanted):
                break
    return found


def _player_position(frame, player_key):
    """Return ``(xy array, team_id)`` of the player in ``frame``, or ``(None, None)``."""
    for player, coord in frame.players_coordinates.items():
        if player.player_id == player_key:
            return np.array([coord.x, coord.y]), player.team.team_id
    return None, None


def _mean_nearby_opponent_distance(frame, origin, team_id, radius):
    """Mean distance from ``origin`` to opponents within ``radius``; NaN when there are none."""
    opponents = np.array([[c.x, c.y] for p, c in frame.players_coordinates.items()
                          if p.team.team_id != team_id])
    if len(opponents) == 0:
        return np.nan
    distances = cdist([origin], opponents).flatten()
    nearby = distances[distances <= radius]
    return nearby.mean() if len(nearby) > 0 else np.nan


def space_occupation(row, de_all_matches, all_tracking, min_displacement=5, opponent_radius=5):
    """Compute the space-occupation value of one off-ball event.

    The value is the change, between the event's start frame and its peak
    passing-option frame, of the mean distance from the player to the
    opponents located within ``opponent_radius`` of the player
    (``mean at peak - mean at start``).

    Parameters
    ----------
    row : pandas.Series
        One off-ball event. Reads ``match_id``, ``frame_start``,
        ``peak_passing_option_frame``, ``player_id``, ``dangerous`` and
        ``difficult_pass_target``.
    de_all_matches : pandas.DataFrame
        Unused; kept so existing callers do not need to change.
    all_tracking : list
        Tracking datasets; the one whose ``metadata.game_id`` equals the
        row's ``match_id`` is used.
    min_displacement : float, default 5
        Minimum distance the player must cover between the two frames for the
        event to be evaluated (same unit as the tracking coordinates; the
        diagnostic message reports it as metres).
    opponent_radius : float, default 5
        Only opponents at most this far from the player are averaged.

    Returns
    -------
    float or None
        The space-occupation value. It is NaN when no opponent lies within
        ``opponent_radius`` in either frame. ``None`` is returned when the
        event is skipped: it is not ``dangerous``, it is a
        ``difficult_pass_target``, a frame or the player cannot be found, or
        the player moved less than ``min_displacement``.

    Raises
    ------
    ValueError
        If the row's match is not present in ``all_tracking``.
    """
    # Find the tracking dataset of the match this event belongs to.
    target_match_id = int(row["match_id"])
    match = next((m for m in all_tracking if int(m.metadata.game_id) == target_match_id), None)
    if match is None:
        raise ValueError(f"Match {row['match_id']} not found in all_tracking")

    # Only dangerous runs that are not difficult pass targets produce a value,
    # so reject everything else before the expensive scan over the frames.
    # ``==`` (not ``is``) is intentional: the flags may be numpy booleans or NaN.
    if not (row["dangerous"] == True and row["difficult_pass_target"] == False):  # noqa: E712
        return None

    # Look both frames up in a single pass; a missing (NaN) frame id is treated
    # like a frame that does not exist in the tracking data.
    start_id = _as_frame_id(row["frame_start"])
    peak_id = _as_frame_id(row["peak_passing_option_frame"])
    frames = _locate_frames(match, (start_id, peak_id))

    start_frame = frames.get(start_id)
    if start_frame is None:
        print(f"Start frame {row['frame_start']} not found in match {row['match_id']}")
        return None  # Skip if start frame not found

    peak_frame = frames.get(peak_id)
    if peak_frame is None:
        print(f"Peak passing option frame {row['peak_passing_option_frame']} not found in match {row['match_id']}")
        return None  # Skip if peak passing option frame not found

    # Player position (and team) at the start and at the peak frame.
    player_key = _normalize_player_id(row["player_id"])
    player_coord, player_team = _player_position(start_frame, player_key)
    if player_coord is None:
        return None
    peak_coord, _ = _player_position(peak_frame, player_key)
    if peak_coord is None:
        return None

    # The run must cover at least ``min_displacement`` between the two frames.
    distance = np.linalg.norm(peak_coord - player_coord)
    if distance < min_displacement:
        print(f"Player {row['player_id']} wasn't at least {min_displacement}m away from peak passing option in match {row['match_id']}")
        return None  # Skip if not at least min_displacement away

    avg_distance_start = _mean_nearby_opponent_distance(
        start_frame, player_coord, player_team, opponent_radius)
    avg_distance_peak = _mean_nearby_opponent_distance(
        peak_frame, peak_coord, player_team, opponent_radius)

    return avg_distance_peak - avg_distance_start