In [1]:
from statsbombpy import sb
import pandas as pd
import numpy as np
from shapely.geometry import LineString
import json

def is_progressive_pass(row):
    """Return whether a pass ends meaningfully closer to the goal line.

    The goal line is taken to be at x = 120. A pass counts as progressive
    when the remaining x-distance to that line after the pass is at most
    90% of the remaining x-distance before it.

    Args:
        row: Mapping-like event (dict or pandas row) exposing ``'startx'``
            and ``'endx'``.

    Returns:
        The result of the ``<=`` comparison; False when either coordinate
        is NaN, because comparisons against NaN are False.
    """
    pitch_end_x = 120
    # Largest remaining distance that still qualifies as progress.
    allowed_remaining = 0.9 * (pitch_end_x - row['startx'])
    return pitch_end_x - row['endx'] <= allowed_remaining

def is_progressive_carry(row):
    """Return whether a carry ends meaningfully closer to the goal line.

    The goal line is taken to be at x = 120. A carry counts as progressive
    when the remaining x-distance to that line after the carry is at most
    90% of the remaining x-distance before it.

    Args:
        row: Mapping-like event (dict or pandas row) exposing ``'startx'``
            and ``'carryendx'``.

    Returns:
        The result of the ``<=`` comparison; False when either coordinate
        is NaN, because comparisons against NaN are False.
    """
    pitch_end_x = 120
    # Largest remaining distance that still qualifies as progress.
    allowed_remaining = 0.9 * (pitch_end_x - row['startx'])
    return pitch_end_x - row['carryendx'] <= allowed_remaining

def _is_finite_point(x, y):
    """Return True when both coordinates convert to finite floats."""
    try:
        return bool(np.isfinite(float(x)) and np.isfinite(float(y)))
    except (TypeError, ValueError):
        # None, strings, nested containers, etc. are not usable coordinates.
        return False


def detect_line_breaking_pass(row):
    """Return whether a pass crosses a segment joining two opposing outfielders.

    The pass is modelled as the segment from (startx, starty) to
    (endx, endy). Every pair of freeze-frame players that are neither
    teammates nor keepers defines a "defensive line" segment; the pass is
    flagged as soon as it intersects any of those segments.

    Args:
        row: Mapping-like event (dict or pandas row) with ``'type'`` and,
            for passes, ``'startx'``, ``'starty'``, ``'endx'``, ``'endy'``
            and ``'freeze_frame'`` (a list of dicts with ``'teammate'``,
            ``'keeper'`` and ``'location'`` entries).

    Returns:
        True if the pass segment intersects at least one defender-pair
        segment. False for non-pass events, for a missing or too-short
        freeze frame, for missing or non-finite pass coordinates, or when
        fewer than two usable defenders are present.
    """
    if row['type'] != 'Pass':
        return False

    freeze_frame = row.get('freeze_frame')
    if not isinstance(freeze_frame, list) or len(freeze_frame) < 2:
        return False

    # Validate the pass coordinates explicitly instead of relying on the
    # geometry constructor to fail on None/NaN input.
    start = (row.get('startx'), row.get('starty'))
    end = (row.get('endx'), row.get('endy'))
    if not (_is_finite_point(*start) and _is_finite_point(*end)):
        return False

    defenders = []
    for player in freeze_frame:
        # A missing 'teammate' flag is treated as a teammate (i.e. skipped).
        if player.get('teammate', True) or player.get('keeper', False):
            continue
        location = player.get('location')
        if (
            isinstance(location, (list, tuple))
            and len(location) == 2
            and _is_finite_point(*location)
        ):
            defenders.append(tuple(location))

    # No pair of defenders means there is no line to break; skip geometry work.
    if len(defenders) < 2:
        return False

    try:
        pass_line = LineString([start, end])
    except Exception:
        # Best effort: an unbuildable pass geometry is simply "not line
        # breaking", but KeyboardInterrupt/SystemExit are no longer swallowed.
        return False

    for position, first in enumerate(defenders):
        for second in defenders[position + 1:]:
            if pass_line.intersects(LineString([first, second])):
                return True
    return False

# Competition/season to export; also used to build the output file name.
COMPETITION_ID = 53
SEASON_ID = 315

# Raw event columns kept from each match's event table.
EVENT_COLUMNS = [
    'id', 'index', 'minute', 'second', 'duration', 'timestamp', 'period',
    'possession_team', 'team', 'play_pattern', 'player', 'position', 'player_id',
    'pass_recipient', 'pass_recipient_id', 'type', 'under_pressure', 'location',
    'pass_end_location', 'carry_end_location', 'pass_outcome', 'pass_height',
    'pass_length', 'pass_angle', 'pass_type', 'shot_outcome',
]

# (list-valued source column, x column, y column) triples to flatten.
COORDINATE_COLUMNS = [
    ('location', 'startx', 'starty'),
    ('pass_end_location', 'endx', 'endy'),
    ('carry_end_location', 'carryendx', 'carryendy'),
]


def _split_xy(points, x_name, y_name):
    """Expand a Series of ``[x, y]`` lists into a two-column DataFrame.

    Entries that are not a list of exactly two items become a missing pair.
    The result shares the index of ``points`` so it aligns on assignment.
    """
    pairs = [
        point if isinstance(point, list) and len(point) == 2 else [None, None]
        for point in points
    ]
    return pd.DataFrame(pairs, index=points.index, columns=[x_name, y_name])


matches = sb.matches(competition_id=COMPETITION_ID, season_id=SEASON_ID)
match_ids = matches['match_id'].tolist()

all_data = []

for match_id in match_ids:
    print(f"Processing match {match_id}...")

    df = sb.events(match_id=match_id)
    df = df[EVENT_COLUMNS].copy()
    df = df.sort_values(by=['period', 'timestamp']).reset_index(drop=True)

    # Flatten the list-valued location columns into scalar x/y columns.
    for source, x_name, y_name in COORDINATE_COLUMNS:
        df[[x_name, y_name]] = _split_xy(df[source], x_name, y_name)
    df = df.drop(columns=[source for source, _, _ in COORDINATE_COLUMNS])

    df['progressive_pass'] = df.apply(
        lambda r: is_progressive_pass(r) if r['type'] == 'Pass' else False, axis=1
    )
    df['progressive_carry'] = df.apply(
        lambda r: is_progressive_carry(r) if r['type'] == 'Carry' else False, axis=1
    )

    # Keep only passes and carries made by players in a midfield position.
    # The explicit copy makes the later column assignments act on an
    # independent frame instead of a slice (avoids SettingWithCopyWarning).
    is_pass_or_carry = df['type'].isin(['Pass', 'Carry'])
    is_midfielder = df['position'].str.contains('Midfield', case=False, na=False)
    df = df[is_pass_or_carry & is_midfielder].copy()

    df['forward_pass'] = (df['type'] == 'Pass') & (df['endx'] > df['startx'])
    df['forward_carry'] = (df['type'] == 'Carry') & (df['carryendx'] > df['startx'])

    # Optional per-match JSON (looked up in the working directory) whose
    # records are joined to events via 'event_uuid'; it is expected to
    # provide the 'freeze_frame' column used for line-breaking detection.
    try:
        with open(f"{match_id}.json", encoding='utf-8') as f:
            json_data = json.load(f)
    except FileNotFoundError:
        # Best effort: continue without freeze frames, but say so, because
        # every line_breaking value for this match will then be False.
        print(f"  No {match_id}.json found; line_breaking will be False for this match.")
    else:
        json_df = pd.json_normalize(json_data, sep='_')
        df = df.merge(json_df, how='left', left_on='id', right_on='event_uuid')

    # apply(axis=1) on an empty frame returns a DataFrame, which cannot be
    # assigned to a single column, so handle that case explicitly.
    if df.empty:
        df['line_breaking'] = False
    else:
        df['line_breaking'] = df.apply(detect_line_breaking_pass, axis=1)

    # Add match_id for reference
    df['match_id'] = match_id

    all_data.append(df)

final_df = pd.concat(all_data, ignore_index=True)

output_path = f"competition_{COMPETITION_ID}_season_{SEASON_ID}_all_matches.csv"
final_df.to_csv(output_path, index=False)
print(f"Saved to {output_path}")




Processing match 4020846...




Processing match 4020077...




Processing match 4020005...




Processing match 4018357...




Processing match 4018356...




Processing match 4018355...




Processing match 4018354...




Processing match 3998858...




Processing match 3998859...




Processing match 3998857...




Processing match 3998856...




Processing match 3998855...




Processing match 3998854...




Processing match 3998852...




Processing match 3998853...




Processing match 3998851...




Processing match 3998850...




Processing match 3998848...




Processing match 3998849...




Processing match 3998846...




Processing match 3998847...




Processing match 3998844...




Processing match 3998845...




Processing match 3998837...




Processing match 3998842...




Processing match 3998843...




Processing match 3998840...




Processing match 3998839...




Processing match 3998838...




Processing match 3998836...




Processing match 3998841...




Saved to competition_53_season_315_all_matches.csv
