In [58]:
import sys
import os
import pandas as pd

# Add the project root (the parent of the current working directory) to the
# Python path so the `src` package imported below can be found.
# The path is normalised and only appended when it is missing, so re-running
# this cell does not pile up duplicate "<cwd>/.." entries in sys.path.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if project_root not in sys.path:
    sys.path.append(project_root)

# Import the project modules
from src.config import setup_logging
from src.extract import fetch_statsbomb_event_data
from src.transform import transform_to_shot_events
from src.stats import calculate_shots_stats

# Reload modules when code is changed (uncomment for development)
%load_ext autoreload
%autoreload 2

# Configure logging via the project's helper, handing it the log file location.
logger = setup_logging(
    log_file="../logs/chance_creation.log",
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [59]:
# Fetch the event data and display the names of the columns it contains.
events = fetch_statsbomb_event_data()
events.columns.tolist()

2025-10-04 00:38:34,835 - src.extract.statsbomb_data - INFO - Fetching StatsBomb event data for Europe - UEFA Euro - 2024 - male
2025-10-04 00:38:47,383 - src.extract.statsbomb_data - INFO - Found 187858 events!


['50_50',
 'bad_behaviour_card',
 'ball_receipt_outcome',
 'ball_recovery_offensive',
 'ball_recovery_recovery_failure',
 'block_deflection',
 'block_offensive',
 'block_save_block',
 'carry_end_location',
 'clearance_aerial_won',
 'clearance_body_part',
 'clearance_head',
 'clearance_left_foot',
 'clearance_other',
 'clearance_right_foot',
 'counterpress',
 'dribble_no_touch',
 'dribble_nutmeg',
 'dribble_outcome',
 'dribble_overrun',
 'duel_outcome',
 'duel_type',
 'duration',
 'foul_committed_advantage',
 'foul_committed_card',
 'foul_committed_offensive',
 'foul_committed_penalty',
 'foul_committed_type',
 'foul_won_advantage',
 'foul_won_defensive',
 'foul_won_penalty',
 'goalkeeper_body_part',
 'goalkeeper_end_location',
 'goalkeeper_outcome',
 'goalkeeper_penalty_saved_to_post',
 'goalkeeper_position',
 'goalkeeper_punched_out',
 'goalkeeper_shot_saved_off_target',
 'goalkeeper_shot_saved_to_post',
 'goalkeeper_success_in_play',
 'goalkeeper_technique',
 'goalkeeper_type',
 'id'

In [60]:
# Derive the shot events from the full event data, then print a summary of the
# resulting frame (columns, non-null counts and dtypes).
shots = transform_to_shot_events(events)
shots.info()

2025-10-04 00:38:48,172 - src.transform.shot_events - INFO - Transforming 187858 records from events data to shot events.
2025-10-04 00:38:49,860 - src.transform.shot_events - INFO - Transformed 1340 records from events data to shot events.
2025-10-04 00:38:49,865 - src.transform.shot_events - INFO - Shots from set piece: 696
2025-10-04 00:38:49,868 - src.transform.shot_events - INFO - Shots from open play: 644


<class 'pandas.core.frame.DataFrame'>
Index: 1340 entries, 184643 to 185982
Data columns (total 21 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   match_id               1340 non-null   int64  
 1   team                   1340 non-null   object 
 2   player                 1340 non-null   object 
 3   location               1340 non-null   object 
 4   timestamp              1340 non-null   object 
 5   possession             1340 non-null   int64  
 6   type                   1340 non-null   object 
 7   shot_from_set_piece    1340 non-null   bool   
 8   shot_type              1340 non-null   object 
 9   shot_aerial_won        133 non-null    object 
 10  shot_body_part         1340 non-null   object 
 11  shot_end_location      1340 non-null   object 
 12  shot_first_time        380 non-null    object 
 13  shot_follows_dribble   1 non-null      object 
 14  shot_one_on_one        53 non-null     object 
 15  sh

In [61]:
# Preview the first five shot events.
shots.head(n=5)

Unnamed: 0,match_id,team,player,location,timestamp,possession,type,shot_from_set_piece,shot_type,shot_aerial_won,...,shot_end_location,shot_first_time,shot_follows_dribble,shot_one_on_one,shot_outcome,shot_redirect,shot_saved_off_target,shot_saved_to_post,shot_statsbomb_xg,shot_technique
184643,3942819,Netherlands,Xavi Simons,"[99.6, 51.2]",00:06:41.445,11,Shot,False,Open Play,,...,"[120.0, 37.4, 2.0]",,,,Goal,,,,0.048935,Normal
184644,3942819,England,Harry Kane,"[88.2, 36.6]",00:12:39.772,17,Shot,False,Open Play,,...,"[118.3, 37.6, 0.3]",,,,Saved,,,,0.028932,Normal
184645,3942819,England,Bukayo Saka,"[104.5, 41.6]",00:13:41.900,18,Shot,True,Open Play,,...,"[106.2, 41.7]",,,,Blocked,,,,0.07175,Normal
184646,3942819,England,Harry Kane,"[108.1, 40.0]",00:17:34.626,19,Shot,False,Penalty,,...,"[120.0, 36.5, 0.2]",,,,Goal,,,,0.7835,Normal
184647,3942819,England,Phil Foden,"[117.0, 47.6]",00:22:28.596,27,Shot,False,Open Play,,...,"[117.7, 46.1, 0.3]",,,,Saved,,,,0.188995,Normal


In [71]:
# Compute the shot statistics, then rank the rows by the share of xG that comes
# from open play (highest first) and display at most 30 of them.
shot_stats = calculate_shots_stats(shots)
(
    shot_stats
    .sort_values(by="xg_from_open_play_percentage", ascending=False)
    .head(30)
)

2025-10-04 00:50:58,739 - src.stats.shots - INFO - Calculating statistics for shots.


Unnamed: 0,team,shots_from_set_piece,shots_from_open_play,shots_from_set_piece_percentage,shots_from_open_play_percentage,xg_from_set_piece,xg_from_open_play,xg_from_set_piece_percentage,xg_from_open_play_percentage
3,France,52,49,51.485149,48.514851,3.020992,9.091092,24.941969,75.058031
8,Switzerland,28,34,45.16129,54.83871,2.1338,6.349971,25.151552,74.848448
15,Georgia,13,17,43.333333,56.666667,1.30939,2.907671,31.049813,68.950187
5,Portugal,55,40,57.894737,42.105263,4.587628,10.121717,31.188525,68.811475
1,England,43,38,53.08642,46.91358,3.339027,7.16965,31.774003,68.225997
14,Slovenia,21,19,52.5,47.5,1.849305,3.900206,32.164562,67.835438
18,Albania,13,19,40.625,59.375,0.741814,1.441797,33.971892,66.028108
13,Romania,14,24,36.842105,63.157895,1.396654,2.360074,37.1774,62.8226
10,Belgium,23,30,43.396226,56.603774,1.689495,2.688633,38.589442,61.410558
22,Croatia,19,24,44.186047,55.813953,2.395776,3.428157,41.136739,58.863261
