In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import warnings
import os
# Put the parent of the current working directory on the import path so that
# the `src` package used by the following cells can be resolved.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(project_root) == 0:
    # Front of the list: this checkout wins over any same-named installed package.
    sys.path.insert(0, project_root)

# Blanket filter: every warning category is suppressed from here on.
warnings.filterwarnings(action='ignore')

In [2]:
# Import the project's StatsBomb helper classes. Re-running this cell is
# harmless: it only (re)binds the two class names.
try:
    from src.utils.config import StatsBombConfig
    from src.utils.data_fetcher import StatsBombDataFetcher
except ImportError as e:
    print(f"✗ Import error: {e}")
    print("Make sure you're running this from the notebooks directory")
    # Stop here instead of continuing: otherwise the failure only shows up
    # later as a NameError on StatsBombConfig / StatsBombDataFetcher, far
    # from its real cause. The hint above is still printed first.
    raise
else:
    # Reached only when both imports succeeded.
    print("✓ Imports successful")

✓ Imports successful


In [3]:
# Instantiate the StatsBomb configuration and data fetcher used by every
# later cell. Re-running this cell simply rebuilds both objects.
try:
    config = StatsBombConfig()
    fetcher = StatsBombDataFetcher()
except Exception as e:
    print(f"✗ Initialization error: {e}")
    print("Check your .env file for STATSBOMB_USERNAME and STATSBOMB_PASSWORD")
    # Propagate after printing the hint: swallowing the error would leave
    # `config` / `fetcher` undefined and push the failure into later cells
    # as an unrelated-looking NameError.
    raise
else:
    # Reached only when both objects were constructed.
    print("✓ StatsBomb classes initialized successfully")

Usuario configurado: itam_hackathon@hudl.com
Usuario configurado: itam_hackathon@hudl.com
✓ StatsBomb classes initialized successfully


In [4]:
# Ask the project fetcher for the competitions/seasons available to this account.
competitions = fetcher.get_competitions()
# Bare trailing expression so the notebook renders the returned table.
competitions

Obteniendo competiciones...
4 competiciones obtenidas


Unnamed: 0,competition_id,season_id,country_name,competition_name,competition_gender,competition_youth,competition_international,season_name,match_updated,match_updated_360,match_available_360,match_available
0,73,317,Mexico,Liga MX,male,False,False,2024/2025,2025-08-30T16:14:20.970616,2025-08-30T16:14:20.970616,2025-08-30T16:14:20.970616,2025-08-30T16:14:20.970616
1,73,281,Mexico,Liga MX,male,False,False,2023/2024,2024-12-20T23:40:31.103974,2024-12-20T23:40:31.103974,2024-12-20T23:40:31.103974,2024-12-20T23:40:31.103974
2,73,235,Mexico,Liga MX,male,False,False,2022/2023,2024-09-28T11:05:11.667984,2024-09-28T11:05:11.667984,2024-09-28T11:05:11.667984,2024-09-28T11:05:11.667984
3,73,108,Mexico,Liga MX,male,False,False,2021/2022,2024-12-10T08:59:57.612449,2024-12-10T08:59:57.612449,2024-12-10T08:59:57.612449,2024-12-10T08:59:57.612449


In [5]:
# Per-player season statistics for competition 73, season 108 (Liga MX 2021/2022).
player_season = fetcher.get_player_season_stats(73, 108)

# Column index first, then the frame's dimensions after a blank line.
print(player_season.columns, f"\nShape: {player_season.shape}", sep="\n")

# Preview the first three rows as the cell's rendered output.
player_season.head(n=3)

Obteniendo estadísticas de los jugadores para (Comp: 73, Temp: 108)...
Estadísticas de 603 jugadores obtenidas
Index(['account_id', 'player_id', 'player_name', 'team_id', 'team_name',
       'competition_id', 'competition_name', 'season_id', 'season_name',
       'country_id',
       ...
       'player_season_fhalf_lbp_to_space_10_90',
       'player_season_f3_lbp_to_space_10_90',
       'player_season_lbp_to_space_2_90',
       'player_season_fhalf_lbp_to_space_2_90',
       'player_season_f3_lbp_to_space_2_90', 'player_season_lbp_to_space_5_90',
       'player_season_fhalf_lbp_to_space_5_90',
       'player_season_f3_lbp_to_space_5_90', 'player_season_360_minutes',
       'player_season_defensive_actions_90'],
      dtype='object', length=224)

Shape: (603, 224)


Unnamed: 0,account_id,player_id,player_name,team_id,team_name,competition_id,competition_name,season_id,season_name,country_id,...,player_season_fhalf_lbp_to_space_10_90,player_season_f3_lbp_to_space_10_90,player_season_lbp_to_space_2_90,player_season_fhalf_lbp_to_space_2_90,player_season_f3_lbp_to_space_2_90,player_season_lbp_to_space_5_90,player_season_fhalf_lbp_to_space_5_90,player_season_f3_lbp_to_space_5_90,player_season_360_minutes,player_season_defensive_actions_90
0,41100,12254,Alessio da Cruz,1299,Santos Laguna,73,Liga MX,108,2021/2022,41,...,0.143358,0.143358,1.720293,1.433577,0.716789,0.860147,0.716789,0.286716,627.8,21.933737
1,41100,28529,Fernando David Arce Juárez,1221,Necaxa,73,Liga MX,108,2021/2022,241,...,0.0,0.0,11.111111,0.0,0.0,11.111111,0.0,0.0,16.2,66.666664
2,41100,30458,Alejandro Zendejas Saavedra,1221,Necaxa,73,Liga MX,108,2021/2022,241,...,0.100318,0.0,2.558101,1.554924,0.401271,1.203812,0.601906,0.0,1794.3,24.52767


In [6]:
# Event feed for match 3799426 (the Juárez vs Atlético match).
eventos = fetcher.get_events(3799426, three=True)

# Quick summary: row count, full column list, then a heading for the
# per-type counts printed right after.
print(
    f"Total eventos: {len(eventos)}",
    f"\nColumnas: {eventos.columns.tolist()}",
    f"\nTipos de eventos únicos:",
    sep="\n",
)
# Frequency of each value in the 'type' column.
print(eventos['type'].value_counts())

# Preview the first three events as the cell's rendered output.
eventos.head(n=3)

Obteniendo eventos del partido 3799426...
3053 eventos obtenidos
Total eventos: 3053

Columnas: ['bad_behaviour_card', 'ball_receipt_exceeds_distance', 'ball_receipt_in_space', 'ball_receipt_outcome', 'ball_recovery_recovery_failure', 'block_deflection', 'carry_end_location', 'clearance_aerial_won', 'clearance_body_part', 'clearance_head', 'clearance_left_foot', 'clearance_other', 'clearance_right_foot', 'counterpress', 'distance_to_nearest_defender', 'dribble_nutmeg', 'dribble_outcome', 'dribble_overrun', 'duel_outcome', 'duel_type', 'duration', 'foul_committed_advantage', 'foul_committed_card', 'foul_committed_offensive', 'foul_committed_penalty', 'foul_committed_type', 'foul_won_advantage', 'foul_won_defensive', 'foul_won_penalty', 'goalkeeper_body_part', 'goalkeeper_end_location', 'goalkeeper_outcome', 'goalkeeper_position', 'goalkeeper_shot_saved_off_target', 'goalkeeper_technique', 'goalkeeper_type', 'id', 'index', 'injury_stoppage_in_chain', 'interception_outcome', 'line_breakin

Unnamed: 0,bad_behaviour_card,ball_receipt_exceeds_distance,ball_receipt_in_space,ball_receipt_outcome,ball_recovery_recovery_failure,block_deflection,carry_end_location,clearance_aerial_won,clearance_body_part,clearance_head,...,substitution_replacement,substitution_replacement_id,tactics,team,team_id,timestamp,type,under_pressure,visible_opponents,visible_teammates
0,,,,,,,,,,,...,,,"{'formation': 352, 'lineup': [{'player': {'id'...",Juárez,1291,00:00:00.000,Starting XI,,,
1,,,,,,,,,,,...,,,"{'formation': 4141, 'lineup': [{'player': {'id...",Atlético San Luis,1287,00:00:00.000,Starting XI,,,
2,,,,,,,,,,,...,,,,Juárez,1291,00:00:00.000,Half Start,,,


In [12]:
# Per-team season statistics for competition 73, season 108 (Liga MX 2021/2022).
team_stats = fetcher.get_team_season_stats(73, 108)

# Print every column name as a plain Python list.
print(team_stats.columns.tolist())

# Preview the first three teams as the cell's rendered output.
team_stats.head(n=3)

Obteniendo estadísticas de equipos (Comp: 73, Temp: 108)...
Estadísticas de 18 equipos obtenidas
['account_id', 'team_name', 'team_id', 'competition_id', 'competition_name', 'season_id', 'season_name', 'team_female', 'team_season_matches', 'team_season_minutes', 'team_season_gd', 'team_season_xgd', 'team_season_np_shots_pg', 'team_season_op_shots_pg', 'team_season_op_shots_outside_box_pg', 'team_season_sp_shots_pg', 'team_season_np_xg_pg', 'team_season_op_xg_pg', 'team_season_sp_xg_pg', 'team_season_np_xg_per_shot', 'team_season_np_shot_distance', 'team_season_op_shot_distance', 'team_season_sp_shot_distance', 'team_season_possessions', 'team_season_possession', 'team_season_directness', 'team_season_pace_towards_goal', 'team_season_gk_pass_distance', 'team_season_gk_long_pass_ratio', 'team_season_box_cross_ratio', 'team_season_passes_inside_box_pg', 'team_season_defensive_distance', 'team_season_ppda', 'team_season_defensive_distance_ppda', 'team_season_opp_passing_ratio', 'team_seaso

Unnamed: 0,account_id,team_name,team_id,competition_id,competition_name,season_id,season_name,team_female,team_season_matches,team_season_minutes,...,team_season_obv_shot_conceded_pg,team_season_obv_defensive_action_conceded_pg,team_season_obv_dribble_carry_conceded_pg,team_season_obv_gk_conceded_pg,team_season_passes_pg,team_season_successful_passes_pg,team_season_passes_conceded_pg,team_season_successful_passes_conceded_pg,team_season_op_passes_pg,team_season_op_passes_conceded_pg
0,41100,América,1229,73,Liga MX,108,2021/2022,False,40,,...,0.118695,0.193226,0.411756,0.038558,419.1,332.85,403.125,316.725,366.9,354.475
1,41100,Atlas,1296,73,Liga MX,108,2021/2022,False,46,,...,-0.174651,0.24717,0.631067,-0.23245,383.695652,287.369565,401.913043,307.956522,333.76087,352.869565
2,41100,Cruz Azul,1224,73,Liga MX,108,2021/2022,False,38,,...,-0.142014,0.120361,0.595703,-0.087403,429.789474,336.947368,430.763158,341.894737,380.631579,379.763158


In [11]:
# For each season of competition 73 (ids paired with their labels), count the
# distinct teams appearing in the fixture list and check whether 'América' is
# among them. After the loop, `season_id`, `season_name`, `matches` and `teams`
# still hold the values from the final iteration.
for season_id, season_name in zip(
    (317, 281, 235, 108),
    ('2024/2025', '2023/2024', '2022/2023', '2021/2022'),
):
    matches = fetcher.get_matches(73, season_id)
    # Home column stacked on top of the away column, then de-duplicated in
    # order of first appearance.
    teams = pd.concat([matches[side] for side in ('home_team', 'away_team')]).unique()
    print(f"\n{season_name}: {len(teams)} equipos")
    print(f"Club América presente: {'América' in teams}")

Obteniendo partidos (Comp: 73, Temp: 317)...
340 partidos obtenidos

2024/2025: 18 equipos
Club América presente: True
Obteniendo partidos (Comp: 73, Temp: 281)...
340 partidos obtenidos

2023/2024: 18 equipos
Club América presente: True
Obteniendo partidos (Comp: 73, Temp: 235)...
342 partidos obtenidos

2022/2023: 18 equipos
Club América presente: True
Obteniendo partidos (Comp: 73, Temp: 108)...
342 partidos obtenidos

2021/2022: 18 equipos
Club América presente: True


In [10]:
# Display the array of team names currently bound to `teams`.
# NOTE(review): `teams` is only assigned inside the per-season loop cell, so this
# shows whatever that loop's final iteration left behind. The execution counts
# (In [10] here, In [11] on the loop) suggest the cells were run out of order —
# confirm this still works on a fresh kernel with Restart & Run All.
teams

array(['Juárez', 'Mazatlán', 'Tigres UANL', 'Monterrey', 'Pachuca',
       'Tijuana', 'Atlas', 'Necaxa', 'Puebla', 'León', 'Guadalajara',
       'América', 'Atlético San Luis', 'Toluca', 'Querétaro',
       'Pumas UNAM', 'Santos Laguna', 'Cruz Azul'], dtype=object)