*Copyright 2024 Jaeyoung Chun*

You may not make copies of this and use or distribute it for any purpose.

# Open Data

`statsbombpy` is a Python package that allows you to easily stream StatsBomb data into your Python code.

Installation:

```
conda activate facamp
pip install statsbombpy
```

In [1]:
import os
from statsbombpy import sb
import warnings
warnings.filterwarnings("ignore", message="credentials were not supplied")

## Competitions

### All Competitions

In [2]:
# Load the table of competitions/seasons exposed by statsbombpy (one row per competition-season pair).
df_competitions = sb.competitions()

In [3]:
# Bare last expression: the notebook renders the competitions table as the cell output.
df_competitions

Unnamed: 0,competition_id,season_id,country_name,competition_name,competition_gender,competition_youth,competition_international,season_name,match_updated,match_updated_360,match_available_360,match_available
0,9,281,Germany,1. Bundesliga,male,False,False,2023/2024,2024-05-21T03:36:44.090970,2024-05-21T03:38:27.534702,2024-05-21T03:38:27.534702,2024-05-21T03:36:44.090970
1,9,27,Germany,1. Bundesliga,male,False,False,2015/2016,2024-05-19T11:11:14.192381,,,2024-05-19T11:11:14.192381
2,1267,107,Africa,African Cup of Nations,male,False,True,2023,2024-02-28T18:02:23.493027,,,2024-02-28T18:02:23.493027
3,16,4,Europe,Champions League,male,False,False,2018/2019,2024-05-13T13:14:25.239651,2021-06-13T16:17:31.694,,2024-05-13T13:14:25.239651
4,16,1,Europe,Champions League,male,False,False,2017/2018,2024-02-13T02:35:28.134882,2021-06-13T16:17:31.694,,2024-02-13T02:35:28.134882
...,...,...,...,...,...,...,...,...,...,...,...,...
67,55,43,Europe,UEFA Euro,male,False,True,2020,2024-04-16T12:44:40.558402,2024-04-16T12:47:18.505110,2024-04-16T12:47:18.505110,2024-04-16T12:44:40.558402
68,35,75,Europe,UEFA Europa League,male,False,False,1988/1989,2024-02-12T14:45:05.702250,2021-06-13T16:17:31.694,,2024-02-12T14:45:05.702250
69,53,106,Europe,UEFA Women's Euro,female,False,True,2022,2024-02-13T13:27:17.178263,2024-02-13T13:30:52.820588,2024-02-13T13:30:52.820588,2024-02-13T13:27:17.178263
70,72,107,International,Women's World Cup,female,False,True,2023,2024-04-05T13:22:08.250209,2024-04-05T13:29:51.562450,2024-04-05T13:29:51.562450,2024-04-05T13:22:08.250209


In [4]:
# Distinct competition names, in order of first appearance.
df_competitions["competition_name"].unique()

array(['1. Bundesliga', 'African Cup of Nations', 'Champions League',
       'Copa del Rey', "FA Women's Super League", 'FIFA U20 World Cup',
       'FIFA World Cup', 'Indian Super league', 'La Liga',
       'Liga Profesional', 'Ligue 1', 'Major League Soccer',
       'North American League', 'NWSL', 'Premier League', 'Serie A',
       'UEFA Euro', 'UEFA Europa League', "UEFA Women's Euro",
       "Women's World Cup"], dtype=object)

### FIFA World Cup

In [5]:
# Keep only the rows whose competition is the (men's) FIFA World Cup, then show them.
df_fifa_worldcup = df_competitions.loc[
    df_competitions["competition_name"] == "FIFA World Cup"
]
df_fifa_worldcup

Unnamed: 0,competition_id,season_id,country_name,competition_name,competition_gender,competition_youth,competition_international,season_name,match_updated,match_updated_360,match_available_360,match_available
28,43,106,International,FIFA World Cup,male,False,True,2022,2024-05-15T10:23:32.854925,2024-05-15T10:26:45.467818,2024-05-15T10:26:45.467818,2024-05-15T10:23:32.854925
29,43,3,International,FIFA World Cup,male,False,True,2018,2024-02-12T12:31:56.821876,2021-06-13T16:17:31.694,,2024-02-12T12:31:56.821876
30,43,55,International,FIFA World Cup,male,False,True,1990,2023-06-28T10:58:20.137929,2021-06-12T16:17:31.694,,2023-06-28T10:58:20.137929
31,43,54,International,FIFA World Cup,male,False,True,1986,2023-12-26T22:34:04.263530,2021-06-13T16:17:31.694,,2023-12-26T22:34:04.263530
32,43,51,International,FIFA World Cup,male,False,True,1974,2024-02-13T02:52:29.582599,2021-06-13T16:17:31.694,,2024-02-13T02:52:29.582599
33,43,272,International,FIFA World Cup,male,False,True,1970,2024-02-13T14:23:06.735299,,,2024-02-13T14:23:06.735299
34,43,270,International,FIFA World Cup,male,False,True,1962,2023-06-26T10:38:00.323984,,,2023-06-26T10:38:00.323984
35,43,269,International,FIFA World Cup,male,False,True,1958,2024-02-13T14:22:08.222297,,,2024-02-13T14:22:08.222297


In [6]:
# Which World Cup editions are available?
df_fifa_worldcup["season_name"].unique()

array(['2022', '2018', '1990', '1986', '1974', '1970', '1962', '1958'],
      dtype=object)

### FIFA World Cup 2022

In [7]:
# Narrow the World Cup rows down to the 2022 edition (season_name is a string column).
df_fifa_worldcup_2022 = df_fifa_worldcup.loc[
    df_fifa_worldcup["season_name"] == "2022"
]

In [8]:
# Show the filtered result; a single row is expected for the 2022 edition.
df_fifa_worldcup_2022

Unnamed: 0,competition_id,season_id,country_name,competition_name,competition_gender,competition_youth,competition_international,season_name,match_updated,match_updated_360,match_available_360,match_available
28,43,106,International,FIFA World Cup,male,False,True,2022,2024-05-15T10:23:32.854925,2024-05-15T10:26:45.467818,2024-05-15T10:26:45.467818,2024-05-15T10:23:32.854925


In [9]:
# squeeze() only collapses the frame into a row Series when exactly one row is
# present; with zero or several rows it returns a DataFrame and the label lookup
# below would fail with a misleading KeyError. Check the row count up front.
assert len(df_fifa_worldcup_2022) == 1, (
    f"expected exactly one FIFA World Cup 2022 row, found {len(df_fifa_worldcup_2022)}"
)

# Pull the two identifiers needed to query the matches of this competition/season.
competition_id, season_id = df_fifa_worldcup_2022.squeeze().loc[["competition_id", "season_id"]]

In [10]:
# Display both identifiers as a tuple to confirm what was extracted.
competition_id, season_id

(43, 106)

## Matches

### All Matches

In [11]:
# Fetch every match of the selected competition and season.
df_matches = sb.matches(competition_id=competition_id, season_id=season_id)

In [12]:
# Render the matches table as the cell output.
df_matches

Unnamed: 0,match_id,match_date,kick_off,competition,season,home_team,away_team,home_score,away_score,match_status,...,last_updated_360,match_week,competition_stage,stadium,referee,home_managers,away_managers,data_version,shot_fidelity_version,xy_fidelity_version
0,3857256,2022-12-02,21:00:00.000,International - FIFA World Cup,2022,Serbia,Switzerland,2,3,available,...,2023-04-26T23:49:58.956186,3,Group Stage,Stadium 974,Fernando Andrés Rapallini,Dragan Stojković,Murat Yakin,1.1.0,2,2
1,3869151,2022-12-03,21:00:00.000,International - FIFA World Cup,2022,Argentina,Australia,2,1,available,...,2023-07-30T07:48:51.865595,4,Round of 16,Ahmad bin Ali Stadium,Szymon Marciniak,Lionel Sebastián Scaloni,Graham James Arnold,1.1.0,2,2
2,3857257,2022-11-30,17:00:00.000,International - FIFA World Cup,2022,Australia,Denmark,1,0,available,...,2023-06-20T11:04:37.638969,3,Group Stage,Al Janoub Stadium,Mustapha Ghorbal,Graham James Arnold,Kasper Hjulmand,1.1.0,2,2
3,3857258,2022-11-24,21:00:00.000,International - FIFA World Cup,2022,Brazil,Serbia,2,0,available,...,2023-07-11T14:56:31.096588,1,Group Stage,Lusail Stadium,Alireza Faghani,Telê Santana da Silva,Dragan Stojković,1.1.0,2,2
4,3857288,2022-11-26,12:00:00.000,International - FIFA World Cup,2022,Tunisia,Australia,0,1,available,...,2023-04-27T00:30:07.835815,2,Group Stage,Al Janoub Stadium,Daniel Siebert,Jalel Kadri,Graham James Arnold,1.1.0,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59,3857265,2022-11-22,18:00:00.000,International - FIFA World Cup,2022,Mexico,Poland,0,0,available,...,2023-04-27T00:49:30.005344,1,Group Stage,Stadium 974,Chris Beath,Gerardo Daniel Martino,Czesław Michniewicz,1.1.0,2,2
60,3857262,2022-12-02,17:00:00.000,International - FIFA World Cup,2022,South Korea,Portugal,2,1,available,...,2023-04-26T23:48:08.999798,3,Group Stage,Education City Stadium,Facundo Tello Figueroa,Paulo Jorge Gomes Bento,Fernando Manuel Fernandes da Costa Santos,1.1.0,2,2
61,3857261,2022-11-29,21:00:00.000,International - FIFA World Cup,2022,Wales,England,0,3,available,...,2023-04-27T00:02:44.241621,3,Group Stage,Ahmad bin Ali Stadium,Slavko Vinčić,Robert Page,Gareth Southgate,1.1.0,2,2
62,3857255,2022-12-01,21:00:00.000,International - FIFA World Cup,2022,Japan,Spain,2,1,available,...,2023-04-26T23:51:48.352990,3,Group Stage,Sheikh Khalifa International Stadium,Victor Miguel de Freitas Gomes,Hajime Moriyasu,Luis Enrique Martínez García,1.1.0,2,2


### South Korea

<img src="./img/worldcup-2022-group-h.png" width="640"/>

In [13]:
# Matches in which South Korea played, whether listed as the home or the away side.
df_match_korea = df_matches.loc[
    df_matches[["home_team", "away_team"]].eq("South Korea").any(axis=1)
]

In [14]:
# Render South Korea's matches as the cell output.
df_match_korea

Unnamed: 0,match_id,match_date,kick_off,competition,season,home_team,away_team,home_score,away_score,match_status,...,last_updated_360,match_week,competition_stage,stadium,referee,home_managers,away_managers,data_version,shot_fidelity_version,xy_fidelity_version
7,3857287,2022-11-24,15:00:00.000,International - FIFA World Cup,2022,Uruguay,South Korea,0,0,available,...,2023-04-27T00:36:59.281195,1,Group Stage,Education City Stadium,Clément Turpin,Diego Martín Alonso López,Paulo Jorge Gomes Bento,1.1.0,2,2
25,3869253,2022-12-05,21:00:00.000,International - FIFA World Cup,2022,Brazil,South Korea,4,1,available,...,2023-06-19T13:27:43.114201,4,Round of 16,Stadium 974,Clément Turpin,Adenor Leonardo Bacchi,Paulo Jorge Gomes Bento,1.1.0,2,2
38,3857299,2022-11-28,15:00:00.000,International - FIFA World Cup,2022,South Korea,Ghana,2,3,available,...,2023-06-19T12:37:02.364850,2,Group Stage,Education City Stadium,Anthony Taylor,Paulo Jorge Gomes Bento,Otto Addo,1.1.0,2,2
60,3857262,2022-12-02,17:00:00.000,International - FIFA World Cup,2022,South Korea,Portugal,2,1,available,...,2023-04-26T23:48:08.999798,3,Group Stage,Education City Stadium,Facundo Tello Figueroa,Paulo Jorge Gomes Bento,Fernando Manuel Fernandes da Costa Santos,1.1.0,2,2


### South Korea vs. Portugal

The match occurred on December 2, 2022.

In [15]:
# Pick the South Korea vs. Portugal game by its match date (compared as text).
df_match_korea_portugal = df_match_korea.loc[
    df_match_korea["match_date"].eq("2022-12-02")
]

In [16]:
# Show the selected match; exactly one row is expected.
df_match_korea_portugal

Unnamed: 0,match_id,match_date,kick_off,competition,season,home_team,away_team,home_score,away_score,match_status,...,last_updated_360,match_week,competition_stage,stadium,referee,home_managers,away_managers,data_version,shot_fidelity_version,xy_fidelity_version
60,3857262,2022-12-02,17:00:00.000,International - FIFA World Cup,2022,South Korea,Portugal,2,1,available,...,2023-04-26T23:48:08.999798,3,Group Stage,Education City Stadium,Facundo Tello Figueroa,Paulo Jorge Gomes Bento,Fernando Manuel Fernandes da Costa Santos,1.1.0,2,2


In [17]:
# squeeze() yields a single row Series only when exactly one match was selected.
# With several rows `.match_id` would silently be a whole column of ids, and with
# none it would not be a scalar either, so verify the row count before extracting.
assert len(df_match_korea_portugal) == 1, (
    f"expected exactly one South Korea vs. Portugal match, found {len(df_match_korea_portugal)}"
)

# Identifier of the match, used to request its events.
match_id = df_match_korea_portugal.squeeze().match_id

In [18]:
# Display the extracted match identifier.
match_id

3857262

## Events

In [19]:
# Fetch the event records of the selected match as a single table.
df_events = sb.events(match_id=match_id)

In [20]:
# Render the events table; the notebook abbreviates long/wide frames in the output.
df_events

Unnamed: 0,bad_behaviour_card,ball_receipt_outcome,ball_recovery_offensive,ball_recovery_recovery_failure,block_deflection,carry_end_location,clearance_aerial_won,clearance_body_part,clearance_head,clearance_left_foot,...,shot_technique,shot_type,substitution_outcome,substitution_replacement,tactics,team,team_id,timestamp,type,under_pressure
0,,,,,,,,,,,...,,,,,"{'formation': 433, 'lineup': [{'player': {'id'...",South Korea,791,00:00:00.000,Starting XI,
1,,,,,,,,,,,...,,,,,"{'formation': 433, 'lineup': [{'player': {'id'...",Portugal,780,00:00:00.000,Starting XI,
2,,,,,,,,,,,...,,,,,,Portugal,780,00:00:00.000,Half Start,
3,,,,,,,,,,,...,,,,,,South Korea,791,00:00:00.000,Half Start,
4,,,,,,,,,,,...,,,,,,South Korea,791,00:00:00.000,Half Start,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3374,,,,,,,,,,,...,,,Tactical,Ui-Jo Hwang,,South Korea,791,00:36:13.753,Substitution,
3375,,,,,,,,,,,...,,,Tactical,William Silva de Carvalho,,Portugal,780,00:36:13.762,Substitution,
3376,,,,,,,,,,,...,,,Tactical,Bernardo Mota Veiga de Carvalho e Silva,,Portugal,780,00:36:13.790,Substitution,
3377,,,,,,,,,,,...,,,Tactical,Yu-Min Cho,,South Korea,791,00:47:46.061,Substitution,


In [21]:
# List the column labels of the events table.
df_events.columns

Index(['bad_behaviour_card', 'ball_receipt_outcome', 'ball_recovery_offensive',
       'ball_recovery_recovery_failure', 'block_deflection',
       'carry_end_location', 'clearance_aerial_won', 'clearance_body_part',
       'clearance_head', 'clearance_left_foot', 'clearance_right_foot',
       'counterpress', 'dribble_nutmeg', 'dribble_outcome', 'dribble_overrun',
       'duel_outcome', 'duel_type', 'duration', 'foul_committed_card',
       'foul_committed_type', 'foul_won_defensive', 'goalkeeper_body_part',
       'goalkeeper_end_location', 'goalkeeper_outcome', 'goalkeeper_position',
       'goalkeeper_technique', 'goalkeeper_type', 'id', 'index',
       'injury_stoppage_in_chain', 'interception_outcome', 'location',
       'match_id', 'minute', 'miscontrol_aerial_won', 'off_camera', 'out',
       'pass_aerial_won', 'pass_angle', 'pass_assisted_shot_id',
       'pass_body_part', 'pass_cross', 'pass_cut_back', 'pass_deflected',
       'pass_end_location', 'pass_goal_assist', 'pass_he

In [22]:
# Number of event rows recorded for the match.
df_events.shape[0]

3379

### Write to File

In [23]:
# Output directory for exported files, relative to the notebook's working directory.
path_data = "./data"

In [24]:
# Create the output directory; exist_ok=True makes re-running this cell harmless.
os.makedirs(path_data, exist_ok=True)

In [25]:
# Export the raw events as CSV; index=False leaves the row index out of the file.
df_events.to_csv(path_or_buf=os.path.join(path_data, "raw_events.csv"), index=False)

In [26]:
# Export the same raw events table in Parquet format, alongside the CSV.
df_events.to_parquet(path=os.path.join(path_data, "raw_events.parquet"))