In [122]:
from socceraction.spadl.statsbomb import convert_to_actions
from socceraction.data.statsbomb import StatsBombLoader
from socceraction.spadl.config import actiontypes, bodyparts
import socceraction.spadl as spadl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, mean_squared_error
from xgboost import XGBClassifier, XGBRegressor
import math
import pickle
from mplsoccer import Pitch
import hashlib
import copy
import random

In [123]:
# Loader used by every cell below; configured with the "local" getter and
# rooted at the data/statsbomb directory.
api = StatsBombLoader(
    getter="local",
    root="data/statsbomb",
)

In [124]:
# Competitions Input
# Fetch the competition/season table, then display only the rows whose
# competition_gender is "male".
competitions_df = api.competitions()
competitions_df.loc[competitions_df["competition_gender"].eq("male")]

Unnamed: 0,season_id,competition_id,competition_name,country_name,competition_gender,season_name
0,27,9,1. Bundesliga,Germany,male,2015/2016
1,4,16,Champions League,Europe,male,2018/2019
2,1,16,Champions League,Europe,male,2017/2018
3,2,16,Champions League,Europe,male,2016/2017
4,27,16,Champions League,Europe,male,2015/2016
...,...,...,...,...,...,...
62,44,2,Premier League,England,male,2003/2004
63,27,12,Serie A,Italy,male,2015/2016
64,86,12,Serie A,Italy,male,1986/1987
65,43,55,UEFA Euro,Europe,male,2020


In [125]:
# Games Input (competition_id, season_id)
# Competitions that come with 360 files: UEFA Euro 2020 (55, 43) and FIFA World Cup 2022 (43, 106).
# NOTE: despite the variable name, the ids passed here are (55, 43), i.e. the
# UEFA Euro 2020 pair listed above -- not a World Cup.
games_world_cup_2018_df = api.games(55, 43)
# Display the single game used throughout this notebook: home team 914 vs
# away team 915 (Italy vs Austria according to the teams lookup for this game).
games_world_cup_2018_df.loc[games_world_cup_2018_df['game_id'].eq(3794685)]

Unnamed: 0,game_id,season_id,competition_id,competition_stage,game_day,game_date,home_team_id,away_team_id,home_score,away_score,venue,referee
21,3794685,43,55,Round of 16,4,2021-06-26 21:00:00,914,915,2,1,Wembley Stadium,Anthony Taylor


In [126]:
# Teams Input (game_id)
# Look up the teams of game 3794685 and preview the first rows.
# NOTE(review): the "world_cup_2018" part of the name looks stale -- the game id
# is the one selected from the (55, 43) games table; confirm before renaming.
teams_world_cup_2018_df = api.teams(3794685)
teams_world_cup_2018_df.head(5)

Unnamed: 0,team_id,team_name
0,914,Italy
1,915,Austria


In [127]:
# Player Input (game_id)
# Load the player table for game 3794685 (the same game id used by the teams
# and events lookups in this notebook) and display it in full.
players_df = api.players(3794685)
players_df

Unnamed: 0,game_id,team_id,player_id,player_name,nickname,jersey_number,is_starter,starting_position_id,starting_position_name,minutes_played
0,3794685,914,3166,Marco Verratti,,6,True,15,Left Center Midfield,67
1,3794685,914,6993,Bryan Cristante,,16,False,0,Substitute,14
2,3794685,914,7024,Jorge Luiz Frello Filho,Jorginho,8,True,10,Center Defensive Midfield,128
3,3794685,914,7036,Gianluigi Donnarumma,,21,True,1,Goalkeeper,128
4,3794685,914,7037,Lorenzo Insigne,,10,True,21,Left Wing,114
5,3794685,914,7038,Manuel Locatelli,,5,False,0,Substitute,61
6,3794685,914,7119,Francesco Acerbi,,15,True,5,Left Center Back,128
7,3794685,914,7131,Domenico Berardi,,11,True,17,Right Wing,84
8,3794685,914,7156,Federico Chiesa,,14,False,0,Substitute,44
9,3794685,914,7173,Leonardo Bonucci,,19,True,3,Right Center Back,128


In [128]:
# Events Input (game_id)
# Team 914 (Italy in the teams lookup) is passed as the home team when
# converting the raw events to SPADL actions.
home_team_id = 914
this_game_events_df = api.events(3794685, load_360=True)
# Convert to named SPADL actions and attach each action's originating event id,
# minute and 360 freeze frame. The inner join keeps only rows whose event_id
# matches an action's original_event_id.
this_game_events_df_spadl = this_game_events_df[['event_id', 'minute', 'freeze_frame_360']].merge(
    spadl.add_names(convert_to_actions(this_game_events_df, home_team_id)),
    left_on="event_id",
    right_on="original_event_id",
    how="inner",
)
# Optional exports / filters kept for reference (disabled):
# this_game_events_df_spadl[this_game_events_df_spadl['minute'].between(93.5,94)].to_csv('data/input_decision_making_model/real_test/first_goal_italy.csv')
# this_game_events_df_spadl[this_game_events_df_spadl['type_id'].isin([11])]
# Display the actions whose minute lies in [93.5, 94] (both bounds inclusive).
this_game_events_df_spadl.loc[this_game_events_df_spadl['minute'].between(93.5, 94)]

Unnamed: 0,event_id,minute,freeze_frame_360,game_id,original_event_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,type_id,result_id,bodypart_id,action_id,type_name,result_name,bodypart_name
2155,e236abb7-eed3-4dea-9005-be2cc14d5da2,94,"[{'teammate': True, 'actor': False, 'keeper': ...",3794685,e236abb7-eed3-4dea-9005-be2cc14d5da2,2,2974.0,914,7037.0,87.617647,15.751899,100.588235,37.012658,3,0,5,2230,freekick_crossed,fail,foot_right
2156,7aefba8f-d5d9-4dbe-ac39-23b6554b2621,94,,3794685,7aefba8f-d5d9-4dbe-ac39-23b6554b2621,2,2975.0,915,8779.0,101.382353,36.237975,91.147059,68.0,18,1,1,2231,clearance,success,head
2157,c72180cd-0962-4665-a554-3a313ea90189,94,,3794685,c72180cd-0962-4665-a554-3a313ea90189,2,2999.0,914,8286.0,91.147059,68.0,94.676471,61.544304,2,1,0,2232,throw_in,success,foot
2236,e42cccf5-3240-4f12-8479-a777eecbeefe,94,"[{'teammate': True, 'actor': False, 'keeper': ...",3794685,e42cccf5-3240-4f12-8479-a777eecbeefe,3,241.0,915,8220.0,92.205882,4.217722,60.0,8.177215,0,1,4,2311,pass,success,foot_left
2237,5671d8e2-e5f5-44e0-957b-ea019959563e,94,"[{'teammate': True, 'actor': False, 'keeper': ...",3794685,5671d8e2-e5f5-44e0-957b-ea019959563e,3,242.0,915,3325.0,60.0,8.177215,60.0,8.177215,21,1,0,2312,dribble,success,foot
2238,2b87e72b-81e4-4f60-958f-22e6310a12f7,94,"[{'teammate': True, 'actor': False, 'keeper': ...",3794685,2b87e72b-81e4-4f60-958f-22e6310a12f7,3,243.0,915,3325.0,60.0,8.177215,60.0,8.177215,19,0,0,2313,bad_touch,fail,foot
2239,5096d7fb-3f19-4cba-ac09-8807c0d680c6,94,"[{'teammate': True, 'actor': False, 'keeper': ...",3794685,5096d7fb-3f19-4cba-ac09-8807c0d680c6,3,244.0,914,7024.0,50.558824,7.488608,37.588235,20.486076,0,1,4,2314,pass,success,foot_left
2240,008dc54f-7891-45ad-8b74-3b68dd236e79,94,"[{'teammate': True, 'actor': True, 'keeper': F...",3794685,008dc54f-7891-45ad-8b74-3b68dd236e79,3,246.0,914,7119.0,37.588235,20.486076,36.794118,25.220253,21,1,0,2315,dribble,success,foot
2241,d387c884-7912-4ce1-9635-de9368498815,94,"[{'teammate': True, 'actor': True, 'keeper': F...",3794685,d387c884-7912-4ce1-9635-de9368498815,3,247.0,914,7119.0,36.794118,25.220253,41.823529,17.731646,0,1,4,2316,pass,success,foot_left
2242,e8acc80e-dd49-45b5-9056-ae4967a420ce,94,"[{'teammate': True, 'actor': False, 'keeper': ...",3794685,e8acc80e-dd49-45b5-9056-ae4967a420ce,3,248.0,914,7024.0,41.823529,17.731646,58.5,34.860759,21,1,0,2317,dribble,success,foot


In [129]:
# DIRECTORY_SNAPSHOT = 'data/input_decision_making_model/real_test/first_goal_italy.csv'
# COLUMNS_INPUT = [
#     "timestamp", "is_opponent_team", "player_id", 
#     "coordinate_x", "coordinate_y", "name", "original_event_id"
# ]

# snapshot_df = pd.read_csv(DIRECTORY_SNAPSHOT, delimiter=";", index_col="Unnamed: 0")
# snapshot_df = snapshot_df[snapshot_df.index.isin(list(range(2239,2252)))]

# final_input_df = pd.DataFrame(columns=COLUMNS_INPUT, index=[0])
# timestamp_counter = 1
# event_counter = 2239
# for _, snapshot_row in snapshot_df.iterrows():
#     maps_new_row = {
#         "timestamp": timestamp_counter,
#         "is_opponent_team": 0,
#         "player_id": snapshot_row['player_id'],
#         "coordinate_x": snapshot_row['start_x'],
#         "coordinate_y": snapshot_row['start_y'],
#         "name": "empty",
#         "original_event_id": event_counter
#     }
#     new_row = pd.DataFrame(maps_new_row, index=[0])
#     final_input_df = pd.concat([new_row, final_input_df.loc[:]]).reset_index(drop=True)
#     timestamp_counter += 1
#     event_counter += 1

# final_input_df.dropna(inplace=True)
# final_input_df.sort_values(by="timestamp", ascending=True, inplace=True)
# final_input_df.to_csv('output_timestamp.csv', sep=";")
    


In [130]:
# STANDARD_LENGTH_COURT = 105
# STANDARD_WIDTH_COURT = 68
# STANDARD_GOALLINE_WIDTH = 7.32
# STANDARD_LENGTH_COURT_STATSBOMB = 120
# STANDARD_WIDTH_COURT_STATSBOMB = 80
# filepath = "data/input_decision_making_model/real_test/test.csv"
# filepath_output = "data/input_decision_making_model/real_test/test_converted.csv"

# def convert_statsbomb_coordinate_to_spadl_coordinate(coordinate_x, coordinate_y):
#     converted_coordinate_x = (STANDARD_LENGTH_COURT / STANDARD_LENGTH_COURT_STATSBOMB) * coordinate_x
#     converted_coordinate_y = (STANDARD_WIDTH_COURT / STANDARD_WIDTH_COURT_STATSBOMB) * coordinate_y
#     return (converted_coordinate_x, converted_coordinate_y)

# opponent_list_df = pd.read_csv(filepath, delimiter=";")
# converted_opponent_list_df = pd.DataFrame(columns=opponent_list_df.columns, index=[0])
# for _, opponent_list_row in opponent_list_df.iterrows():
#     converted_x, converted_y = convert_statsbomb_coordinate_to_spadl_coordinate(opponent_list_row['coordinate_x'], opponent_list_row['coordinate_y'])
#     maps_new_row = {
#         "timestamp": opponent_list_row['timestamp'],
#         "is_opponent_team" : opponent_list_row['is_opponent_team'],
#         "player_id" : opponent_list_row["player_id"],
#         "coordinate_x" : converted_x,
#         "coordinate_y" : converted_y,
#         "name" : opponent_list_row["name"],
#         "original_event_id" : opponent_list_row["original_event_id"]
#     }
#     new_row = pd.DataFrame(maps_new_row, index=[0])
#     converted_opponent_list_df = pd.concat([new_row, converted_opponent_list_df.loc[:]]).reset_index(drop=True)
# converted_opponent_list_df.dropna()
# converted_opponent_list_df.sort_values(by="timestamp", ascending=True, inplace=True)
# converted_opponent_list_df.to_csv(filepath_output, sep=";")

In [131]:
# Export two CSV files for the Italy vs Austria test case: one with the skill
# ratings and one with the physical attributes of the selected players, both
# re-keyed from the source dataset's player_id to a scenario-local id.
columns_attribute = ["height_cm", "weight_kgs", "age"]
columns_skill = [
    "acceleration","aggression","agility","balance",
    "ball_control","composure","crossing","curve","dribbling",
    "finishing","freekick_accuracy","heading_accuracy","interceptions","jumping",
    "long_passing","long_shots","marking","penalties","positioning",
    "reactions","shot_power","sliding_tackle","sprint_speed","stamina",
    "short_passing","standing_tackle","strength","vision","volleys"
]
filepath_origin_player_skills = "data/players_skill_dataset/final_players_skill_dataset.csv"
filepath_skill_output = "data/input_decision_making_model/real_test/italy_vs_austria_2020_success_case_skill_player.csv"
filepath_attribute_output = "data/input_decision_making_model/real_test/italy_vs_austria_2020_success_case_attribute_player.csv"
# Scenario-local player id -> player_id in the source skill dataset.
maps_player_id_with_origin = {
    1 : 7024, # Jorginho
    2 : 7119, # Francesco Acerbi
    3 : 7038, # Manuel Locatelli
    4 : 8286, # Leonardo Spinazzola
    5 : 7156, # Federico Chiesa
}

origin_player_skill_df = pd.read_csv(filepath_origin_player_skills)

# Collect one single-row frame per matched player and concatenate once.
# The previous version seeded the result with an all-NaN placeholder row and
# grew it with pd.concat inside the loop; that is quadratic, emits a pandas
# FutureWarning about all-NA concatenation, and the NaN placeholder pushed the
# integer ids/ratings out of their integer dtype in the written CSV.
matched_player_rows = []
missing_origin_ids = []
for player_id in sorted(maps_player_id_with_origin):
    origin_player_id = maps_player_id_with_origin[player_id]
    player_data_origin = origin_player_skill_df.loc[
        origin_player_skill_df["player_id"] == origin_player_id,
        columns_skill + columns_attribute,
    ]
    if player_data_origin.empty:
        # Still best-effort (the player is skipped), but no longer silent.
        missing_origin_ids.append(origin_player_id)
        continue
    # Keep the first match as a one-row DataFrame (not a Series) so that each
    # column retains its own dtype instead of being coerced to a common one.
    first_match = player_data_origin.iloc[[0]].copy()
    first_match.insert(0, "player_id", player_id)
    matched_player_rows.append(first_match)

if missing_origin_ids:
    print(f"No skill data found for origin player ids: {missing_origin_ids}")

if matched_player_rows:
    matched_players_df = pd.concat(matched_player_rows, ignore_index=True)
else:
    # No player matched: fall back to empty frames with the expected columns.
    matched_players_df = pd.DataFrame(columns=["player_id"] + columns_skill + columns_attribute)

# As before, a player with any missing value in the exported columns is dropped
# from that file, rows are ordered by the local player_id, and the index is
# written as the first CSV column (now a clean 0..n-1 range).
player_skill_df = (
    matched_players_df[["player_id"] + columns_skill]
    .dropna()
    .sort_values(by="player_id", ascending=True)
    .reset_index(drop=True)
)
player_skill_df.to_csv(filepath_skill_output)

player_attribute_df = (
    matched_players_df[["player_id"] + columns_attribute]
    .dropna()
    .sort_values(by="player_id", ascending=True)
    .reset_index(drop=True)
)
player_attribute_df.to_csv(filepath_attribute_output)

In [132]:
# # Load 360 data
# list_competitions_ids = []
# list_game_ids = []

# competitions_df = api.competitions()
# for _, row in competitions_df.iterrows():
#     if (row['competition_gender'] == 'male'):
#         list_competitions_ids.append((row['competition_id'], row['season_id']))

# competition_ids_exist_360 = []
# for competition_id, season_id in list_competitions_ids:
#     games_df = api.games(competition_id, season_id)
#     for _, row in games_df.iterrows():
#         try:
#             list_game_ids.append((row['game_id'], row['home_team_id'], row['away_team_id']))
#             this_game_events_df = api.events(row['game_id'], load_360=True)
#             this_game_events_df = spadl.add_names(convert_to_actions(this_game_events_df, row['home_team_id']))
#             competition_ids_exist_360.append((competition_id, season_id))
#         except FileNotFoundError:
#             print("File Not Found")

# print(sorted(set(competition_ids_exist_360)))

# for game_id, home_team_id, away_team_id in list_game_ids:
#     try:
#         this_game_events_df = api.events(game_id, load_360=True)
#         this_game_events_df = spadl.add_names(convert_to_actions(this_game_events_df, home_team_id))
#         # this_game_events_df.to_csv(f'data/statsbomb_event_360_data/{game_id}_{home_team_id}_{away_team_id}_statsbomb_event_360.csv')
#     except FileNotFoundError:
#         print("File not found")

# game_id_test_360 = 3857292
# home_team_id = 795
# this_game_events_df = api.events(game_id_test_360, load_360=True)
# this_game_events_df_spadl = spadl.add_names(convert_to_actions(this_game_events_df, home_team_id))
# this_game_events_df_spadl_with_360 = pd.merge(this_game_events_df_spadl, this_game_events_df[["event_id", "visible_area_360", "freeze_frame_360"]], how="inner", left_on="original_event_id", right_on="event_id")
# this_game_events_df_spadl_with_360[this_game_events_df_spadl_with_360['period_id'] == 2]