In [24]:
import numpy as np

import os 
import warnings
import pandas as pd
pd.set_option('display.max_columns', None)
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
import tqdm 

import socceraction.spadl as spadl
import socceraction.spadl.statsbomb as statsbomb

import matplotsoccer 

import xgboost     

from sklearn.metrics import brier_score_loss, roc_auc_score, log_loss

In [25]:
# Use this if you only want to use the free public statsbomb data

free_open_data_remote = "https://raw.githubusercontent.com/statsbomb/open-data/master/data/"
SBL = statsbomb.StatsBombLoader(root=free_open_data_remote, getter="remote")

In [26]:
# View all available competitions
competitions = SBL.competitions()
set(competitions.competition_name)

{'Champions League',
 "FA Women's Super League",
 'FIFA World Cup',
 'La Liga',
 'NWSL',
 'Premier League',
 "Women's World Cup"}

In [27]:
# Choose Fifa world cup
selected_competitions = competitions[competitions.competition_name=="FIFA World Cup"] 

selected_competitions 

Unnamed: 0,season_id,competition_id,competition_name,country_name,competition_gender,season_name
17,3,43,FIFA World Cup,International,male,2018


In [28]:
# Get games from all selected competitions 
games = list(
    SBL.games(row.competition_id, row.season_id)
    for row in selected_competitions.itertuples()
)
games = pd.concat(games, sort=True).reset_index(drop=True)
games[["home_team_id", "away_team_id", "game_date", "home_score", "away_score"]] 

Unnamed: 0,home_team_id,away_team_id,game_date,home_score,away_score
0,785,776,2018-07-01 20:00:00,1,1
1,775,793,2018-06-22 17:00:00,2,0
2,789,769,2018-06-24 20:00:00,0,3
3,785,775,2018-06-16 21:00:00,2,0
4,781,795,2018-06-22 14:00:00,2,0
...,...,...,...,...,...
59,782,768,2018-07-14 16:00:00,2,0
60,796,774,2018-06-19 20:00:00,3,1
61,770,790,2018-06-23 20:00:00,2,1
62,786,773,2018-06-22 20:00:00,1,2


In [29]:
games_verbose = tqdm.tqdm(list(games.itertuples()), desc="Loading game data")
teams, players = [],[]
actions = {}
for game in games_verbose:
    teams.append(SBL.teams(game.game_id))
    players.append(SBL.players(game.game_id))
    events = SBL.events(game.game_id)
    actions[game.game_id] = statsbomb.convert_to_actions(events, game.home_team_id)

teams = pd.concat(teams).drop_duplicates("team_id").reset_index(drop=True)
players = pd.concat(players).reset_index(drop=True)
#player_games = pd.concat(player_games).reset_index(drop=True)   

Loading game data: 100%|██████████| 64/64 [02:21<00:00,  2.21s/it]


In [30]:
datafolder = "../data-fifa"      

# Create data folder if it doesn't exist
if not os.path.exists(datafolder):  
    os.mkdir(datafolder)          
    print(f"Directory {datafolder} created.")     

spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5")    

# Store all spadl data in h5-file
with pd.HDFStore(spadl_h5) as spadlstore:
    spadlstore["competitions"] = selected_competitions    
    spadlstore["games"] = games                            
    spadlstore["teams"] = teams                           
    spadlstore["players"] = players[['player_id', 'player_name', 'nickname']].drop_duplicates(subset='player_id') 
    spadlstore["player_games"] = players[['player_id', 'game_id', 'team_id', 'is_starter', 'starting_position_id', 'starting_position_name', 'minutes_played']]
    for game_id in actions.keys():                                  
        spadlstore[f"actions/game_{game_id}"] = actions[game_id]
    
    spadlstore["actiontypes"] = spadl.actiontypes_df()   
    spadlstore["results"] = spadl.results_df()   
    spadlstore["bodyparts"] = spadl.bodyparts_df()       

In [31]:
#table with all games
with pd.HDFStore(spadl_h5) as spadlstore:     
    games = (
        spadlstore["games"]
        .merge(spadlstore["competitions"], how='left')
        .merge(spadlstore["teams"].add_prefix('home_'), how='left')
        .merge(spadlstore["teams"].add_prefix('away_'), how='left'))     
    

    #all games of a chosen competition and team: belgium at WC18
        #create a dataframe
    all_matches = pd.DataFrame(columns = ['away_score','away_team_id', 'competition_id',
        'competition_stage', 'game_date' ,'game_day', 'game_id', 'home_score' ,'home_team_id' ,
           'referee_id' ,'season_id', 'venue', 'competition_name' ,'country_name','competition_gender',
                                                        'season_name','home_team_name','away_team_name'] )   
    
        #fill it with all games of the chosen competition and team
    for index,row in games.iterrows(): 
        if (games._get_value(index,'competition_name') == "FIFA World Cup") and  (
            games._get_value(index,'home_team_name')== "Belgium" 
            or games._get_value(index,'away_team_name')== "Belgium"): 
           
            this_match = games.iloc[index,] 
            all_matches = all_matches.append(this_match, ignore_index = True) 


    #all actions of those games in 1 dataframe
    actions_of_all_games = pd.DataFrame()     
    for index,row in all_matches.iterrows():
        game_ID = all_matches._get_value(index,'game_id')

        actions_of_this_game =f"actions_{game_ID}"
        actions_of_this_game = spadlstore[f"actions/game_{game_ID}"]      
        actions_of_this_game = (
            actions_of_this_game.merge(spadlstore["actiontypes"], how="left")
            .merge(spadlstore["results"], how="left")
            .merge(spadlstore["bodyparts"], how="left")
            .merge(spadlstore["players"], how="left")
            .merge(spadlstore["teams"], how="left"))
        
        actions_of_all_games = actions_of_all_games.append(actions_of_this_game, ignore_index = True)


# use nickname if available, else use full name
actions_of_all_games["player_name"] = actions_of_all_games[["nickname", "player_name"]].apply(lambda x: x[0] if x[0] else x[1], axis=1)
del actions_of_all_games['nickname']




In [32]:

#make Naive_BuildUp_ID (first naive case-id)

BuildUp_counter = 1         
past_team = actions_of_all_games._get_value(0,'team_id')   
Naive_BuildUp_ID_list = []     


for index, row in actions_of_all_games.iterrows():        
    
    current_team = actions_of_all_games._get_value(index,'team_id')
    
    if current_team != past_team:           
        BuildUp_counter = BuildUp_counter + 1  
                                            
    Naive_BuildUp_ID_list.append(BuildUp_counter) 
    
    past_team = current_team 

actions_of_all_games['Naive_BuildUp_ID']= Naive_BuildUp_ID_list 
      

In [33]:

#players to positions

field_position_list = []
field_position = "unimportant"
for index,row in actions_of_all_games.iterrows():
    player_names = actions_of_all_games._get_value(index, 'player_name')
    match = actions_of_all_games._get_value(index, 'game_id')
    if player_names == "Thibaut Courtois":
        field_position = "GK"
        field_position_list.append(field_position)
    elif player_names == "Jan Vertonghen":
        field_position = "LCB"
        field_position_list.append(field_position)
    elif player_names == "Toby Alderweireld":
        field_position = "RCB"
        field_position_list.append(field_position)
    elif player_names == "Vincent Kompany":
        field_position = "CB"
        field_position_list.append(field_position)
    elif player_names == "Dedryck Boyata":
        field_position = "CB"
        field_position_list.append(field_position)
    elif player_names == "Thomas Vermaelen":
        field_position = "LCB"
        field_position_list.append(field_position)
    elif player_names == "Thomas Meunier":
        field_position = "RW"
        field_position_list.append(field_position)
    elif player_names == "Kevin De Bruyne":
        field_position = "HCM"
        field_position_list.append(field_position)
    elif player_names == "Eden Hazard":
        field_position = "LA"
        field_position_list.append(field_position)
    elif player_names == "Dries Mertens":
        field_position = "RA"
        field_position_list.append(field_position)
    elif player_names == "Axel Witsel":
        field_position = "LCM"
        field_position_list.append(field_position)
    elif player_names == "Nacer Chadli":
        if match == 8655:                          #against france
            field_position = "RW"
        else:
            field_position = "LW"
        field_position_list.append(field_position)
    elif player_names == "Marouane Fellaini":
        field_position = "CM"
        field_position_list.append(field_position)
    elif player_names == "Youri Tielemans":
        field_position = "CM"
        field_position_list.append(field_position)
    elif player_names == "Romelu Lukaku":
        field_position = "ST"
        field_position_list.append(field_position)
    elif player_names == "Mousa Dembélé":
        field_position = "LW"
        field_position_list.append(field_position)
    elif player_names == "Yannick Carrasco":
        field_position = "LW"
        field_position_list.append(field_position)
    elif player_names == "Leander Dendoncker":
        field_position = "RCB"
        field_position_list.append(field_position)
    elif player_names == "Thorgan Hazard":
        field_position = "RW"
        field_position_list.append(field_position)
    elif player_names == "Michy Batshuayi":
        field_position = "ST"
        field_position_list.append(field_position)
    elif player_names == "Adnan Januzaj":
        field_position = "RA"
        field_position_list.append(field_position)
    else:
        field_position = "unimportant"
        field_position_list.append(field_position)
    
    
    
actions_of_all_games['player_position'] = field_position_list




In [34]:
#aggregated location
     
aggregated_Location_list = []     
match_index=0
home_team_check_list = []   

for index, row in actions_of_all_games.iterrows():

    match = actions_of_all_games._get_value(index, 'game_id')
    for index2, row in games.iterrows():
        if games._get_value(index2, 'game_id') == match:
            match_index = index2
            break 
    
    
    if  actions_of_all_games._get_value(index,'team_name') == games._get_value(match_index,'home_team_name'):
        home_team = 1 #binary variable : 1 if team plays at home
    else:
        home_team = 0
        
    home_team_check_list.append(home_team)
    
    x_coord = actions_of_all_games._get_value(index,'start_x')   
    y_coord = actions_of_all_games._get_value(index,'start_y')

    if home_team==1:
        if (x_coord >= 105*2/3) and (y_coord>=68*2/3):         
            aggregated_Location = 'LeftOffense' 
        if (x_coord >= 105*2/3) and (y_coord<=68*2/3 and y_coord>68*1/3):       
            aggregated_Location = 'CentralOffense' 
        if (x_coord >= 105*2/3) and (y_coord<68*1/3):         
            aggregated_Location = 'RightOffense' 
       
        if (x_coord < 105*2/3 and x_coord >= 105*1/3) and (y_coord>=68*2/3): 
            aggregated_Location = 'LeftMid'
        if (x_coord < 105*2/3 and x_coord >= 105*1/3) and (y_coord<=68*2/3 and y_coord>68*1/3): 
            aggregated_Location = 'CentralMid'            
        if (x_coord < 105*2/3 and x_coord >= 105*1/3) and (y_coord<68*1/3): 
            aggregated_Location = 'RightMid'        
        
        if (x_coord < 105*1/3) and (y_coord>=68*2/3):         
            aggregated_Location = 'LeftDefense' 
        if (x_coord < 105*1/3) and (y_coord<=68*2/3 and y_coord>68*1/3):      
            aggregated_Location = 'CentralDefense' 
        if (x_coord < 105*1/3) and (y_coord<68*1/3):         
            aggregated_Location = 'RightDefense' 
           
    else: 
        if (x_coord < 105-105*2/3) and (y_coord<68-68*2/3):         
            aggregated_Location = 'LeftOffense' 
        if (x_coord < 105-105*2/3) and (y_coord>68-68*2/3 and y_coord<=68-68*1/3):        
            aggregated_Location = 'CentralOffense' 
        if (x_coord < 105-105*2/3) and (y_coord>=68-68*1/3):         
            aggregated_Location = 'RightOffense' 
       
        if (x_coord >= 105-105*2/3 and x_coord < 105-105*1/3)  and (y_coord<68-68*2/3):
            aggregated_Location = 'LeftMid'
        if (x_coord >= 105-105*2/3 and x_coord < 105-105*1/3) and (y_coord>68-68*2/3 and y_coord<=68-68*1/3) :
            aggregated_Location = 'CentralMid'
        if (x_coord >= 105-105*2/3 and x_coord < 105-105*1/3) and (y_coord>=68-68*1/3) :
            aggregated_Location = 'RightMid'    
        
        if (x_coord >= 105-105*1/3) and (y_coord<68-68*2/3):         
            aggregated_Location = 'LeftDefense' 
        if (x_coord >= 105-105*1/3) and (y_coord>68-68*2/3 and y_coord<=68-68*1/3):     
            aggregated_Location = 'CentralDefense' 
        if (x_coord >= 105-105*1/3) and (y_coord>=68-68*1/3):         
            aggregated_Location = 'RightDefense' 
           
    aggregated_Location_list.append(aggregated_Location) 

actions_of_all_games['Location']= aggregated_Location_list 
actions_of_all_games['home_team_check']= home_team_check_list




In [35]:
#type_result

type_result_lijst = [] 

for index, row in actions_of_all_games.iterrows():        
    
    type_event_ = actions_of_all_games._get_value(index,'type_name')
    result_ = actions_of_all_games._get_value(index,'result_name')
    
    type_result_= type_event_ + '_'+  result_
    type_result_lijst.append(type_result_)    



actions_of_all_games['type_result']= type_result_lijst 







In [36]:
#new attribute: shows if an event is part of the x (e.g. 25) events before a goal (and gives all events before a certain goal the same number for this attribute (= > possible case ID? )  

reversed_actions= actions_of_all_games.iloc[::-1, :]  
before_which_goal = []
counter = 1
indicator = 0
ID = 0  
match_vd_goal = -1
for index, row in reversed_actions.iterrows():
    if reversed_actions._get_value(index,'type_result')=="shot_success":
        counter = 0 
        ID = ID+1 
        indicator =  1 
        match_vd_goal = reversed_actions._get_value(index,'game_id')
    
    if reversed_actions._get_value(index,'game_id')!=match_vd_goal: 
        indicator = 0  

    if indicator ==1:
        if counter <26:
            before_which_goal.append(ID)
            counter = counter+1
        else: 
            indicator = 0 
            before_which_goal.append ("///") 
    else:
        before_which_goal.append ("///") 
        
reversed_actions['before_goal'] = before_which_goal


actions_of_all_games_met_before_which_goal = reversed_actions.iloc[::-1, :]


actions_of_all_games['before_goal_']= actions_of_all_games_met_before_which_goal['before_goal']



    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  reversed_actions['before_goal'] = before_which_goal


In [37]:

import math 

distance_list = []  

for index, row in actions_of_all_games.iterrows():  
    
    covered_x  = actions_of_all_games._get_value(index,'end_x') - actions_of_all_games._get_value(index,'start_x')
    covered_y = actions_of_all_games._get_value(index,'end_y') - actions_of_all_games._get_value(index,'start_y')

    distance = math.sqrt((covered_x)**2 + (covered_y)**2) #pythagoras
    
    distance_list.append(distance)    
    
actions_of_all_games['covered_distance']= distance_list 



In [38]:


import math
       
covered_x_list = [] 
covered_y_list = [] 
direction_x_list = [] 
direction_y_list = [] 
slope_list = [] 
angle = 0
angle_list = []  

for index, row in actions_of_all_games.iterrows():        
    
    if actions_of_all_games._get_value(index,'home_team_check') == 1: #home team so normal coordinates
        covered_x = actions_of_all_games._get_value(index,'end_x') - actions_of_all_games._get_value(index,'start_x')
        covered_y = actions_of_all_games._get_value(index,'end_y') - actions_of_all_games._get_value(index,'start_y')
    else: #away team so reversed coordinates
        covered_x = - (actions_of_all_games._get_value(index,'end_x') - actions_of_all_games._get_value(index,'start_x'))
        covered_y = - (actions_of_all_games._get_value(index,'end_y') - actions_of_all_games._get_value(index,'start_y'))
    
    covered_x_list.append(covered_x) 
    covered_y_list.append(covered_y) 
    
    #directions
    if covered_x >0:
        direction_x =  'forward'
    if covered_x ==0:      
        direction_x =  '/'            
    if covered_x <0:
        direction_x =  'backward'
    if covered_y >0:
        direction_y =  'to Left'
    if covered_y ==0:      
        direction_y =  '/'            
    if covered_y <0:
        direction_y =  'to Right'
    
    direction_x_list.append(direction_x) 
    direction_y_list.append(direction_y) 

    
    #slope 
    
    if covered_x != 0:
        slope = covered_y / covered_x   
    else:
        slope = 99999999999 #infinite 
        
    slope_list.append(slope) 
    
    #angle: (0°/360°: own goal ; 90: Right ; 180: opposite goal; 270: Left (counter-clockwise))
    if direction_x == 'forward':
        angle = 180 + math.degrees(math.atan(slope))
        
    elif direction_x == 'backward' and direction_y == 'to Left': 
        angle = 360 + math.degrees(math.atan(slope))
        
    else: 
        angle = math.degrees(math.atan(slope))                         
                
    angle_list.append(angle)
    
actions_of_all_games['covered_x']= covered_x_list 
actions_of_all_games['covered_y']= covered_y_list 
actions_of_all_games['direction_x']= direction_x_list 
actions_of_all_games['direction_y']= direction_y_list 
actions_of_all_games['slope']= slope_list 
actions_of_all_games['angle ']= angle_list





In [39]:
#nr_event_in_BuildUp (based on 'Naive_BuildUp_ID') (needed for final buildUp_ID)

event_counter = 0
archiveer_counter=0
last_BuildUp = actions_of_all_games._get_value(0,'Naive_BuildUp_ID')   
number_of_event_since_possession_list = []     
number_of_events_of_every_BuildUp_list  = []  

for index, row in actions_of_all_games.iterrows():        
    
    this_BuildUp = actions_of_all_games._get_value(index,'Naive_BuildUp_ID')
    
    if this_BuildUp == last_BuildUp:           
        event_counter = event_counter + 1
    else: 
        archiveer_counter= event_counter 
        number_of_events_of_every_BuildUp_list.append(archiveer_counter) 
        event_counter=1
                                                          
    number_of_event_since_possession_list.append(event_counter) 
    
    last_BuildUp = this_BuildUp 
    
archiveer_counter= event_counter 
number_of_events_of_every_BuildUp_list.append(archiveer_counter)

actions_of_all_games['nr_event_in_BuildUp']= number_of_event_since_possession_list 


#number_of_events_in_this_possession
total_number_of_events_BuildUp_list = []
for index, row in actions_of_all_games.iterrows():    
    
    this_BuildUp = actions_of_all_games._get_value(index,'Naive_BuildUp_ID')
    spot_in_list = this_BuildUp - 1 
    total_amount = number_of_events_of_every_BuildUp_list[spot_in_list]  
    total_number_of_events_BuildUp_list.append(total_amount)
    

actions_of_all_games['number_of_events_in_this_possession']= total_number_of_events_BuildUp_list 




In [40]:
#number of seconden since last event  (needed for final build up id)
time_between_list = []   
time_between = 0
time_last = 0

for index, row in actions_of_all_games.iterrows():        
     
    time_this = actions_of_all_games._get_value(index,'time_seconds')
    
    time_between = time_this - time_last
    time_between_list.append(time_between) 
    time_last = time_this 

actions_of_all_games['time_since_last_event']= time_between_list




In [41]:

#covered distance wrt start possession 

last_BuildUp = actions_of_all_games._get_value(0,'Naive_BuildUp_ID')   
distance_wrt_possession_list  = []  
start_Location_x = actions_of_all_games._get_value(0,'start_x')
start_Location_y = actions_of_all_games._get_value(0,'start_y')


for index, row in actions_of_all_games.iterrows():        
    
    this_BuildUp = actions_of_all_games._get_value(index,'Naive_BuildUp_ID')
    end_Location_x = actions_of_all_games._get_value(index,'end_x')
    end_Location_y = actions_of_all_games._get_value(index,'end_y')
    
    if this_BuildUp != last_BuildUp:         
        start_Location_x = actions_of_all_games._get_value(index,'start_x')
        start_Location_y = actions_of_all_games._get_value(index,'start_y')
    
    covered_x  = end_Location_x - start_Location_x
    covered_y = end_Location_y - start_Location_y
    distance = math.sqrt((covered_x)**2 + (covered_y)**2) #pythagoras

    distance_wrt_possession_list.append(distance) 
    
    last_BuildUp = this_BuildUp 

actions_of_all_games['distance_wrt_possession']= distance_wrt_possession_list 
  


# maximale distance wrt start possession for every BuildUp  (needed for final buildupID)
import math
last_BuildUp = actions_of_all_games._get_value(0,'Naive_BuildUp_ID')   
max_distance_wrt_possession_list  = []  

this_distance_wrt_start = 0

x_start= actions_of_all_games._get_value(0,'start_x')
y_start= actions_of_all_games._get_value(0,'start_y')
x_end= actions_of_all_games._get_value(0,'end_x')
y_end= actions_of_all_games._get_value(0,'end_y')
    
covered_x  = actions_of_all_games._get_value(index,'end_x') - actions_of_all_games._get_value(index,'start_x')
covered_y = actions_of_all_games._get_value(index,'end_y') - actions_of_all_games._get_value(index,'start_y')

max_distance_wrt_start = math.sqrt((x_end-x_start)**2 + (y_end-y_start)**2) 

archiveer_max_distance_list =  []  

for index, row in actions_of_all_games.iterrows():        
    
    this_BuildUp = actions_of_all_games._get_value(index,'Naive_BuildUp_ID')
   
    x_end= actions_of_all_games._get_value(index,'end_x')
    y_end= actions_of_all_games._get_value(index,'end_y')
    this_distance_wrt_start = math.sqrt((x_end-x_start)**2 + (y_end-y_start)**2) 
    
    if this_BuildUp == last_BuildUp:        
        if this_distance_wrt_start > max_distance_wrt_start: 
            max_distance_wrt_start = this_distance_wrt_start
    else:
        archiveer_max_distance_list.append(max_distance_wrt_start)
        x_start= actions_of_all_games._get_value(index,'start_x')
        y_start= actions_of_all_games._get_value(index,'start_y')
        max_distance_wrt_start = math.sqrt((x_end-x_start)**2 + (y_end-y_start)**2) 
    
    max_distance_wrt_possession_list.append(max_distance_wrt_start) 
    
    last_BuildUp = this_BuildUp 

#last element:
archiveer_max_distance_list.append(max_distance_wrt_start) 

actions_of_all_games['max_distance_so_far_wrt_start_possession']= max_distance_wrt_possession_list 



max_dist_reached_list = []
for index, row in actions_of_all_games.iterrows():    
    
    this_BuildUp = actions_of_all_games._get_value(index,'Naive_BuildUp_ID')
    spot_in_list = this_BuildUp - 1 # voor BuildUp_id i, het i'de element uit list ophalen, maar aangezien nummering van een list bij 0 (/=1) start, moet je -1 doen 
    max_distance_this_BuildUp = archiveer_max_distance_list[spot_in_list]  
    max_dist_reached_list.append(max_distance_this_BuildUp)
    
actions_of_all_games['max_distance_reached_overall_relative_to_start_of_possession']=max_dist_reached_list



In [42]:

#distance and angle -> categorize
categ_distance_list = [] 
categ_angle_list = [] 
short_dribble_list= [] 

for index, row in actions_of_all_games.iterrows():  
        
    #distance 
    distance_covered = actions_of_all_games._get_value(index,'covered_distance')

    if distance_covered < 7:
        category_distance = 'very short'
    elif distance_covered < 15:
        category_distance = 'short'
    elif distance_covered < 25:
        category_distance = 'medium length'
    else:
        category_distance = 'long'

    categ_distance_list.append(category_distance)    
    
    #angle 
    angle = actions_of_all_games._get_value(index,'angle ') 

    if angle < 70 or angle > 290:
        category_angle = 'backwards'
    elif angle <105 :
        category_angle = 'lateral (R)'
    elif angle > 255:
        category_angle = 'lateral (L)'
    elif angle < 165:
        category_angle = 'diagonally forward (R)'
    elif angle > 195:
        category_angle = 'diagonally forward (L)'
    else:
        category_angle = 'straight forward'
    
    categ_angle_list.append(category_angle)   
    

    
actions_of_all_games['covered_distance_category']= categ_distance_list 
actions_of_all_games['angle_category']= categ_angle_list 


In [43]:


#final BuildUp_id

BuildUp_counter_final = 1         
past_team = actions_of_all_games._get_value(0,'team_id')   
BuildUp_id_list_final = []     
indicator = 0


for index, row in actions_of_all_games.iterrows():        
    
    current_team = actions_of_all_games._get_value(index,'team_id')
    nr_events = actions_of_all_games._get_value(index,'number_of_events_in_this_possession')
    max_covered = actions_of_all_games._get_value(index,'max_distance_reached_overall_relative_to_start_of_possession')
    event = actions_of_all_games._get_value(index,'type_name')
    
    if (actions_of_all_games._get_value(index,'time_since_last_event')<15 and actions_of_all_games._get_value(index,'time_since_last_event')>= 0) and (event != 'corner_crossed' and event != 'corner_short' and event != 'freekick_crossed' and event != 'freekick_short' and event != 'goalkick' and event != 'shot_freekick' and event != 'shot_penalty' and event != 'throw_in'):  #als 'tijd tussen' minder is dan bv 15 EN het is geen begin na een stilstaande fase, dan doe je gwn zoals gewoonlijk (obv aantal events en distance)...
        
        if current_team == past_team : 
            BuildUp_counter_comb_incl_stilstaand =  BuildUp_counter_final
        
        elif indicator ==0: 
            if nr_events <3 or max_covered <15 :      
                indicator = 1 
            else: 
                BuildUp_counter_final =  BuildUp_counter_final + 1  
        else:  
            BuildUp_counter_final =  BuildUp_counter_final
            indicator = 0
            
    else: 
        BuildUp_counter_final =  BuildUp_counter_final + 1
        indicator=0 
    
    BuildUp_id_list_final.append(BuildUp_counter_final) 
    past_team = current_team 

actions_of_all_games['BuildUp_ID']= BuildUp_id_list_final






In [44]:
#remove useless attributes for further process mining analysis (only indirectly used for other attributes/columns)

actions_of_all_games.drop('Naive_BuildUp_ID', axis=1, inplace=True)
actions_of_all_games.drop('home_team_check', axis=1, inplace=True) 
actions_of_all_games.drop('before_goal_', axis=1, inplace=True)
actions_of_all_games.drop('covered_x' , axis=1, inplace=True)   
actions_of_all_games.drop('covered_y' , axis=1, inplace=True)  
actions_of_all_games.drop('direction_x', axis=1, inplace=True)
actions_of_all_games.drop('direction_y', axis=1, inplace=True)   
actions_of_all_games.drop('nr_event_in_BuildUp', axis=1, inplace=True)                        
actions_of_all_games.drop('distance_wrt_possession', axis=1, inplace=True)                        
actions_of_all_games.drop('max_distance_so_far_wrt_start_possession', axis=1, inplace=True)                        


In [45]:
#show resulting table:

actions_of_all_games[:50]



Unnamed: 0,game_id,original_event_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,type_id,result_id,bodypart_id,action_id,type_name,result_name,bodypart_name,player_name,team_name,player_position,Location,type_result,covered_distance,slope,angle,number_of_events_in_this_possession,time_since_last_event,max_distance_reached_overall_relative_to_start_of_possession,covered_distance_category,angle_category,BuildUp_ID
0,7584,a1b55211-a292-4294-887b-5385cc3c5705,1,0.0,782,3289,52.941176,34.43038,42.352941,31.848101,0,1,0,0,pass,success,foot,Romelu Lukaku,Belgium,ST,CentralMid,pass_success,10.898573,0.2438819,13.705848,12,0.0,48.618462,short,backwards,1
1,7584,2eff307a-8759-48aa-b875-aa0a0c94a533,1,1.0,782,5642,42.352941,31.848101,41.470588,34.43038,21,1,0,1,dribble,success,foot,Axel Witsel,Belgium,LCM,CentralMid,dribble_success,2.728866,-2.926582,288.865067,12,1.0,48.618462,very short,lateral (L),1
2,7584,2f9dae19-fbdc-4f42-b836-a3ce2c7dc337,1,3.0,782,5642,41.470588,34.43038,30.882353,61.113924,0,1,0,2,pass,success,foot,Axel Witsel,Belgium,LCM,CentralMid,pass_success,28.70753,-2.520113,291.643558,12,2.0,48.618462,long,backwards,1
3,7584,63093224-6600-49ff-8e19-a43473c1bb14,1,4.0,782,3077,30.882353,61.113924,30.882353,59.392405,21,1,0,3,dribble,success,foot,Jan Vertonghen,Belgium,LCB,LeftDefense,dribble_success,1.721519,100000000000.0,90.0,12,1.0,48.618462,very short,lateral (R),1
4,7584,8c401ae9-1939-428d-95f3-c684068be752,1,5.0,782,3077,30.882353,59.392405,21.176471,46.481013,0,1,0,4,pass,success,foot,Jan Vertonghen,Belgium,LCB,LeftDefense,pass_success,16.152653,1.330265,53.066713,12,1.0,48.618462,medium length,backwards,1
5,7584,923b5e89-c3f8-41e3-af2e-232871e2132d,1,6.0,782,3101,21.176471,46.481013,20.294118,44.759494,21,1,0,5,dribble,success,foot,Vincent Kompany,Belgium,CB,LeftDefense,dribble_success,1.93447,1.951055,62.862898,12,1.0,48.618462,very short,backwards,1
6,7584,3d1391c8-1a93-4ca6-abfc-14aedfcdb9e7,1,7.0,782,3101,20.294118,44.759494,19.411765,22.379747,0,1,0,6,pass,success,foot,Vincent Kompany,Belgium,CB,CentralDefense,pass_success,22.397134,25.36371,87.742203,12,1.0,48.618462,medium length,lateral (R),1
7,7584,cdb14b8d-ec35-46b7-868b-aee31dc46629,1,8.0,782,20005,19.411765,22.379747,20.294118,22.379747,21,1,0,7,dribble,success,foot,Toby Alderweireld,Belgium,RCB,RightDefense,dribble_success,0.882353,0.0,180.0,12,1.0,48.618462,very short,straight forward,1
8,7584,53940e21-8b2b-41f0-aebf-3f99411a6eb1,1,11.0,782,20005,20.294118,22.379747,14.117647,38.734177,0,1,0,8,pass,success,foot,Toby Alderweireld,Belgium,RCB,RightDefense,pass_success,17.481882,-2.64786,290.689718,12,3.0,48.618462,medium length,backwards,1
9,7584,38ac43dc-bc0a-4cbb-beef-a7f2ebce64e9,1,13.0,782,3101,14.117647,38.734177,14.117647,63.696203,0,1,0,9,pass,success,foot,Vincent Kompany,Belgium,CB,CentralDefense,pass_success,24.962025,100000000000.0,90.0,12,2.0,48.618462,medium length,lateral (R),1


In [46]:
#store/export resulting event log in a csv-file
actions_of_all_games.to_csv(....!!!...fill..in..location...!!!...., header=True, index=False)      

