# BePro Get Events

Accesses the BePro API and extracts the matches used for the model, then uses an event function to build an event dataframe from each match's event data.

Note: Change the API functions to the correct API calls as supplied by BePro and replace the placeholders (e.g. 'api'). Replace the placeholder value of ACCESS_TOKEN with the access token you were given; it is sent in the authorisation header of every request.

In [4]:
import json
import sys
# Add the parent directory to the import path; presumably this is what lets
# the UtilFunctions package imported below resolve - confirm against the repo layout.
sys.path.append("..")
import math
import re
from flatten_json import flatten
import pandas as pd
import UtilFunctions.util_functions as util_functions
# Placeholder value: substitute the access token supplied by BePro before running.
ACCESS_TOKEN = 'token'
import requests
# Request headers passed to every requests.get call in this notebook.
headers = {
    'AUTHORIZATION': ACCESS_TOKEN,
}
#Replace token with API token supplied by BePro

### Get Leagues, Season & Match ID
Uses the API functions to fetch JSON for the league, season, match and lineup data.

In [None]:
def get_league():
    """Fetch the league listing and select the first league in it.

    'api' is a placeholder for the real league endpoint.

    Returns:
        A ``(league_id, league_json)`` tuple: the ``id`` of the first entry
        of the response's ``result`` list, followed by that entry itself.
    """
    payload = requests.get('api', headers=headers).json()
    first_league = payload.get('result')[0]
    return first_league.get('id'), first_league

def get_season(league_id):
    """Fetch the seasons of a league and select the first one returned.

    Args:
        league_id: Sent to the endpoint as the ``league`` query parameter.

    Returns:
        A ``(season_id, season)`` tuple: the ``id`` of the first entry of
        the response's ``result`` list, followed by the whole ``result`` list.
    """
    query = {'league': league_id}
    seasons = requests.get('api', params=query, headers=headers).json().get('result')
    first_season_id = seasons[0].get('id')
    return first_season_id, seasons

def get_matches(season_id):
    """Fetch the matches of a season.

    Args:
        season_id: Sent to the endpoint as the ``season`` query parameter.

    Returns:
        A ``(matches_id, matches)`` tuple: the list of match ``id`` values
        and the raw ``result`` list they were read from, in the same order.
    """
    query = {'season': season_id}
    match_list = requests.get('api', params=query, headers=headers).json().get('result')
    return [match.get('id') for match in match_list], match_list
    
def get_team_matches(team_id, season_id):
    """Fetch the matches of one team within a season.

    Args:
        team_id: Sent to the endpoint as the ``team`` query parameter.
        season_id: Sent to the endpoint as the ``season`` query parameter.

    Returns:
        A ``(team_game_ids, team_games)`` tuple: the list of match ``id``
        values and the raw ``result`` list they were read from, in the same
        order.
    """
    query = {'season': season_id, 'team': team_id}
    payload = requests.get('api', params=query, headers=headers).json()
    games = payload.get('result')
    game_ids = [game.get('id') for game in games]
    return game_ids, games

def get_lineups(match_id):
    """Fetch the lineup of a match as a flat DataFrame.

    Args:
        match_id: Match identifier, inserted into the lineup endpoint URL.

    Returns:
        A DataFrame with one row per entry of the response's ``result``
        list; nested JSON keys are flattened into single column names.
    """
    url = f'api{match_id}/lineup'
    entries = requests.get(url, headers=headers).json().get('result')
    flat_entries = [flatten(entry) for entry in entries]
    return pd.DataFrame.from_records(flat_entries)

### Get IDs for the league, season, all season matches and Suwon FC's matches

In [None]:
# Team id passed to get_team_matches below to select that team's matches.
suwon_team_id = 4220
# Each helper returns an (id or list of ids, raw JSON) pair.
league_id, league_json = get_league()
season_id, season_json = get_season(league_id)
# All matches of the season, then only those of the team above.
matches_id, matches_json = get_matches(season_id)
suwon_matches_id, suwon_matches_json = get_team_matches(suwon_team_id, season_id)

### Get Events for a Match ID
Gets all events for a match. Iterates over pages, where 100 events are contained within each API page.

In [None]:
def get_events(match_id):
    """Download every event of a match into one flattened DataFrame.

    The event endpoint is paginated. A first request reads the total
    ``count``; the pages are then requested with an ``offset`` that grows
    in steps of 100, the number of events per API page.

    Args:
        match_id: Match identifier, inserted into the event endpoint URL.

    Returns:
        A DataFrame with one row per event, whose columns are the flattened
        JSON keys. An empty DataFrame is returned when ``count`` is 0.
    """
    page_size = 100
    url = 'api'+str(match_id)+'/event_data'

    num_events = requests.get(url, headers=headers).json().get('count')

    pages = []
    for offset in range(0, num_events, page_size):
        response = requests.get(url, headers=headers, params={'offset': offset})
        event_json = response.json().get('result')
        pages.append(pd.DataFrame.from_records([flatten(e) for e in event_json]))

    if not pages:
        # pd.concat rejects an empty list, so return the empty frame directly.
        return pd.DataFrame()
    # Concatenating once avoids re-copying all accumulated rows for every page.
    return pd.concat(pages, ignore_index=True)

### Get Formation Changes

In [None]:
def get_formation_changes(match_id):
    """Fetch the formation records of a match as a flat DataFrame.

    Args:
        match_id: Match identifier, inserted into the formation endpoint URL.

    Returns:
        A DataFrame with one row per entry of the response's ``result``
        list; nested JSON keys are flattened into single column names.
    """
    url = f'api{match_id}/formation'
    records = requests.get(url, headers=headers).json().get('result')
    flat_records = [flatten(record) for record in records]
    return pd.DataFrame.from_records(flat_records)

In [None]:
def _fill_team_positions(ev, team_gfc, num_slots=11):
    """Write one team's player positions into ``ev`` for each formation interval.

    ``team_gfc`` must have a 0..n-1 index (as produced by ``reset_index``),
    because the next formation change is looked up at ``index + 1``.
    """
    for idx, row in team_gfc.iterrows():
        changed_time = row['changed_time']
        # The interval ends at the team's next formation change; the last
        # formation has no successor, so it gets an effectively open upper bound.
        next_idx = idx + 1
        if next_idx in team_gfc.index:
            next_changed_time = team_gfc.loc[next_idx, 'changed_time']
        else:
            next_changed_time = 1000000000

        # NOTE(review): the lower bound is exclusive, so an event stamped
        # exactly at a change time is not assigned to the formation starting
        # at that time - confirm this is intended.
        in_interval = (ev['event_time'] > changed_time) & (ev['event_time'] <= next_changed_time)

        for slot in range(num_slots):
            prefix = 'formation_'+str(slot)
            p_id = row[prefix+'_player_id']
            # Empty formation slots carry a missing player id; pd.isna also
            # accepts None, which math.isnan would reject with a TypeError.
            if pd.isna(p_id):
                continue
            p_pos = util_functions.coords_to_pos(row[prefix+'_position_x'], row[prefix+'_position_y'])
            ev.loc[in_interval, p_id] = p_pos


def get_formation_from_events_df(i):
    """Build a per-event table of player positions for stored game ``i``.

    Reads ``data/Suwon_FC/events/game<i>/events_df.csv``, takes the match id
    from its first row, and fetches that match's lineup and formation
    changes from the API.

    Args:
        i: Game number used in the events CSV path.

    Returns:
        A DataFrame containing the ``event_time`` column of the events file
        plus one column per player id in the lineup of the first two teams.
        Each cell holds the label returned by
        ``util_functions.coords_to_pos`` for the formation in force at that
        event time, or the string ``'NA'`` where no formation applies.
    """
    events = pd.read_csv('data/Suwon_FC/events/game'+str(i)+'/events_df.csv')
    match_id = events['match_id'][0]
    ev = pd.DataFrame(events['event_time'])

    lu = get_lineups(match_id)
    team_ids = lu['team_id'].unique()
    both_teams = (team_ids[0], team_ids[1])
    gfc = get_formation_changes(match_id)

    # Create every player column first so the column order is all of team 1
    # followed by all of team 2, regardless of what the formations contain.
    for team_id in both_teams:
        for player_id in lu[lu['team_id'] == team_id]['player_id'].unique():
            ev[player_id] = 'NA'

    for team_id in both_teams:
        team_gfc = gfc[gfc['team_id'] == team_id].reset_index(drop=True)
        _fill_team_positions(ev, team_gfc)

    return ev

# Process for storing the input data

In [1]:
#Games to remove - event data games where tracking data is not readily available
#If using your own tracking dataset without missing files, set this as an empty list
missing_games_indexes = [2,15,17,25]
# Number of stored games; the per-game loops below run over game1 .. game<num_games>.
num_games = 34

## Part 1: Get team lineups

In [None]:
# get_lineups already returns one flattened DataFrame per match. Iterating
# that DataFrame yields its column labels, so passing them through flatten()
# again fails; the frames are collected as they are.
game_lineups = [get_lineups(s) for s in suwon_matches_id]

In [None]:
#Games to remove - event data games where tracking data is not readily available
# Pop from the highest index down: removing in ascending order shifts every
# later element left, so the later indexes would hit the wrong games.
for missing_idx in sorted(missing_games_indexes, reverse=True):
    game_lineups.pop(missing_idx)
# Reverse so that list position 0 corresponds to game1.
game_lineups.reverse()

Store home and away players and team formation

In [None]:
# Add lineup details (starting flag, position) to each game's home and away
# player files. The files are overwritten in place, and only after both
# merges for that game have succeeded.
for i in range(1, num_games+1):
    game_dir = 'data/Suwon_FC/tracking/game'+str(i)
    home_path = game_dir+'/home_players.csv'
    away_path = game_dir+'/away_players.csv'
    # game_lineups is 0-based while the game folders are numbered from 1.
    lineup_info = game_lineups[i-1][['player_id','is_starting_lineup','position_x','position_y','position_name']]

    home_df = pd.read_csv(home_path).merge(lineup_info, on=['player_id'], how='left')
    away_df = pd.read_csv(away_path).merge(lineup_info, on=['player_id'], how='left')

    home_df.to_csv(home_path, index=False)
    away_df.to_csv(away_path, index=False)

In [None]:
# Write the per-event formation table of every stored game to its own CSV.
for i in range(1,num_games+1):
    formation_path = f'data/Suwon_FC/formations/game{i}/formation.csv'
    fdf = get_formation_from_events_df(i)
    fdf.to_csv(formation_path,index=False)

## Part 2: Loop through matches list and get events dataframes for Suwon FC

In [None]:
def get_suwon_match_events(suwon_id, match_ids):
    """Download, convert and store the events of each match.

    Each match's events are fetched with ``get_events``, passed through
    ``util_functions.convert_bepro_to_EPTS`` and written to
    ``data/Suwon_FC/events/game<N>/events_df.csv``, where N counts from 1
    in the order of ``match_ids``.

    NOTE(review): N follows ``match_ids`` as given. The lineups in this
    notebook are filtered and reversed before being numbered, and this
    function does neither - confirm the game numbering lines up.

    Args:
        suwon_id: Not used by this function.
        match_ids: Iterable of match identifiers to download.
    """
    for game_no, match_id in enumerate(match_ids, start=1):
        epts_events = util_functions.convert_bepro_to_EPTS(get_events(match_id))
        epts_events.to_csv('data/Suwon_FC/events/game'+str(game_no)+'/events_df.csv', index=False)

get_suwon_match_events(suwon_team_id, suwon_matches_id)

## Part 3: Get matches dataframe for Suwon in the right format
Need to drop 4 rows and reverse order to match tracking and chronological order

In [None]:
# One row per match: id, start time, team ids and final score.
game_rows = [
    [
        game.get('id'),
        game.get('start_time'),
        game.get('home_team').get('id'),
        game.get('away_team').get('id'),
        game.get('detail_match_result').get('home_team_score'),
        game.get('detail_match_result').get('away_team_score'),
    ]
    for game in suwon_matches_json
]

game_df = pd.DataFrame(game_rows, columns=['id','date','home_id','away_id','home_score','away_score'])
# Drop the games listed in missing_games_indexes; the previously referenced
# name `missing_indexes` is not defined anywhere in this notebook.
game_df = game_df.drop(missing_games_indexes).reset_index(drop=True)
# Reverse the row order, as the Part 3 note above requires.
game_df = game_df.iloc[::-1].reset_index(drop=True)
game_df.to_csv('Suwon_games.csv',index=False)

## Part 4: Change Data so that coordinates are flipped properly i.e. always in correct direction

In [None]:
#for i in range(1,num_games+1):
#    events_df = pd.read_csv('data/Suwon_FC/events/game'+str(i)+'/events_df.csv')
#    events_df['y'] = 68- events_df['y']
#    events_df.to_csv('data/Suwon_FC/events/game'+str(i)+'/events_df.csv',index=False)