# Precompute Rewards for MDP from Match Prediction Model - Sections 5 and 6

In [None]:
import pandas as pd
import pickle
import tqdm
import numpy as np
import Poisson
import tqdm
import os
import json
from itertools import combinations
from itertools import product
# Move the working directory up one level so the relative 'data/...' paths used below resolve.
# NOTE: this changes the process-wide working directory; re-running this cell moves up one more level each time.
os.chdir('..')
# Games and teams tables, kept as module-level globals and read by later cells.
games = pd.read_csv('data/team_data/games_data.csv')
teams = pd.read_csv('data/teams.csv')

## Function to convert team to features

In [None]:
def get_feature_set(team,player_ids,opponent,opponent_player_ids,is_home):
    """Build a one-row feature frame for a match between two selected lineups.

    Reads 'data/team_data/<team>.csv' and 'data/team_data/<opponent>.csv',
    keeps the rows whose 'player_id' is in the given id collections (in file
    order, not in the order of the ids), and lays out their 'skill' and 'vaep'
    values as columns home1_wr, home1_vaep, ..., home11_wr, home11_vaep,
    away1_wr, away1_vaep, ..., away11_wr, away11_vaep.

    Args:
        team: identifier used in the team's CSV file name.
        player_ids: player ids selecting the team's lineup.
        opponent: identifier used in the opponent's CSV file name.
        opponent_player_ids: player ids selecting the opponent's lineup.
        is_home: truthy if `team` fills the home slots, otherwise the away slots.

    Returns:
        A single-row DataFrame with the 44 interleaved feature columns.

    Raises:
        ValueError: if the two selections do not yield exactly 22 rows in
            total (the column labels cannot be assigned).
    """
    sides = ('home', 'away')
    slots = range(1, 12)
    skill_cols = [f'{side}{slot}_wr' for side in sides for slot in slots]
    vaep_cols = [f'{side}{slot}_vaep' for side in sides for slot in slots]
    # Final column order interleaves skill and vaep per player slot.
    all_cols = np.array([[f'{side}{slot}_wr', f'{side}{slot}_vaep'] for side in sides for slot in slots]).flatten()

    def _selected_players(club, wanted_ids):
        # Rows of the club's data file that belong to the requested players.
        squad = pd.read_csv('data/team_data/'+str(club)+'.csv')
        return squad[squad['player_id'].isin(wanted_ids)]

    team_players = _selected_players(team, player_ids)
    opponent_players = _selected_players(opponent, opponent_player_ids)

    # The home side always occupies the first eleven slots.
    if is_home:
        home_players, away_players = team_players, opponent_players
    else:
        home_players, away_players = opponent_players, team_players

    def _as_single_row(metric):
        # Stack home then away values and turn the column into one wide row.
        stacked = pd.concat([home_players[metric], away_players[metric]]).reset_index(drop=True)
        return stacked.to_frame().T.reset_index(drop=True)

    skills_row = _as_single_row('skill')
    vaeps_row = _as_single_row('vaep')
    skills_row.columns = skill_cols
    vaeps_row.columns = vaep_cols
    return pd.concat([skills_row, vaeps_row], axis=1).reindex(all_cols, axis=1)

## Get Team Combinations
Stores the squad of players for each team. Change `tid` to the team whose squad should be saved. `filtered_ids` selects which rows (players) of the team's data file make up the squad; choose the players so that the squad fits the team's formation.

In [None]:
# Unique team ids from the teams table; iterated later when computing each team's mean VAEP.
team_ids = pd.read_csv('data/teams.csv')['team_id'].unique()
# Bare expression: displays the array when run as a notebook cell.
team_ids

In [None]:
# Build and save the squad file for one team. Edit `tid`, `filtered_ids` and the
# position list below for each team.
tid=39
team_squad = pd.read_csv('data/team_data/'+str(tid)+'.csv')
# Row labels of the team's data file to keep; their order must line up with the
# position list assigned below.
filtered_ids = [1,3,4,5,6,7,9,10,11,14,13,18,20,19,21,26,23,22,24,27]
team_squad = team_squad.loc[filtered_ids].reset_index(drop=True)
# Add the placeholder 'Reserve' player as the last row (index 20).
# DataFrame.append was removed in pandas 2.0, so the row is added with pd.concat.
team_squad = pd.concat([team_squad,pd.DataFrame([{'player_id':0,'team_id':tid,'player_name':'Reserve','tm_id':0,'gd':0,'games_played':0,'skill':0,'position':'Reserve','vaep':-3}])],ignore_index=True)
# One position label per squad row: 20 selected players followed by the reserve.
team_squad['position'] = ['Goalkeeper','Center Back','Center Back','Center Back','Center Back','Right Back','Right Back','Left Back','Left Back','Center Midfield','Center Midfield','Center Midfield','Center Midfield','Center Midfield','Right Winger','Right Winger','Left Winger','Left Winger','Striker','Striker','Reserve']
team_squad.to_csv('data/18-19_Squad/'+str(tid)+'.csv',index=False)
# Bare expression: displays the squad when run as a notebook cell.
team_squad

## Get Team Average VAEP

In [None]:
# Directory one level above the current working directory; the per-game event JSON files are read from under it.
parent_path = os.path.dirname(os.getcwd())

In [None]:
# For every team, average the summed VAEP of its lineup over a set of games:
# the team's season_id == 1 games, or its first five games when it has none.
team_mean_vaeps = {}
for tid in team_ids:
    vaeps = []
    team_games = games[(games['home_team_id']==tid) | (games['away_team_id']==tid)].copy().reset_index(drop=True)
    team_vaeps = pd.read_csv('data/team_data/'+str(tid)+'.csv')
    if team_games[team_games['season_id']==1].empty:
        team_games = team_games[:5]
    else:
        team_games = team_games[team_games['season_id']==1].reset_index(drop=True)
    for game_id in team_games['game_id']:
        # os.path.join keeps the path valid on every OS (the previous hard-coded
        # '\\' separators only resolved on Windows).
        with open(os.path.join(parent_path,'data','events',str(game_id)+'.json'),'r',encoding="utf8") as file:
            # Only the first two entries are inspected; presumably these are the
            # two teams' lineup events -- TODO confirm against the event data.
            json_lineup = json.load(file)[:2]
        team_lineup = [a for a in json_lineup if a['team']['id']==tid]
        lineup_indexes = [a['player']['id'] for a in team_lineup[0]['tactics']['lineup']]
        # Total VAEP of the lineup players that appear in the team's data file.
        vaeps.append(team_vaeps[team_vaeps['player_id'].isin(lineup_indexes)]['vaep'].sum())
    mean_vaep = np.array(vaeps).mean()
    team_mean_vaeps[tid]=mean_vaep

## Generate Combinations of Possible Lineups (Section 6.2)

This includes reserves. We have a reserve slot where a player from the club replaces a first-team player if no one is available to play in a certain position. Their skill rating is equivalent to the minimum skill rating in the data. These combinations and their rewards for the team in each game are precomputed to be used in the MDP model.

In [None]:
def get_team_reward_df(team_id):
    """Precompute the expected points of every candidate lineup in each of a team's games.

    Besides `team_id`, this function reads the module-level globals `games`,
    `teams`, `team_mean_vaeps`, `list_of_values` (summed VAEP per candidate
    lineup) and `tupled_teams` (one column label per candidate lineup), and it
    calls `Poisson.get_predictions`. They must all be set before calling.

    For each game of the team with season_id == 4 (oldest first), one
    prediction row is built per candidate lineup: the team's own VAEP is the
    lineup's value and the opponent's is its entry in `team_mean_vaeps`.
    Score-line probabilities are folded into expected points as
    3 * P(win) + P(draw) from the team's side of the fixture.

    Args:
        team_id: id of the team, matched against 'home_team_id' / 'away_team_id'.

    Returns:
        DataFrame with one row per game: the game columns (without
        'home_score', 'away_score' and 'is_home') followed by one
        expected-points column per entry of `tupled_teams`.
    """
    #Team Games DF
    involves_team = (games['home_team_id']==team_id) | (games['away_team_id']==team_id)
    team_games = games[involves_team & (games['season_id']==4)][['game_id','game_date','home_team_id','away_team_id','home_score','away_score']].reset_index(drop=True)
    team_games['home_team'] = [teams[teams['team_id']==t]['team_name'].values[0] for t in team_games['home_team_id']]
    team_games['away_team'] = [teams[teams['team_id']==t]['team_name'].values[0] for t in team_games['away_team_id']]
    team_games['is_home']=True
    # The team *names* are passed on under the 'team_id' / 'team_id_vs' column names.
    team_games = team_games.rename(columns={'home_team':'team_id','away_team':'team_id_vs'})
    team_games = team_games.sort_values('game_date',ascending=True).reset_index(drop=True)

    all_predictions_rows = []
    for _,row in tqdm.tqdm(team_games.iterrows(),total=len(team_games)):
        is_home = bool(row['home_team_id'] == team_id)
        game_row = pd.DataFrame([row],columns=team_games.columns)
        # One copy of the game row per candidate lineup.
        game_specific = pd.concat([game_row]*len(list_of_values), ignore_index=True)
        # 'team_vaep' always describes the home side, 'team_vaep_vs' the away side.
        if is_home:
            game_specific['team_vaep'] = list_of_values
            game_specific['team_vaep_vs'] = team_mean_vaeps[row['away_team_id']]
        else:
            game_specific['team_vaep'] = team_mean_vaeps[row['home_team_id']]
            game_specific['team_vaep_vs'] = list_of_values
        # Reuse 'game_id' as the lineup index so predictions can be grouped per lineup.
        game_specific['game_id'] = range(len(list_of_values))
        game_specific_predictions = Poisson.get_predictions(game_specific)
        game_specific_predictions['result'] = np.where(game_specific_predictions['goals_a'] > game_specific_predictions['goals_b'], 'HW', np.where(game_specific_predictions['goals_a'] < game_specific_predictions['goals_b'], 'AW', 'DR'))
        # Select the needed columns *before* aggregating: summing every column
        # first is wasteful and, on pandas >= 2.0, no longer silently skips
        # non-numeric columns.
        outcome_probs = game_specific_predictions.groupby(['game_id','result'])['p'].sum().reset_index()
        outcome_probs['xph'] = np.where(outcome_probs['result'] == 'HW', outcome_probs['p'] * 3, np.where(outcome_probs['result'] == 'AW', 0, outcome_probs['p']))
        outcome_probs['xpa'] = np.where(outcome_probs['result'] == 'AW', outcome_probs['p'] * 3, np.where(outcome_probs['result'] == 'HW', 0, outcome_probs['p']))
        expected_points = outcome_probs.groupby('game_id')[['xph','xpa']].sum()
        xp = expected_points['xph' if is_home else 'xpa'].values
        new_prediction_row = game_row.reset_index(drop=True)
        # Assumes one expected-points value came back per lineup; otherwise the
        # column labels in tupled_teams will not fit.
        final_preds_row = pd.concat([new_prediction_row,pd.DataFrame([xp],columns=tupled_teams)],axis=1).drop(['home_score','away_score','is_home'],axis=1)
        all_predictions_rows.append(final_preds_row)
    final_prediction_dataframe = pd.concat(all_predictions_rows,ignore_index=True)
    return final_prediction_dataframe

In [None]:
# Number of players fielded in the two multi-player position groups.
cb_subset_size = 2
cm_subset_size = 3

# Squad row indexes available per position group. Index 20 (the reserve row of
# the saved squad) appears once per slot of the group, so a group can be
# filled entirely by reserves.
cb_indexes = [1, 2, 3, 4, 20, 20]
rb_indexes = [5, 6, 20]
lb_indexes = [7, 8, 20]
cm_indexes = [9, 10, 11, 12, 13, 20, 20, 20]
rw_indexes = [14, 15, 20]
lw_indexes = [16, 17, 20]
st_indexes = [18, 19, 20]

# Every way of filling each position group. Groups with a repeated reserve
# index produce duplicate tuples at this stage.
cb_combos = list(combinations(cb_indexes, cb_subset_size))
cm_combos = list(combinations(cm_indexes, cm_subset_size))
# Single-slot groups: one 1-tuple per available index.
rb_combos = [(idx,) for idx in rb_indexes]
lb_combos = [(idx,) for idx in lb_indexes]
rw_combos = [(idx,) for idx in rw_indexes]
lw_combos = [(idx,) for idx in lw_indexes]
st_combos = [(idx,) for idx in st_indexes]

In [None]:
#Set different combos to vary the formation here depending on team.
combinations_list = list(product(cb_combos, rb_combos, lb_combos, cm_combos,rw_combos,lw_combos,st_combos))
combinations_list = list( dict.fromkeys(combinations_list) )
len(combinations_list)

In [None]:
# For each listed team: load its saved squad, enumerate the distinct lineups,
# and precompute the reward table. The lineup globals set here
# (list_of_values, tupled_teams) are read by get_team_reward_df.
for team_id in [24]:
    print(team_id)
    team_squad = pd.read_csv('data/18-19_Squad/'+str(team_id)+'.csv')
    combinations_list = list(dict.fromkeys(product(cb_combos, rb_combos, lb_combos, cm_combos, rw_combos, lw_combos, st_combos)))
    # Flatten each lineup's position tuples into one list of squad row indexes,
    # always led by row 0.
    list_of_teams = [[0, *(idx for group in lineup for idx in group)] for lineup in combinations_list]
    # Summed VAEP of each lineup.
    list_of_values = [team_squad.loc[lineup]['vaep'].sum() for lineup in list_of_teams]
    # String form of each lineup, used as the reward column labels.
    tupled_teams = [str(tuple(lineup)) for lineup in list_of_teams]
    team_reward_df = get_team_reward_df(team_id)

In [None]:
# Save the reward table. `team_id` and `team_reward_df` are whatever the loop above left behind,
# so only the last team processed is written if that loop covers more than one team.
team_reward_df.to_csv('data/Team_rewards_DF/'+str(team_id)+'.csv',index=False)