# Solving the Team MDP (Section 6)
##### Also contains some code to get data for experiments

## 1. Imports & loading data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import gc
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime as dt
import pickle
import xgboost as xgb
import daal4py as d4p
import math
import heapq
import random
import scipy.stats as stats
import seaborn as sns
import time
import os
from scipy.stats import gaussian_kde
os.chdir("..")
import load_data
from mcts import Node
import updated_injury_probs as uip
from copy import deepcopy
import MDP
import brute_force
import SingleAgentMCTS
from graph_visualisation import GraphVisualisation
from qtable import QTable
from ucb import UpperConfidenceBounds
from scipy.stats import gaussian_kde

In [None]:
# Load the fixture schedule, squad list and per-line-up reward table for one team.
# The last 38 game dates from one player's frame are reused as the schedule for every player.
game_dates = load_data.load_player_data('Eden Hazard')[0]['game_date'][-38:].reset_index(drop=True) # Gets consistent game schedule
team_id = 23
team_squad = pd.read_csv('data/18-19_Squad/'+str(team_id)+'.csv')
team_rewards = pd.read_csv('data/Team_rewards/'+str(team_id)+'.csv')
# Keep only reward columns whose name does not contain the substring '20'.
# NOTE(review): presumably these are line-ups that include reserve slot 20 - confirm against the CSV header,
# since the substring test would also drop any other column name containing '20'.
team_reward_without_reserves = team_rewards[[col for col in team_rewards.columns if '20' not in col]].copy()
team_reward_without_reserves['game_date'] = pd.to_datetime(team_reward_without_reserves['game_date']).dt.date
# The first squad row is skipped here and for the VAEP values below
# (NOTE(review): presumably the goalkeeper, since later cells prepend 'GK' to printed teams - confirm).
players = team_squad['player_name'].unique().tolist()[1:]
player_1 = players[0]
squad_vaeps = team_squad['vaep'].values[1:]
# Columns from position 6 onwards are treated as per-line-up rewards
# (assumes the first six columns are game metadata - TODO confirm).
team_ranked_squads = (team_rewards.iloc[:,6:].sum() / team_rewards.iloc[:,6:].sum().sum()).sort_values()
# Per-game gap between the best line-up reward and the mean line-up reward.
importances = (team_rewards.iloc[:,6:].max(axis=1)-team_rewards.iloc[:,6:].mean(axis=1)).values

In [None]:
# Report the team id and the largest season total obtained by any single reward column
# (columns from position 6 onwards; assumes the earlier columns are metadata - TODO confirm).
print("Team: ", team_id)
print("Max Expected Points: ", team_reward_without_reserves.iloc[:,6:].sum().max())

In [None]:
# Build, for every squad member id, the feature frame, per-minute features and
# initial injury-history dict that the MDP consumes.
ids = list(range(1, 20))
player_dfs = {}
player_features = {}
player_inj_dicts = {}
_,_,injury_model,injury_scaler = load_data.load_player_data(player_1)
scaler_mean = injury_scaler.mean_
scaler_var = injury_scaler.var_

#Create Daal model
daal_injury_model = d4p.get_gbt_model_from_xgboost(injury_model.copy())

for player,id_val in zip(players,ids):
    try:
        player_dfs[id_val],player_features[id_val],_,_ = load_data.load_player_data(player)
    except Exception as err:
        # Best-effort fallback: reuse the first player's data, but say so instead of
        # substituting silently (a bare `except:` would also swallow KeyboardInterrupt).
        print("Could not load data for", player, "- using", players[0], "instead:", repr(err))
        player_dfs[id_val],player_features[id_val],_,_ = load_data.load_player_data(players[0])
    if math.isnan(player_features[id_val]['dist_covered']):
        player_features[id_val]['dist_covered']=125 # Impute values if data isn't there for a player
    if math.isnan(player_features[id_val]['num_dribbles']):
        player_features[id_val]['num_dribbles']=0.4
    #player_features[id_val]['dist_covered']*=1.1 # accounts for time differences
    #player_features[id_val]['num_dribbles']*=1.1
    player_dfs[id_val] = player_dfs[id_val].drop(['Competition','player_id','rolling_mins_played_exp'],axis=1)
    player_dfs[id_val][['team_id','opp_team_id']] = player_dfs[id_val][['team_id','opp_team_id']].astype("category")
    # Injury history is taken from the first row of the player's frame.
    first_row = player_dfs[id_val]
    player_inj_dicts[id_val] = {
        'num_injuries':first_row.at[0,"num_injuries"],
        'total_days_out':first_row.at[0,"total_days_out"],
        'days_out_last_injury':first_row.at[0,"days_out_last_injury"],
        'days_since_last_injury':first_row.at[0,"days_since_last_injury"],
        'injuries_past_twelve_months':first_row.at[0,"injuries_past_twelve_months"],
    }

    # Right-merge so that every game in the reward table has a row, in date order.
    player_dfs[id_val] = player_dfs[id_val].merge(team_reward_without_reserves,on='game_date',how='right').sort_values('game_date').reset_index(drop=True)
    # NOTE(review): `.loc[-38:]` is label based, so on this 0-based index it keeps every row,
    # not the last 38. That is harmless only while the merge yields exactly 38 rows.
    player_dfs[id_val] = player_dfs[id_val].loc[-38:,[col for col in player_dfs[id_val].columns if '(' not in col]].rename(columns={'team_id_x':'team_id'})
    player_dfs[id_val]['game_date'] = game_dates

In [None]:
#for i in range(1,20):
#    player_dfs[i].to_csv('team_selection_MDP/Feature_DF/'+str(team_id)+'/'+str(i)+'.csv',index=False)

In [None]:
# Fit a Gaussian kernel density estimate to the 'Days' column of the injury table;
# it is handed to the MDP constructor as `inj_kde`.
injury_data = pd.read_csv('data/injury_data/all_player_injuries_updated.csv')
inj_kde = gaussian_kde(injury_data['Days']) #Injury length distribution

## 2. Demonstration of the MDP
Uncomment print statements to get an overview of the MDP transitions

In [None]:
# Walk through one season with uniformly random actions, timing the three MDP calls
# (get_actions, state_transition, get_reward) and accumulating the reward.
#start_time = time.time()
results = []
n_games = 38
n_players = 19
total_reward = 0
mdp = MDP.GameMDP(n_games, n_players, daal_injury_model,game_dates,player_features,scaler_mean,scaler_var,inj_kde,player_inj_dicts)
curr_state=mdp.current_state
start_time = time.time()
total_action_time = 0
total_transition_time = 0
total_reward_time = 0
inj_probs = []
count=0
for i in range(n_games):
    #print("Game: ", i)
    count+=1
    #print(mdp.injury_features)
    st = time.time()
    available_actions = mdp.get_actions(curr_state)
    total_action_time += (time.time()-st)
    #print("State",i,":", curr_state[7])
    #print("Current reward: ", mdp.get_curr_reward()) 
    if len(available_actions) == 0:
        # Without an action there is nothing to transition on; the previous code
        # set action=None and then crashed on list(None).
        print("No available actions for game", i, "- stopping early")
        break
    action=random.choice(available_actions)
    #print("Action chosen: ", action)
    #print("Possible outcomes: ", mdp.get_successors(curr_state, action))
    #print("---------")
    st = time.time()
    new_state = mdp.state_transition(curr_state,list(action))
    total_transition_time += (time.time()-st)
    action_print = [a+1 for a in action]
    #print("Action: ", action_print)
    #print("Action names: ", ['Cech']+[team_squad.at[v,'player_name'] for v in action_print])
    print("Injury probs: ", new_state[2])
    print("Rolling Mins: ", new_state[6])
    #print("Injury Length: ", new_state[4])
    #print("Minutes:", action, "Reward: ", mdp.get_reward(curr_state,action,new_state))
    inj_probs.append(curr_state[2])
    #print(curr_state[5])
    mdp.set_state(new_state)
    st = time.time()
    # Evaluate the reward once so the printed value is exactly the accumulated one.
    reward = mdp.get_reward(curr_state,action,new_state)
    total_reward += reward
    total_reward_time += (time.time()-st)
    print("Reward:", reward)
    curr_state=mdp.get_state()
    print("------------------------------------")
    #print(mdp.is_terminal(curr_state))
print("Total Reward: ", total_reward)
print("Total time: ", time.time()-start_time)
print("Total Action time: ", total_action_time)
print("Total Transition time: ", total_transition_time)
print("Total Reward time: ", total_reward_time)

#print(sum(inj_probs))
#results.append(total_reward)

## 3. Running MCTS for a single game

In [None]:
# Run a 10-second MCTS search from a fresh MDP and prepare a graph visualiser.
# `n_players` is not set here; it must already exist from the demo cell above.
minutes_kde=0
max_importance=0
n_games = 38
mdp = MDP.GameMDP(n_games, n_players, daal_injury_model,game_dates,player_features,scaler_mean,scaler_var,inj_kde,player_inj_dicts)
st = time.time()  # start timestamp; not read again in this cell
qfunction = QTable()
# mcts() returns five values; only the first (SAM, the search root) is used by the following cells.
SAM,sel,exp,sim,back = SingleAgentMCTS.SingleAgentMCTS(mdp,qfunction,UpperConfidenceBounds(),minutes_kde,max_importance).mcts(timeout=10)
gv = GraphVisualisation(max_level=3)
#graph = gv.single_agent_mcts_to_graph(SAM)
#graph

In [None]:
# Convert the search tree rooted at SAM into a graph and display it as the cell output.
graph = gv.single_agent_mcts_to_graph(SAM)
graph

In [None]:
# Pick the root action with the highest Q-value among actions visited more than 100 times.
possible_actions = SAM.mdp.get_actions(SAM.state)
values=[]   # Q-values of the sufficiently visited actions
indexes=[]  # position of each of those actions inside possible_actions
hashable_state = (SAM.state[1],frozenset((k, v) for d in [SAM.state[2],SAM.state[4]] for k, v in d.items()))
for action_pos, j in enumerate(possible_actions):
    key = (hashable_state,tuple(j))
    if SAM.visits[key] > 100:
        values.append(qfunction.qtable[key])
        indexes.append(action_pos)

if not values:
    raise ValueError("No action was visited more than 100 times; run MCTS for longer")

# `values` skips under-visited actions, so its index must be mapped back through
# `indexes` before it is used on possible_actions.
max_value = max(0, max(values))  # same floor at 0 as before
max_index = indexes[values.index(max(values))]
best_action = tuple(possible_actions[max_index])

In [None]:
# Action entries are offset by one to index into team_squad; 'GK' is prepended as a fixed label.
print("Best action team: ", ['GK']+[team_squad.at[v+1,'player_name'] for v in best_action])

## 4. Iteratively run MCTS for a season
This section was converted to a script so that it could be run on a compute cluster.

In [None]:
# Plan a whole season: before each game run MCTS from the current state, play the
# best sufficiently visited action, and compare its reward with a greedy VAEP pick.
from collections import defaultdict
n_games = 38
mdp = MDP.GameMDP(n_games, n_players, daal_injury_model,game_dates,player_features,scaler_mean,scaler_var,inj_kde,player_inj_dicts)
current_state = mdp.get_initial_state()
qfunction = QTable()
# `spurs_squad` / `reward_df` are not defined anywhere in this notebook; the loaded
# equivalents are `team_squad` and `team_reward_without_reserves`.
spurs_vaep = team_squad['vaep'].values
child = None
ucb = UpperConfidenceBounds()
total_points = 0
mean_total_inj_prob = []

for i in range(n_games):
    print("Game: ", i)
    print(current_state[1:5])
    print("Mean win chance: ", team_reward_without_reserves.iloc[i,6:].mean())
    # Summed injury probability over a fixed set of ten positions in the probability dict.
    i_prob = sum(np.array(list(current_state[2].values()))[[0,3,5,6,10,11,12,13,15,17]])
    mean_total_inj_prob.append(i_prob)
    hashable_state = (current_state[1],frozenset((k, v) for d in [current_state[2],current_state[4]] for k, v in d.items()))
    
    searcher = SingleAgentMCTS.SingleAgentMCTS(mdp,qfunction,ucb,minutes_kde,max_importance)
    if child is None:
        SAM,sel,exp,sim,back = searcher.mcts(timeout=180)
    else:
        SAM,sel,exp,sim,back = searcher.mcts(timeout=180,root_node=child)
    values = []
    possible_actions = SAM.mdp.get_actions(current_state)
    max_value = 0
    for j in possible_actions:
        key = (hashable_state,tuple(j))
        if SAM.visits[key] > 30:
            values.append(qfunction.qtable[key])
            if qfunction.qtable[key] > max_value:
                max_value = qfunction.qtable[key]
        else:
            # Under-visited actions get value 0 so `values` stays aligned with possible_actions.
            values.append(0)
    max_index = values.index(max(values))
    best_action = tuple(possible_actions[max_index])
    print("Best action: ", best_action)
    print("Best action value (xPPG): ", max_value)
    print("Best action team: ", ['GK']+[team_squad.at[v+1,'player_name'] for v in best_action])
    print("Top 5: ")
    
    filtered_dict = {key: value for key, value in qfunction.qtable.items() if key[0] == hashable_state}
    top_5 = heapq.nlargest(5, filtered_dict.items(), key=lambda x: x[1])
    for key, value in top_5:
        print(['GK']+[team_squad.at[v+1,'player_name'] for v in key[1]], (value))
        
    next_state = mdp.state_transition(current_state,best_action)
    reward = mdp.get_reward(current_state,best_action,next_state)
    print("Reward: ", reward)
    total_points += reward
    
    # Greedy baseline: the action whose players have the largest summed VAEP.
    # np.asarray lets the +1 offset work whether actions are tuples, lists or arrays.
    greedy_values = [spurs_vaep[np.asarray(a)+1].sum() for a in possible_actions]
    # Index into this game's possible_actions (not a stale list from another cell).
    greedy_action = possible_actions[greedy_values.index(max(greedy_values))]
    print("Greedy Reward: ", mdp.get_reward(current_state,greedy_action,next_state))
        
    # Drop the old tree before building the next root to keep memory bounded.
    del SAM
    del child
    gc.collect()
    child = SingleAgentMCTS.SingleAgentNode(
            mdp, None, next_state, qfunction, ucb
    )
    current_state = child.state
    print("----------------------------------------------------------------------------------")
print("Total points: ", total_points)

In [None]:
# Visualise the search tree rooted at SAM.
# NOTE(review): the season loop in the preceding cell ends every iteration with `del SAM`,
# so SAM must come from another cell (e.g. the single-game search) for this to run.
gv = GraphVisualisation(max_level=3)
graph = gv.single_agent_mcts_to_graph(SAM)
graph

### Get player appearances

In [None]:
# Collect the starting line-up rows of this team for every game of season 4.
games = pd.read_csv('data/overview_data/games_data.csv')
games_spurs = games[(games['home_team_id'] == team_id) | (games['away_team_id'] == team_id)]
games_spurs = games_spurs[games_spurs['season_id']==4]
lineup_frames = []
for game_id in games_spurs['game_id']:
    game_lineup = pd.read_csv('data/game_data/'+str(game_id)+'/'+str(game_id)+'_lineup.csv')
    game_lineup = game_lineup[(game_lineup['team_id']==team_id) & (game_lineup['is_starter']==True)]
    lineup_frames.append(game_lineup)
# Concatenate once: growing a DataFrame inside the loop re-copies all previous rows
# on every iteration. pd.concat rejects an empty list, hence the guard.
all_lineups_spurs = pd.concat(lineup_frames,axis=0) if lineup_frames else pd.DataFrame()

In [None]:
# Number of starts per player name, restricted to the current squad (0 if never started),
# then re-keyed by the player's row label in team_squad.
player_start_counts = all_lineups_spurs['player_name'].value_counts().to_dict()
player_start_counts_filtered = {
    name: player_start_counts.get(name, 0)
    for name in team_squad['player_name'].values
}
player_id_counts = {
    team_squad.index[(team_squad['player_name'] == name).values][0]: starts
    for name, starts in player_start_counts_filtered.items()
}
    
#with open('team_selection_MDP/Player_predictions/player_counts_dict.pkl', 'wb') as f:
#    pickle.dump(player_id_counts, f)

## 5. Test Greedy Method over many seasons

In [None]:
# VAEP value per squad row, indexed by squad id (row 0 included).
squad_vaep = team_squad['vaep'].values
# Squad ids grouped into seven selection pools; `counts` gives how many players are
# picked from the pool at the same position (2+1+1+3+1+1+1 = 10 players in total).
positions = [[1,2,3,4],[5,6],[7,8],[9,10,11,12,13],[14,15],[16,17],[18,19]]
counts = [2,1,1,3,1,1,1]

def create_binary_indicator_ohe(original_list, sublist_size):
    """Return a float vector of length `sublist_size` with ones at `original_list`.

    Every position named in `original_list` is set to 1; all others stay 0.
    """
    indicator = np.zeros(sublist_size)
    indicator[original_list] = 1
    return indicator

def selections(selection_list, num_selections, injury_dict, start_counts=None):
    """Randomly pick `num_selections` fit players, weighted by how often they started.

    Args:
        selection_list: candidate squad ids for one selection pool.
        num_selections: number of players to pick from the pool.
        injury_dict: maps squad id -> truthy when the player is injured.
        start_counts: maps squad id -> number of starts, used as sampling weight.
            Defaults to the module-level `player_id_counts`.

    Returns:
        Sorted list of the chosen ids, each shifted down by one.

    Injured players are removed; if too few remain, the pool is padded with the
    filler id 20. Players are drawn one at a time without replacement. Ids missing
    from `start_counts` get weight 0 (previously they were skipped, which left the
    weights shorter than the candidate list), and when every remaining weight is
    zero the draw is uniform instead of failing.
    """
    if start_counts is None:
        start_counts = player_id_counts

    candidates = [player for player in selection_list if not injury_dict[player]]
    candidates.extend([20] * (num_selections - len(candidates)))
    # One weight per candidate, always in step with `candidates`.
    weights = [start_counts.get(player, 0) for player in candidates]

    chosen = []
    for _ in range(num_selections):
        if sum(weights) > 0:
            pick = random.choices(range(len(candidates)), weights=weights, k=1)[0]
        else:
            pick = random.randrange(len(candidates))
        chosen.append(candidates.pop(pick) - 1)
        del weights[pick]
    return sorted(chosen)

def greedy_selection(selection_list, player_vaeps, num_selections, injury_dict):
    """Pick the `num_selections` fit players with the highest VAEP from one pool.

    Args:
        selection_list: candidate squad ids for one selection pool.
        player_vaeps: VAEP values indexable by squad id (array or list). It must
            also cover the filler id 20 whenever padding is needed.
        num_selections: number of players to pick.
        injury_dict: maps squad id -> truthy when the player is injured.

    Returns:
        The chosen ids in pool order, each shifted down by one.
    """
    # `[-0:]` would select every candidate, so handle "pick nothing" explicitly.
    if num_selections <= 0:
        return []

    candidates = [player for player in selection_list if not injury_dict[player]]
    # Pad with the filler id 20 when injuries leave too few candidates.
    candidates.extend([20] * (num_selections - len(candidates)))

    candidate_values = np.asarray(player_vaeps)[candidates]
    top_positions = np.argsort(candidate_values)[-num_selections:]
    return [candidates[pos] - 1 for pos in sorted(top_positions)]

In [None]:
# Simulate 1000 seasons with the greedy (highest-VAEP, fit players only) selection,
# recording rewards and injury counts and writing one CSV per simulated season.
#start_time = time.time()
all_rewards = []
all_injuries = []
all_injuries_starting = []
for j in range(1000):
    results = []
    n_games = 38
    n_players = 19
    total_reward = 0
    mdp = MDP.GameMDP(n_games, n_players, daal_injury_model,game_dates,player_features,scaler_mean,scaler_var,inj_kde,player_inj_dicts)
    curr_state=mdp.current_state
    start_time = time.time()
    total_action_time = 0
    total_transition_time = 0
    total_reward_time = 0
    total_injury_prob = 0
    inj_probs = []
    reward_list = []
    count=0
    total_injuries = []
    total_injuries_starting = []
    for i in range(n_games):
        count+=1
        st = time.time()
        available_actions = mdp.get_actions(curr_state)
        total_action_time += (time.time()-st)
        # Build the greedy line-up pool by pool; curr_state[3] is used as the injured flags.
        action_sim=[]
        for p,c in zip(positions,counts):
            action_sim += greedy_selection(p,squad_vaep,c,curr_state[3])
        inj=curr_state[4]
        # NOTE(review): `optimal_team_index` is not defined anywhere in this notebook;
        # it has to be supplied before this cell runs.
        starting_inj = {key:value for key,value in inj.items() if key in optimal_team_index}
        total_injuries.append(list(inj.values()))
        total_injuries_starting.append(list(starting_inj.values()))
        st = time.time()
        new_state = mdp.state_transition(curr_state,action_sim)
        total_transition_time += (time.time()-st)
        # (The unused `action_print` line was removed: it read `action`, a leftover
        # variable from the demo cell, not this loop's `action_sim`.)
        #print("Num Injuries: ", sum(list(curr_state[3].values())))
        total_injury_prob += np.array(list(new_state[2].values()))[[0,3,5,6,10,11,12,13,15,17]].sum()
        #print("Minutes:", action, "Reward: ", mdp.get_reward(curr_state,action,new_state))
        inj_probs.append(curr_state[2])
        #print(curr_state[5])
        st = time.time()
        total_reward += mdp.get_reward(curr_state,action_sim,new_state)
        total_reward_time += (time.time()-st)
        #print("Reward:", mdp.get_reward(curr_state,action,new_state))
        curr_state=new_state
        #print("------------------------------------")
        #print(mdp.is_terminal(curr_state))
    greedy_df = pd.concat([pd.DataFrame(total_injuries),pd.DataFrame(inj_probs)],axis=1)
    # The season's total reward is part of the file name.
    greedy_df.to_csv('data/greedy_dfs/'+str(team_id)+'/'+str(team_id)+'_'+str(total_reward)+'.csv',index=False)
    # Count game-to-game increases in each column as injury events.
    injuries_matrix = (pd.DataFrame(total_injuries).diff().fillna(0) > 0).sum().sum()
    injuries_starting_matrix = (pd.DataFrame(total_injuries_starting).diff().fillna(0) > 0).sum().sum()
    all_injuries.append(injuries_matrix)
    all_injuries_starting.append(injuries_starting_matrix)
    all_rewards.append(total_reward)
    #print(total_reward)
    #print("Injuries: ", injuries_matrix.sum().sum())
    #print("FT Injuries: ", injuries_starting_matrix.sum().sum())
    if j % 50 == 0:
        print("-------------------")
        print("Total Reward: ", np.array(all_rewards).mean())
        print("Injuries: ", np.array(all_injuries).mean(), "LQ: ", np.percentile(np.array(all_injuries),25,method='linear'), "UQ: ", np.percentile(np.array(all_injuries),75,method='linear'))
        print("FT Injuries: ", np.array(all_injuries_starting).mean(), "LQ: ", np.percentile(np.array(all_injuries_starting),25,method='linear'), "UQ: ", np.percentile(np.array(all_injuries_starting),75,method='linear'))
        print("SE: ",  np.array(all_rewards).std()/np.sqrt(len(all_rewards)))

In [None]:
# Summary statistics of the simulated season rewards.
rewards_arr = np.array(all_rewards)
print("Mean: ", rewards_arr.mean())
print("LQ: ", np.percentile(rewards_arr, 25))
print("STDEV: ", rewards_arr.std())

In [None]:
# Scatter the first (up to) 1000 simulated season rewards against their sample number.
import matplotlib.pyplot as plt
shown_rewards = np.array(all_rewards)[:1000]
# Size the x axis from the data so the plot also works with fewer than 1000 samples.
plt.scatter(range(len(shown_rewards)),shown_rewards)
plt.xlabel('Samples')
plt.ylabel('Expected Points')

## 6. Real Life Simulator - Experiment 2

In [None]:
# Table of real minutes played: one row per game in team_rewards, one column per
# squad member (team_squad rows 1..-1), plus the game date.
from datetime import datetime
squad_ids = team_squad['player_id'].values[1:-1]
game_ids = team_rewards['game_id'].values

squad_real_minutes = []
for game_id in game_ids:
    game_information = pd.read_csv('data/game_data/'+str(game_id)+'/'+str(game_id)+'_lineup.csv')
    lineup_player_ids = game_information['player_id'].values
    # Minutes from the player's first line-up row, or 0 when absent from the line-up file.
    game_squad_minutes = [
        game_information.loc[game_information['player_id']==squad_id,'minutes_played'].values[0]
        if squad_id in lineup_player_ids
        else 0
        for squad_id in squad_ids
    ]
    squad_real_minutes.append(game_squad_minutes)

squad_real_minutes_df = pd.DataFrame(squad_real_minutes)
squad_real_minutes_df.columns = team_squad[1:-1].index
squad_real_minutes_df['game_date'] = team_rewards['game_date']

In [None]:
# For every game, list the squad columns whose player logged more than 55 minutes,
# with 0 prepended to each list. The last column (game_date) is excluded.
squad_real_minutes_df_played = squad_real_minutes_df.iloc[:,:-1] > 55
real_life_selections_list = {
    game_idx: [0] + list(squad_real_minutes_df_played.columns[played_flags.values])
    for game_idx, played_flags in squad_real_minutes_df_played.iterrows()
}

In [None]:
# Load the "real life" feature frames per squad id, falling back to the first
# player's data when a player cannot be loaded or has no injury history at all.
player_dfs_RL = {}
player_features_RL = {}
player_inj_dicts_pg = {}
for player,id_val in zip(players,ids):
    try:
        player_dfs_RL[id_val],player_features_RL[id_val],_,_ = load_data.load_player_data_RL(player)
    except Exception as err:
        # Report the substitution; a bare `except:` hid it and also caught KeyboardInterrupt.
        print("Could not load RL data for", player, "- using", players[0], "instead:", repr(err))
        player_dfs_RL[id_val],player_features_RL[id_val],_,_ = load_data.load_player_data_RL(players[0])
    # Every num_injuries value missing -> treat the player's data as unusable.
    if player_dfs_RL[id_val]['num_injuries'].isna().sum() == len(player_dfs_RL[id_val]):
        player_dfs_RL[id_val],player_features_RL[id_val],_,_ = load_data.load_player_data_RL(players[0])
    player_dfs_RL[id_val]=player_dfs_RL[id_val].ffill().bfill()
    player_dfs_RL[id_val] = player_dfs_RL[id_val].merge(team_reward_without_reserves,on='game_date',how='right').sort_values('game_date').reset_index(drop=True)
    # NOTE(review): `.loc[-38:]` is label based and keeps every row of this 0-based index.
    player_dfs_RL[id_val] = player_dfs_RL[id_val].loc[-38:,[col for col in player_dfs_RL[id_val].columns if '(' not in col]].rename(columns={'team_id_x':'team_id'})

In [None]:
# --- Injury-history dict of every player before each game ------------------------
for i in range(len(player_dfs_RL[1])):
    player_inj_dicts_RL = {}
    for id_val in ids:
        inj_dict = {'num_injuries':player_dfs_RL[id_val].at[i,"num_injuries"],'total_days_out':player_dfs_RL[id_val].at[i,"total_days_out"],'days_out_last_injury':player_dfs_RL[id_val].at[i,"days_out_last_injury"],
                'days_since_last_injury':player_dfs_RL[id_val].at[i,"days_since_last_injury"],'injuries_past_twelve_months':player_dfs_RL[id_val].at[i,"injuries_past_twelve_months"]}
        player_inj_dicts_RL[id_val] = inj_dict
    player_inj_dicts_pg[i] = player_inj_dicts_RL
    
# --- Minutes each player logged in the games before game j, keyed by date --------
squad_minutes_dict_pg = {}
for j in range(len(player_dfs_RL[1])):
    squad_minutes_dict = {key: {} for key in range(1, 20)}
    for i in ids:
        for date in squad_real_minutes_df['game_date'].values[:j]:
            # Named `game_day` so the module alias from `import datetime as dt` is not overwritten.
            game_day = datetime.strptime(date, "%Y-%m-%d %H:%M:%S").date()
            squad_minutes_dict[i][game_day] = squad_real_minutes_df.loc[squad_real_minutes_df['game_date'] == date,i].values[0]
    squad_minutes_dict_pg[j] = squad_minutes_dict
    
injury_df = pd.read_csv('data/injury_data/all_player_injuries_updated.csv')
injury_df['from'] = pd.to_datetime(injury_df['from'])
injury_df['until'] = pd.to_datetime(injury_df['until'])

# --- Was each player inside a recorded injury spell on the day of each game? -----
player_injured_dict = {}
for i,row in squad_real_minutes_df.iterrows():
    curr_player_injured_dict={}
    game_timestamp = pd.to_datetime(row['game_date'])  # same for every player in this game
    for j in range(len(squad_ids)):
        temp_df = injury_df[injury_df['sb_id'] == squad_ids[j]]
        is_injured = ((temp_df['from'] <= game_timestamp) & (temp_df['until'] >= game_timestamp)).any()
        curr_player_injured_dict[j+1]=is_injured
    player_injured_dict[i] = curr_player_injured_dict
    
# --- Number of consecutive games (this one included) a currently injured player misses
player_length_dict = {}
for i,row in squad_real_minutes_df.iterrows():
    curr_player_length_dict={}
    for j in range(len(squad_ids)):
        if player_injured_dict[i][j+1] == True:
            count=1
            while ((i+count) < len(squad_real_minutes_df)):
                if player_injured_dict[i+count][j+1] == True:
                    count+=1
                else:
                    break
            curr_player_length_dict[j+1] = count
        else:
            curr_player_length_dict[j+1] = 0
    player_length_dict[i] = curr_player_length_dict
    
# --- Exponentially weighted minutes, shifted so each game only sees earlier games
rolling_df = squad_real_minutes_df.loc[:,1:19].ewm(alpha=0.7,adjust=False).mean().shift(1).fillna(0).copy()
squad_rolling_mins_pg = {}
for i in range(len(player_dfs_RL[1])):
    curr_rolling_mins = {}
    for j in range(len(squad_ids)):
        curr_rolling_mins[j+1] = rolling_df.loc[i,j+1]
    squad_rolling_mins_pg[i] = curr_rolling_mins
    
def predict_proba(x,m,v,model):
    """Return column 1 of the class probabilities predicted for standardised `x`.

    `x` is standardised as (x - m) / sqrt(v) and passed, together with `model`,
    to the module-level `daal_predict_algo`.
    """
    standardised = (x - m) / v ** .5
    result = daal_predict_algo.compute(standardised, model)
    return result.probabilities[:,1]

# Two-class probability predictor used by predict_proba.
daal_predict_algo = d4p.gbt_classification_prediction(
    nClasses=2,
    resultsToEvaluate="computeClassProbabilities",
    fptype='float'
)

distance_per_mins_RL = {key: value['dist_covered'] for key, value in player_features_RL.items()}
num_dribbles_per_mins_RL = {key: value['num_dribbles'] for key, value in player_features_RL.items()}

# Injury probability of every player before each game.
injury_probs_pg = {}
for i in range(len(player_dfs_RL[1])):
    injury_features_all={}
    pred_list = []
    # For game index 37 and beyond no new prediction is made; the last one is reused.
    # NOTE(review): 37 presumably means "last game of a 38-game season" - confirm.
    if i < 37:
        for j in player_features_RL.keys():
            injury_features_all[j] = uip.add_ingame_features_using_minutes_rbr(j,i+1,squad_rolling_mins_pg[i][j],distance_per_mins_RL[j], num_dribbles_per_mins_RL[j],squad_minutes_dict_pg[i][j],player_inj_dicts_pg[i][j])
            pred_list+=injury_features_all[j].tolist()
        inj_updated = predict_proba(pred_list,scaler_mean,scaler_var,daal_injury_model).round(3)
    # Key the predictions by the player ids that produced them. The old range over the
    # number of games only gave the right keys because zip() truncated it.
    injury_probs_pg[i] = dict(zip(player_features_RL.keys(),inj_updated))

In [None]:
# Assemble one MDP-style state tuple per game from the per-game tables:
# (remaining game indexes, game index, injury probs, injured flags, injury lengths,
#  rolling minutes, minutes history, injury-history dicts).
all_RL_states = {
    game: (
        tuple(range(game,len(player_dfs_RL[1]))),
        game,
        injury_probs_pg[game],
        player_injured_dict[game],
        player_length_dict[game],
        squad_rolling_mins_pg[game],
        squad_minutes_dict_pg[game],
        player_inj_dicts_pg[game],
    )
    for game in range(len(player_dfs_RL[1]))
}

In [None]:
def greedy_selection_RL(selection_list, player_vaeps, num_selections, injury_dict):
    """Pick the `num_selections` fit players with the highest VAEP from one pool.

    Unlike `greedy_selection`, the returned values are the squad ids themselves
    (no shift by one).

    Args:
        selection_list: candidate squad ids for one selection pool.
        player_vaeps: VAEP values indexable by squad id (array or list). It must
            also cover the filler id 20 whenever padding is needed.
        num_selections: number of players to pick.
        injury_dict: maps squad id -> truthy when the player is injured.

    Returns:
        The chosen squad ids in pool order.
    """
    # `[-0:]` would select every candidate, so handle "pick nothing" explicitly.
    if num_selections <= 0:
        return []

    candidates = [player for player in selection_list if not injury_dict[player]]
    # Pad with the filler id 20 when injuries leave too few candidates.
    candidates.extend([20] * (num_selections - len(candidates)))

    candidate_values = np.asarray(player_vaeps)[candidates]
    top_positions = np.argsort(candidate_values)[-num_selections:]
    return [candidates[pos] for pos in sorted(top_positions)]

# Squad ids grouped into seven selection pools; `counts` gives how many players are
# picked from the pool at the same position (10 players in total).
positions = [[1,2,3,4],[5,6],[7,8],[9,10,11,12,13],[14,15],[16,17],[18,19]]
counts = [2,1,1,3,1,1,1]
# VAEP value per squad row, indexed by squad id (row 0 included).
squad_vaep = team_squad['vaep'].values

In [None]:
# For every real game: start MCTS from the reconstructed real-life state, take the
# best action, build the greedy line-up, and measure how many players each shares
# with the line-up that actually played (more than 55 minutes).
from collections import defaultdict
player_matches_list = []
player_greedy_list = []
n_games=38
n_players=19
game_dates_RL=pd.to_datetime(squad_real_minutes_df['game_date']).dt.date
# NOTE(review): the MDP receives `player_inj_dicts` (season-start history), not one of
# the per-game real-life dicts - confirm this is intended.
mdp = MDP.GameMDP(n_games, n_players, daal_injury_model,game_dates_RL,player_features_RL,scaler_mean,scaler_var,inj_kde,player_inj_dicts)
qfunction = QTable()
child = None
ucb = UpperConfidenceBounds()

for i in range(n_games):
    # Every game is searched independently from its own reconstructed state.
    current_state = all_RL_states[i]
    child = SingleAgentMCTS.SingleAgentNode(
            mdp, None, all_RL_states[i], qfunction, ucb
        )
    
    print("Game: ", i)
    print("Mean win chance: ", team_reward_without_reserves.iloc[i,6:].mean())
    hashable_state = (current_state[1],frozenset((k, v) for d in [current_state[2],current_state[4]] for k, v in d.items()))
    SAM,sel,exp,sim,back = SingleAgentMCTS.SingleAgentMCTS(mdp,qfunction,ucb,0,0).mcts(timeout=180,root_node=child)
    values = []
    backup_values = []
    possible_actions = SAM.mdp.get_actions(current_state)
    max_value = 0
    for j in possible_actions:
        # `values` only trusts actions visited more than 30 times (0 otherwise);
        # `backup_values` keeps every Q-value for the fallback below.
        if SAM.visits[(hashable_state,tuple(j))] > 30:
            values.append(qfunction.qtable[(hashable_state,tuple(j))])
            if qfunction.qtable[(hashable_state,tuple(j))] > max_value:
                max_value = qfunction.qtable[(hashable_state,tuple(j))]
        else:
            values.append(0)
        backup_values.append(qfunction.qtable[(hashable_state,tuple(j))])
    if max(values) > 0:
        max_index = values.index(max(values))
        best_action = list(possible_actions[max_index])
    else:
        # No well-visited action has a positive value: fall back to the best raw Q-value.
        max_index = backup_values.index(max(backup_values))
        best_action = list(possible_actions[max_index])
        max_value = max(backup_values)
    # Shift to the id convention of real_life_selections_list and prepend 0, as that list does.
    best_action = [0]+[v+1 for v in best_action]
    action_sim=[0]
    for p,c in zip(positions,counts):
        action_sim += greedy_selection_RL(p,squad_vaep,c,current_state[3])
    len_selection = len(real_life_selections_list[i])
    print("Best action: ", best_action)
    print("Action Greedy: ", action_sim)
    print("Best action value (xPPG): ", max_value)
    print(real_life_selections_list[i])
    # Shared players plus filler slots (id 20), capped at the size of the real selection.
    num_matched_players = min(len_selection,len(set(best_action).intersection(real_life_selections_list[i])) + len([b for b in best_action if b == 20 ]))
    num_matched_players_greedy = min(len_selection,len(set(action_sim).intersection(real_life_selections_list[i])) + len([b for b in action_sim if b == 20 ]))
    
    print("Sim MCTS: ", num_matched_players/len_selection)
    print("Sim Greedy: ", num_matched_players_greedy/len_selection)
    player_matches_list.append(num_matched_players/len_selection)
    player_greedy_list.append(num_matched_players_greedy/len_selection)
    
    #print("Best action team: ", ['Hugo Lloris']+[spurs_squad.at[v+1,'player_name'] for v in best_action])
    # Reset all search state so the next game starts from an empty Q-table and visit counts.
    del SAM
    del child
    gc.collect()
    qfunction = QTable()
    ucb = UpperConfidenceBounds()
    Node.visits = defaultdict(lambda: 0)

In [None]:
# Mean similarity of the greedy line-ups to the real line-ups
# (both lists receive one entry per game, so their lengths match).
sum(player_greedy_list)/len(player_matches_list)

In [None]:
# Save the per-game greedy similarity scores for this team.
# NOTE(review): only the greedy list is written; `player_matches_list` (MCTS) is not exported here - confirm intended.
pd.DataFrame(player_greedy_list).to_csv('MCTS_RL_Data_2/'+str(team_id)+'.csv',index=False)