# Experiment Plots - Section 7

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from matplotlib import rcParams
import os
# Work from the parent of the start directory; the relative data paths used
# further down are resolved against it.
# NOTE: re-running this cell moves up one more directory each time.
os.chdir(os.pardir)

# Colour used for the real-world rest markers in the player-welfare figure.
default_color = '#4477AA'

# Default figure size in inches (currently disabled).
# rcParams['figure.figsize'] = 6.2, 3.17

## Paper Experiment 1 - Injury Model

In [None]:
# Load the injury-model predictions and the team overview table.
injuries_df = pd.read_csv('data/predictions/injury_predictions_XGB.csv')
injuries_df['date'] = pd.to_datetime(injuries_df['date'], format='%Y-%m-%d %H:%M:%S')
teams = pd.read_csv('data/overview_data/teams.csv')
team_ids = sorted(teams['team_id'].values)

# Teams whose totals are used as they are; every other team's totals are halved.
# NOTE(review): presumably these teams have one season of data and the others
# two, so halving gives a per-season figure - confirm against the data.
undivided_team_ids = [30, 26, 27, 46, 55, 58]

# Per team (in sorted team_id order): observed injuries and summed predicted
# injury probabilities.
injureds = []
injured_probs = []
for team_id in team_ids:
    print(team_id)
    team_injuries = injuries_df[injuries_df['team_id'] == team_id]
    observed_total = team_injuries['injured'].sum()
    expected_total = team_injuries['injury_prob'].sum()
    keep_as_is = team_id in undivided_team_ids
    injureds.append(observed_total if keep_as_is else observed_total / 2)
    injured_probs.append(expected_total if keep_as_is else expected_total / 2)

In [None]:
# Figure: actual vs. model-expected injuries per team, with a fitted regression
# line and a text label next to every point.
plt.style.reload_library()
plt.style.use(['science','no-latex'])
rcParams['figure.figsize'] = 6.6,3.17
plt.gcf()  # creates a figure at the size set above if none is current
with plt.style.context(['science','no-latex','bright']):
    rplot = sns.regplot(x=injureds, y=injured_probs, scatter_kws={'s': 50})

    # Display names are matched to the points by position, so this list must be
    # in the same order as `injureds` / `injured_probs`.
    team_names = ['Wolves','Burnley','Leicester','Brighton','West Brom','Fulham','Newcastle','Chelsea','Huddersfield','Stoke','Swansea','Watford','Crystal Palace','Bournemouth','Man City','Liverpool','Everton','Cardiff','Southampton','Tottenham','Man United','West Ham','Arsenal']

    # Label offsets relative to the point; a few teams get a different offset
    # (presumably to keep neighbouring labels from overlapping).
    shifted_left = {'Huddersfield', 'Tottenham'}
    placed_below = {'Swansea', 'Liverpool', 'Southampton', 'Burnley'}
    for actual, expected, name in zip(injureds, injured_probs, team_names):
        if name in shifted_left:
            plt.text(actual-1.2, expected+0.25, name, size=12)
        elif name in placed_below:
            plt.text(actual-0.1, expected-0.7, name, size=12)
        else:
            plt.text(actual-0.1, expected+0.25, name, size=12)

    rplot.set_xlabel('Actual number of injuries per Season',size=15)
    rplot.set_ylabel('Expected injuries per season',size=15)

    # Text properties such as `size` are only accepted by set_xticks/set_yticks
    # together with `labels`, so positions and label styling are set separately.
    axis_ticks = [10, 15, 20, 25, 30]
    rplot.set_xticks(axis_ticks)
    rplot.set_yticks(axis_ticks)
    rplot.set_yticklabels(axis_ticks, size=15)
    rplot.set_xticklabels(axis_ticks, size=15)
    rplot.set_ylim(10,31)
    rplot.set_xlim(10,31)

    # r and p are hard-coded here (presumably copied from a linregress run on
    # the same data); update them if the input data changes.
    rplot.text(11, 29, 'r={:.2f}, p={:.2g}'.format(0.76957, 0.0000727),size=15)

    # Save before showing so the file is written whatever the backend does
    # with the figure on show().
    fig = rplot.get_figure()
    fig.savefig("injuries_correlation.png",dpi=800,bbox_inches='tight')
    plt.show()

In [None]:
# Linear regression of expected on actual injuries per team. The r and p from
# this fit are presumably the source of the values hard-coded in the
# correlation figure's annotation.
import scipy.stats  # not imported anywhere else in this notebook

slope, intercept, r, p, sterr = scipy.stats.linregress(x=injureds,
                                                       y=injured_probs)
p

## Paper Experiment 2 - MCTS and Greedy similarity to real selections
MCTS and Greedy similarities were logged in a spreadsheet following runs on a compute cluster.

In [None]:
# Box plot comparing the similarity (%) of MCTS and Greedy selections to the
# real selections; values were copied in from the results spreadsheet.
fig, ax = plt.subplots(figsize=(5,2))
similarity_data_MCTS = [77.8,81.0,71.8,81.8,72.7,74.1,71.4,78.3,86.1,70.1,76.4,80.0,78.7,70.8,81.4,74.0,76.3,81.6,74.7,71.3]
similarity_data_Greedy = [75.5,89.8,69.8,90.8,74.1,79.3,75.1,80.5,90.8,71.4,79.5,82.1,83.5,81.8,88.1,77.9,88.3,85.3,84.0,75.5]
boxplot_data = [similarity_data_MCTS, similarity_data_Greedy]
boxplot = ax.boxplot(boxplot_data, widths=0.5)

# Same line width for every box-plot component; medians additionally in blue.
for component, artists in boxplot.items():
    if component == 'medians':
        plt.setp(artists, color='blue', linewidth=1.5)
    else:
        plt.setp(artists, linewidth=1.5)

ax.set_xticks([1,2],labels=['MCTS','Greedy'])
ax.set_ylabel('Similarity (%)')
ax.set_ylim(60,100)

# Save before showing so the file is written whatever the backend does with
# the figure on show().
fig.savefig('similarity_boxplot.png',dpi=800,bbox_inches='tight')
plt.show()

## Paper Experiment 3 & 4 - MCTS Results
MCTS and Greedy results were logged in a spreadsheet following runs on a compute cluster. The table for Experiment 3 was created in Word using values from this spreadsheet.

In [None]:
import numpy as np
import matplotlib.pyplot as plt


def _draw_grouped_bars(series, group_labels, ylabel, yticks, ylim, legend_labels=None):
    """Draw one panel of grouped bars with error bars on the current axes.

    series: one (heights, errors, colour) tuple per bar within a group; up to
        three are supported and are placed left, centre and right.
    group_labels: x tick label for each group of bars.
    ylabel, yticks, ylim: y-axis label, tick positions and (low, high) limits.
    legend_labels: one legend entry per series; no legend is drawn when None.
    """
    positions = np.arange(len(group_labels))
    width = 0.25
    offsets = (-width, 0, width)
    bars = [
        plt.bar(positions + offset, heights, width, yerr=errs, capsize=14.5, color=colour)
        for offset, (heights, errs, colour) in zip(offsets, series)
    ]
    plt.ylabel(ylabel, size=14.8)
    # Faint zero line, drawn exactly once per panel so both panels match.
    plt.axhline(y=0, color='k', linestyle='--', alpha=0.2)
    plt.xticks(positions, group_labels, size=14.5)
    plt.yticks(yticks, size=14.5)
    plt.ylim(*ylim)
    if legend_labels is not None:
        plt.legend(bars, legend_labels, fontsize=14.5)


# Two stacked panels of MCTS results relative to Greedy for the '1x', '2x' and
# '3x' series; values and error bars were copied in from the results spreadsheet.
plt.style.reload_library()
plt.style.use(['science','no-latex'])
with plt.style.context(['science','no-latex','bright']):
    fig = plt.figure(figsize=(7.5,5.5))

    plt.subplot(2, 1, 1)
    _draw_grouped_bars(
        series=[
            ([-0.5, 1.8], [1.3, 1.6], 'grey'),
            ([1.3, 2.8], [0.7, 1.0], 'blue'),
            ([1.8, 2.7], [0.6, 0.7], 'orange'),
        ],
        group_labels=['Mean Performance', 'Lower Quartile Performance'],
        ylabel='% Increase vs Greedy',
        yticks=[-2, 0, 2, 4],
        ylim=(-2, 5),
        legend_labels=('1x', '2x', '3x'),
    )

    # The second panel shares the legend of the first, so none is drawn here.
    plt.subplot(2, 1, 2)
    _draw_grouped_bars(
        series=[
            ([4.6, 13.1], [1.3, 1.3], 'grey'),
            ([1.3, 11.4], [0.9, 1.0], 'blue'),
            ([-0.2, 8.6], [0.6, 1.2], 'orange'),
        ],
        group_labels=['Mean Squad Injuries', 'Mean Optimal Team Injuries'],
        ylabel='% Decrease vs Greedy',
        yticks=[0, 5, 10, 15],
        ylim=(-2, 15),
    )

    # Save before showing so the file is written whatever the backend does
    # with the figure on show().
    fig.savefig('Robustness.png',dpi=800)
    plt.show()

## Paper Experiment 5 - Wage Data
Wage results data is logged in a spreadsheet after running the code in the MCTSResultsAnalysis notebook.

In [None]:
import numpy as np

# Per-team figures copied in from the results spreadsheet. All four arrays are
# aligned by position: entry i of each array belongs to teams[i].
teams = np.array(['Arsenal', 'Bournemouth', 'Brighton', 'Burnley', 'Cardiff','Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Huddersfield','Leicester', 'Liverpool', 'Man City', 'Man United', 'Newcastle','Southampton', 'Tottenham', 'Watford', 'West Ham', 'Wolves'])
team_wages_MCTS = np.array([12.82, 3.06, 2.36, 2.72, 1.91,12.09, 4.37, 5.73, 3.33, 2.02,4.75, 9.53, 14.40, 13.06, 2.62,3.83, 7.19, 3.05, 6.32, 2.61])
team_wages_Greedy = np.array([14.49, 3.43, 3.01, 3.03, 2.17,13.07, 4.99, 6.72, 3.69, 2.33,5.50, 9.90, 16.20, 14.94, 2.88,4.04, 7.90, 3.34, 6.67, 3.15])
errors = np.array([2.8,3.4,4.1,2.8,3.4,3.1,4.0,3.4,3.5,3.1,3.3,3.0,3.2,3.3,3.1,3.2,3.2,2.9,2.8,4.4])

# Reduction of the MCTS figure relative to the Greedy figure, in percent.
wage_gap = team_wages_Greedy - team_wages_MCTS
perc_decrease = wage_gap / team_wages_Greedy * 100

# Team positions ordered from largest to smallest percentage decrease.
indexes = np.argsort(perc_decrease)[::-1].tolist()

perc_decrease_sorted = perc_decrease[indexes]
teams_sorted = teams[indexes]
errors_sorted = errors[indexes]

# Bar chart of the percentage wage decrease per team (largest first) with
# error bars, built from the *_sorted arrays prepared above.
plt.style.reload_library()
plt.style.use(['science','no-latex'])
rcParams['figure.figsize'] = 16.2,8.34
with plt.style.context(['science','no-latex','bright']):
    x = np.arange(len(teams_sorted))  # one bar position per team
    bar_width = 0.6

    fig, ax = plt.subplots(figsize=(18, 6.5))
    bars1 = ax.bar(x, perc_decrease_sorted, width=bar_width, yerr=errors_sorted, capsize=5, label='MCTS')

    ax.set_xlabel('Team',size=16)
    ax.set_ylabel('Percentage Decrease in Wage Cost with Injuries',size=16)
    # Ticks sit 0.2 to the right of each bar centre; presumably so the rotated,
    # right-aligned team names line up with their bars.
    ax.set_xticks(x+0.2)
    ax.set_xticklabels(teams_sorted,rotation=60,size=16,ha='right')
    ax.set_yticks(range(0,28,2))
    ax.set_yticklabels(range(0,28,2),size=16)

# Save before showing so the file is written whatever the backend does with
# the figure on show().
fig.savefig('wages_lost.png',dpi=800)
plt.show()

## Paper Experiment 6 - Player Welfare study

In [None]:
def _rolling_percent(probs):
    """Return the 5-game rolling mean of `probs` in percent, indexed by gameweek 1-38."""
    smoothed = probs.rolling(window=5,min_periods=0).mean()*100
    smoothed.index = range(1,39)
    return smoothed


# Injury probabilities for the real-world season (computed in the MDP notebook
# and saved to CSV) and for a single MCTS-simulated Chelsea season.
kante_RL_inj_probs = pd.read_csv('kante_inj_probs_RL.csv').iloc[:,1]
kante_simmed_inj_probs = pd.read_csv('data/MCTSResults/33/33_75.683.csv')['12']
kante_RL_inj_probs_RA = _rolling_percent(kante_RL_inj_probs)
kante_simmed_inj_probs_RA = _rolling_percent(kante_simmed_inj_probs)

# Gameweeks in which the player was rested.
rests_simmed = [3,6,16,19,22,23]  # read off the simulation file
rests_RL = [32,38]  # real-life rests in the 18/19 season

plt.style.reload_library()
plt.style.use(['science','no-latex'])
with plt.style.context(['science','no-latex','bright']):
    fig = plt.figure(figsize=(15, 5.5))
    plt.plot(kante_RL_inj_probs_RA,label='Real World',linewidth=2)
    plt.plot(kante_simmed_inj_probs_RA,label='MCTS',linewidth=2)
    # Mark the rested gameweeks on each curve.
    plt.scatter(rests_simmed,kante_simmed_inj_probs_RA[rests_simmed],color='palevioletred',alpha=1,s=50)
    plt.scatter(rests_RL,kante_RL_inj_probs_RA[rests_RL],color=default_color,alpha=1,s=50)
    plt.xlabel('Gameweek',size = 15)
    plt.ylabel('Rolling Average Injury Probability (%)',size = 15)
    plt.title("N'Golo Kanté 2018/19",size = 15)
    plt.legend(fontsize=15)
    plt.xticks(range(1,39),size = 12)
    plt.yticks([3,3.5,4,4.5,5],size = 12)
    plt.show()
    fig.savefig('PlayerWelfare.png',dpi=800)

# Other experiments

## Shap values
SHAP values saved from running SHAP on the injury model.

In [None]:
# Horizontal bar chart of mean |SHAP| per feature, read from a saved CSV.
shap_df = pd.read_csv('shap_df.csv')
# Human-readable feature names, assigned by row position (one per CSV row).
shap_df['col_name'] = ['Acute workload','Number of past injuries','Career total days injured','Distance covered','Total dribbles (r.a)','Number of modal injury occurrences','Chronic workload','Duration of longest injury (days)','Days since last injury','Opposing team','Injuries past twelve months','Age','Game day temperature (C)','Most recent injury length (days)',"Player's current team",'Opposition tackles (r.a)','Opposition fouls (r.a)','Travel distance to game (km)','Game day precipitation (mm)','Career total days injured with most common injury']
plt.style.reload_library()
plt.style.use(['science','no-latex'])
rcParams['figure.figsize'] = 6.2,8.34
with plt.style.context(['science','no-latex','bright']):
    fig, ax = plt.subplots(figsize=(6.2,8.34))
    # Reverse the rows so the first CSV row ends up at the top of the chart.
    ax.barh(shap_df['col_name'][::-1],shap_df['feature_importance_vals'][::-1])
    plt.xlabel('Mean |SHAP| (average impact on model output)',size=15)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    fig.savefig('SHAP_values.png',dpi=600)

## Rested Player Example

In [None]:
# Illustrative injury-probability curve over 15 gameweeks with three rested
# gameweeks marked. Each gameweek number on the x axis is coloured by its
# importance score (green / orange / red, see the legend).
plt.style.reload_library()
plt.style.use(['science','no-latex'])
rcParams['figure.figsize'] = 13.2,8.34
x = list(range(1, 16))
# Importance score of each gameweek, aligned by position with `x`.
importance = [1.7, 1.7, 0.6, 0.3, 1.3, 2.4, 2.7, 2.3, 1.8, 1.3, 2.7, 2.2, 1.7, 0.6, 2.4]
y = [4,4.3,4.9,5.8,4.8,4.6,5.0,5.2,6.0,4.9,4.7,5.5,6.5,6.8,5.1]
# NOTE(review): the axes are created outside the 'bright' style context, so the
# line colour comes from the globally applied style - confirm this is intended.
fig, ax = plt.subplots(figsize=(14, 5))
with plt.style.context(['science','no-latex','bright']):
    plt.plot(x,y,zorder=1,linewidth=2.5)
    plt.scatter(x,y,zorder=2,s=50)
    plt.xticks(x, x, rotation=0, size=15)
    plt.yticks([4,5,6,7], [4,5,6,7], size=15)
    plt.xlabel('Gameweek',size = 15)
    plt.ylabel('Injury probability (%)',size = 15)

    # Dashed marker and caption at every gameweek where the player was rested.
    for rested_week in (4, 9, 14):
        plt.axvline(x = rested_week, color = 'palevioletred',linestyle="dashed",lw=1.5)
        plt.text(rested_week + 0.1,4,"Rested",size=15)

    # Colour each tick label straight from its score instead of parsing the
    # score back out of the label text.
    for tick_label, score in zip(ax.get_xticklabels(), importance):
        if score > 2:
            tick_label.set_color('red')
        elif score > 1:
            tick_label.set_color('orange')
        else:
            tick_label.set_color('green')

    legend_elements = [
        plt.Line2D([0], [0], marker='o',color='w', markerfacecolor='green',lw=2, label='Least Important'),
        plt.Line2D([0], [0], marker='o',color='w', lw=2, markerfacecolor='orange',label='Medium Importance'),
        plt.Line2D([0], [0], marker='o',color='w', lw=2, markerfacecolor='red',label='Most Important'),
    ]

    # Displaying the custom legend underneath the chart
    plt.legend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, -0.1), shadow=True, ncol=3,fontsize=15)

    fig.savefig("injuries_rest.png",dpi=800,bbox_inches='tight')
    plt.show()