In [105]:
# Work on a copy: adding 'absolute_error' below would otherwise also mutate the shared `df`
df_predictions = df.copy()

# Calculate absolute error
df_predictions['absolute_error'] = abs(df_predictions['predicted_points'] - df_predictions['actual_points'])
df_predictions = df_predictions.drop(columns=['predicted_points', 'actual_points']).reset_index(drop=True)

# Groupby at two levels to get: a) Predictions for all positions by gameweek (i.e. overall level)
df_predictions_overall = df_predictions.drop(columns='position').groupby(['dataset','gameweek']).agg(['mean']).reset_index()
df_predictions_overall.insert(2, 'position', 'overall')

# b) Predictions by gameweek and by position
df_predictions_by_position = df_predictions.groupby(['dataset','gameweek','position']).agg(['mean']).reset_index()

# Concatenate to get full grouped predictions
df_predictions_grouped = pd.concat([df_predictions_overall, df_predictions_by_position])
# .agg(['mean']) produced two-level column labels; drop the inner 'mean' level
df_predictions_grouped.columns = df_predictions_grouped.columns.droplevel(1)
df_predictions_grouped = df_predictions_grouped.rename(columns={'absolute_error':'mean_absolute_error'})

# Persist the grouped errors to disk
df_predictions_grouped.to_csv('df_predictions_grouped.csv')

In [106]:
# Display the grouped table of mean absolute errors (per dataset, gameweek and position)
df_predictions_grouped

Unnamed: 0,dataset,gameweek,position,event,mean_absolute_error
0,test,3,overall,5.129147,2.860811
1,test,4,overall,6.085202,2.198158
2,test,5,overall,7.041237,1.987693
3,test,6,overall,8.012313,1.922248
4,test,7,overall,8.958198,1.916588
...,...,...,...,...,...
283,training,37,midfielder,19.763992,1.613621
284,training,38,defender,20.219419,2.023379
285,training,38,forward,20.806944,1.946664
286,training,38,goalkeeper,20.689947,2.153495


# parameter tuning

In [107]:
import numpy as np
# Candidate alpha grid: 100 values log-spaced between 1e-10 and 1e5
np.logspace(-10, 5, 100)

# Plan: perform 10-fold cross-validation for each of the 100 alpha values...
# ...and pick out the best alpha value

array([1.00000000e-10, 1.41747416e-10, 2.00923300e-10, 2.84803587e-10,
       4.03701726e-10, 5.72236766e-10, 8.11130831e-10, 1.14975700e-09,
       1.62975083e-09, 2.31012970e-09, 3.27454916e-09, 4.64158883e-09,
       6.57933225e-09, 9.32603347e-09, 1.32194115e-08, 1.87381742e-08,
       2.65608778e-08, 3.76493581e-08, 5.33669923e-08, 7.56463328e-08,
       1.07226722e-07, 1.51991108e-07, 2.15443469e-07, 3.05385551e-07,
       4.32876128e-07, 6.13590727e-07, 8.69749003e-07, 1.23284674e-06,
       1.74752840e-06, 2.47707636e-06, 3.51119173e-06, 4.97702356e-06,
       7.05480231e-06, 1.00000000e-05, 1.41747416e-05, 2.00923300e-05,
       2.84803587e-05, 4.03701726e-05, 5.72236766e-05, 8.11130831e-05,
       1.14975700e-04, 1.62975083e-04, 2.31012970e-04, 3.27454916e-04,
       4.64158883e-04, 6.57933225e-04, 9.32603347e-04, 1.32194115e-03,
       1.87381742e-03, 2.65608778e-03, 3.76493581e-03, 5.33669923e-03,
       7.56463328e-03, 1.07226722e-02, 1.51991108e-02, 2.15443469e-02,
      

# old interpreting xgboost v2 doc

## Interpreting XGBoost V2

Undoubtedly, XGBv2 was the best performing model out of the models tested so far, and so for that reason XGBv2 will be the model which we deploy. Before deploying XGBv2, however, I want to spend some time **understanding what features are generating the model's predictions**. 

Also, if possible, I'm hoping we'll be able to see what's causing the sharp error spikes mentioned above. However, if we do not get a definitive answer to this question, it should be noted that I will still proceed and deploy the final model. The reason is that I will soon be going back to university, so at this moment I believe it would be best to focus on deploying "something". I'm aware this is not best practice; however, I want to learn more about how ML algorithms are actually deployed in the real world, as well as how they are researched, which is what we have done in this notebook. 

With all that said, let's define some new functions ```get_preds_xgboostv2()``` and ```simulate_xgboostv2()``` to interpret XGBv2*. 

\*_[These functions replicate what we had earlier; however, they now also collect the features/settings used to generate the predictions each gameweek.]_


In [None]:
def get_preds_xgboostv2(gameweek):

    """Returns the predictions made for a given simulated gameweek 
    for XGBv2, along with the tuned parameters and feature importances.

    Reads the notebook-level DataFrame ``df`` (whose index has an 'event'
    level and which has a 'total_points' column) and the notebook-level 
    ``param_grid``.

    :param: int64 gameweek: The first gameweek in the test range. 
            E.g. if we're at the start of gameweek 5, we make 
            predictions for gameweeks 5,6,7,8 and 9. 

    :rtype: DataFrame train: Rows for events 0..gameweek-1, with a
            'total_points_predicted' column obtained via 10 Fold Cross 
            Validation.
            DataFrame test: Rows for events gameweek..gameweek+4, with a
            'total_points_predicted' column predicted by the model fitted 
            on the training rows.
            DataFrame df_gw_param_used: Parameters used to generate the 
            predictions for the GW.
            DataFrame df_gw_feat_importance: Importance of each of the
            variables(/features) used for the GW. 
    """    

    # Initialise gameweek ranges
    prev_gw = gameweek-1
    all_gameweeks = list(range(0,prev_gw+6))
    train_gameweeks = list(range(0,prev_gw+1))
    test_gameweeks = list(range(prev_gw+1,prev_gw+6))

    # Get all gameweeks in both sets of ranges
    df_all_gameweeks = df[(df.index.get_level_values('event').isin(all_gameweeks))]

    # Rename target variable
    df_all_gameweeks = df_all_gameweeks.rename(columns={'total_points':'total_points_actual'})

    # Every column except the target is treated as a feature
    feature_mask = df_all_gameweeks.columns != 'total_points_actual'

    # Standardise the independent variables
    # NOTE(review): the scaler is fitted on the train AND test gameweeks together, so 
    # test-period statistics feed into the training features - consider fitting on the 
    # training gameweeks only. Left unchanged so that earlier results stay reproducible.
    df_all_gameweeks.loc[:, feature_mask] = StandardScaler().fit_transform(
                                            df_all_gameweeks.loc[:, feature_mask])

    # Train-Test split the data (explicit copies, so the prediction columns added below
    # are written to independent frames rather than to slices of df_all_gameweeks)
    train = df_all_gameweeks[(df_all_gameweeks.index.get_level_values('event').isin(train_gameweeks))].copy()
    test = df_all_gameweeks[(df_all_gameweeks.index.get_level_values('event').isin(test_gameweeks))].copy()

    # Define independent and dependent variables
    X_train = train.loc[:, feature_mask]
    y_train_actual = train['total_points_actual']     
    X_test = test.loc[:, feature_mask]

    # Initialise XGBoost
    xbgr = xgb.XGBRegressor()

    # Setup search heuristic using parameter grid from earlier
    sh = HalvingGridSearchCV(xbgr, param_grid, cv = 5, factor = 5, 
                    min_resources ='exhaust', n_jobs = -1, verbose = 2, random_state = 42).fit(X_train, y_train_actual)

    # Fit XGBoost best estimator to training data 
    model = sh.best_estimator_.fit(X_train, y_train_actual) 

    # Define parameters used / and their importance
    param_used = sh.best_params_
    feat_importance = model.feature_importances_

    # Define DataFrames to be outputted
    df_gw_feat_importance = pd.DataFrame({'gameweek':gameweek, 'feature':X_train.columns, 'importance':feat_importance})
    df_gw_param_used = pd.DataFrame({'gameweek':gameweek, 'value':param_used}).reset_index().rename(columns={'index':'parameter'})[[
                                                                                            'gameweek', 'parameter', 'value']]
    # Make predictions: out-of-fold predictions for the training rows, direct predictions for the test rows
    y_train_pred = cross_val_predict(model, X_train, y_train_actual, cv=10)
    y_test_pred = model.predict(X_test)

    # Create prediction column for train/test DataFrames
    train['total_points_predicted'] = y_train_pred
    test['total_points_predicted'] = y_test_pred

    return train, test, df_gw_param_used, df_gw_feat_importance


def simulate_xgboostv2(gameweeks=None):
    
    """Returns the grouped prediction errors made for every gameweek and every 
    subdataset for XGBv2 - as well as: the parameters used each gameweek and 
    the importance of the features.

    :param: iterable gameweeks: Gameweeks to simulate. Defaults to every 
            gameweek from 3 to 38 inclusive.

    :rtype: DataFrame df_predictions_grouped: Mean absolute error 
            ('xgboost_v2_mae') per dataset ('training'/'test'), gameweek and 
            position, plus an 'overall' row across all positions.
            DataFrame df_param_used: Parameters used for each simulated 
            gameweek.
            DataFrame df_feat_importance: Feature importances for each 
            simulated gameweek.
    
    """   

    def _label_predictions(df_split, dataset_name, gw):
        """Tags one train/test split with its dataset name and gameweek, keeping only the prediction columns."""
        df_labelled = df_split.reset_index()
        df_labelled['dataset'] = dataset_name
        df_labelled['gameweek'] = gw
        df_labelled = df_labelled.rename(columns = {'total_points_predicted':'predicted_points',
                                                    'total_points_actual':'actual_points'})
        return df_labelled[['dataset','gameweek','position','predicted_points', 'actual_points']]

    if gameweeks is None:
        gameweeks = range(3,39)
    
    gw_predictions_array = []
    gw_param_used_array = [] 
    gw_feat_importance_array = [] 

    # For every gameweek in the season
    for gw in tqdm(list(gameweeks)):
        
        # Simulate predictions for the gameweek 
        train, test, df_gw_param_used, df_gw_feat_importance = get_preds_xgboostv2(gw) 

        # Manipulate training and test predictions
        df_train_predictions = _label_predictions(train, 'training', gw)
        df_test_predictions = _label_predictions(test, 'test', gw)

        # Concatenate & append
        df_gw_predictions = pd.concat([df_train_predictions, df_test_predictions])
        gw_predictions_array.append(df_gw_predictions)
        gw_param_used_array.append(df_gw_param_used)
        gw_feat_importance_array.append(df_gw_feat_importance)

    # Concatenate the per-gameweek results
    df_predictions = pd.concat(gw_predictions_array)
    df_param_used = pd.concat(gw_param_used_array)
    df_feat_importance = pd.concat(gw_feat_importance_array)
    
    # Calculate absolute error
    df_predictions['absolute_error'] = abs(df_predictions['predicted_points'] - df_predictions['actual_points'])
    df_predictions = df_predictions.drop(columns=['predicted_points', 'actual_points']).reset_index(drop=True)

    # Groupby at two levels to get: a) Predictions for all positions by gameweek (i.e. overall level)
    df_predictions_overall = df_predictions.drop(columns='position').groupby(['dataset','gameweek']).agg(['mean']).reset_index()
    df_predictions_overall.insert(2, 'position', 'overall')

    # b) Predictions by gameweek and by position
    df_predictions_by_position = df_predictions.groupby(['dataset','gameweek','position']).agg(['mean']).reset_index()

    # Concatenate to get full grouped predictions
    df_predictions_grouped = pd.concat([df_predictions_overall, df_predictions_by_position])
    # .agg(['mean']) produced two-level column labels; drop the inner 'mean' level
    df_predictions_grouped.columns = df_predictions_grouped.columns.droplevel(1)
    df_predictions_grouped = df_predictions_grouped.rename(columns={'absolute_error':'xgboost_v2_mae'})
    
    return df_predictions_grouped, df_param_used, df_feat_importance

In [None]:
# Run the XGBoost V2 simulation again; this time it also returns the parameters used
# and the feature importances for each GW
df_predictions_grouped, df_param_used, df_feat_importance = simulate_xgboostv2()

### shap

### GW11 and GW15 spikes

To start with, I'd like to look at the first spikes, which occurred during GWs 11 and 15. We will use **SHAP values** as our feature attribution technique of choice. SHAP values seemed to be a good place to start based on my research (see [Interpretable Machine Learning with XGBoost](https://towardsdatascience.com/interpretable-machine-learning-with-xgboost-9ec80d148d27) and [Explain Your Model with the SHAP Values](https://towardsdatascience.com/explain-your-model-with-the-shap-values-bc36aac4de3d) for more).

In particular, let's look at the SHAP values in GWs 9-17 (from the XGBv2 model). This will allow us to examine the feature importances before, during and after both spikes. Any changes throughout this period might point us toward what's going on. 

In [None]:
# Initialise list of per-gameweek SHAP importance DataFrames
shap_importance_array = []

# For every GW in the period we're investigating
for gameweek in range(9,18):

    # Initialise gameweek ranges
    prev_gw = gameweek-1
    all_gameweeks = list(range(0,prev_gw+6))
    train_gameweeks = list(range(0,prev_gw+1))
    test_gameweeks = list(range(prev_gw+1,prev_gw+6))

    # Get all gameweeks in both sets of ranges
    df_all_gameweeks = df[(df.index.get_level_values('event').isin(all_gameweeks))]

    # Rename target variable
    df_all_gameweeks = df_all_gameweeks.rename(columns={'total_points':'total_points_actual'})

    # Every column except the target is treated as a feature
    feature_mask = df_all_gameweeks.columns != 'total_points_actual'

    # Standardise the independent variables
    # NOTE(review): the scaler is fitted on the train AND test gameweeks together - 
    # consider fitting on the training gameweeks only.
    df_all_gameweeks.loc[:, feature_mask] = StandardScaler().fit_transform(
                                            df_all_gameweeks.loc[:, feature_mask])

    # Train-Test split the data
    train = df_all_gameweeks[(df_all_gameweeks.index.get_level_values('event').isin(train_gameweeks))]
    test = df_all_gameweeks[(df_all_gameweeks.index.get_level_values('event').isin(test_gameweeks))]

    # Define independent and dependent variables
    X_train = train.loc[:, feature_mask]
    y_train_actual = train['total_points_actual']     
    X_test = test.loc[:, feature_mask]

    # Initialise XGBoost
    xbgr = xgb.XGBRegressor()

    # Setup search heuristic using parameter grid from earlier
    sh = HalvingGridSearchCV(xbgr, param_grid, cv = 5, factor = 5, 
                    min_resources ='exhaust', n_jobs = -1, verbose = 2, random_state = 42).fit(X_train, y_train_actual)
    
    # Fit XGBoost best estimator to training data 
    model = sh.best_estimator_.fit(X_train, y_train_actual) 

    # Calculate SHAP values for the test rows
    shap_values = shap.TreeExplainer(model).shap_values(X_test)
    
    # SHAP values DataFrame (one column per feature)
    df_shap_values = pd.DataFrame(shap_values, columns = X_test.columns)

    # 'Sum of SHAP value magnitudes' over all samples to get feature importance 
    df_shap_importance = pd.DataFrame(abs(df_shap_values).sum().sort_values(ascending=False), 
                                                  columns=['importance_gw'+str(gameweek)]).reset_index()
    # Append to array outside loop
    shap_importance_array.append(df_shap_importance)

In [None]:
# Merge SHAP value importances for all the GWs (one 'importance_gw<N>' column per GW),
# joining on the feature name held in the 'index' column
df_shap_importance_by_gw = shap_importance_array[0]
for df_gw_importance in shap_importance_array[1:]:
    df_shap_importance_by_gw = pd.merge(df_shap_importance_by_gw, df_gw_importance, on =['index'])

# Calculate relative feature importances (each GW column expressed as a % of that GW's total)
df_rel_importance = pd.DataFrame(df_shap_importance_by_gw['index'])
for col in df_shap_importance_by_gw.columns[1:]:
    df_rel_importance['rel_'+col] = df_shap_importance_by_gw[col]/df_shap_importance_by_gw[col].sum()*100

# Split DataFrame into 'spike' and 'nonspike' GWs / Then take averages
spike_cols = ['rel_importance_gw11', 'rel_importance_gw15']
df_spike_gws = df_rel_importance[['index'] + spike_cols]
# .drop returns a new frame, so the mean column below is not written onto a slice of df_rel_importance
df_nonspike_gws = df_rel_importance.drop(columns=spike_cols)
# numeric_only=True keeps the string 'index' (feature name) column out of the row-wise mean
df_nonspike_gws['mean_rel_importance_nonspike_gws'] = df_nonspike_gws.mean(axis=1, numeric_only=True)

# Compare SHAP values across both sets of GWs
df_compare = pd.merge(df_spike_gws, df_nonspike_gws[['index', 'mean_rel_importance_nonspike_gws']], on = 'index', how='left')
df_compare.head(4)

In [None]:
# Calculate SHAP ranks (rank 1 = largest value in each column)
# Copy first so the rank columns are not also added to df_compare, which keeps this cell re-runnable
df_shap_ranks = df_compare.copy()
for col in df_shap_ranks.columns[1:]:
    df_shap_ranks['rank_'+col] = df_shap_ranks[col].rank(ascending=False)
df_shap_ranks.head(4)