In [27]:
#| default_exp model.model_results

In [28]:
#| hide
# Load IPython's autoreload extension; mode 2 reloads imported modules before
# each cell runs, so edits to library code are picked up without a restart.
%load_ext autoreload
%autoreload 2
# NOTE(review): set_trace is not called in the visible cells - looks like a
# leftover debugging import; confirm before removing.
from IPython.core.debugger import set_trace

# Load in results data from Model
> In this notebook we load the model's train and test predictions and analyse their errors.

In [29]:
#| export

import numpy as np
import pandas as pd
import math

from typing import Callable, List, Optional, Tuple, Dict

In [30]:
#| export

# Lookup from a position code (the values mapped from the `position` column by
# `group_positions`) to a broader position group. Written group by group so
# that codes sharing a group sit next to each other.
POSITION_GROUPS = {
    code: group
    for group, codes in (
        ("Goalkeeper", ("G",)),
        ("Center Back", ("CD",)),
        ("Wing Back", ("LFB", "RFB")),
        ("Winger", ("LW", "RW")),
        ("Center Midfielder", ("DMF", "AMF", "CMF")),
        ("Striker", ("S", "SS")),
    )
    for code in codes
}

In [31]:
import os
from pathlib import Path

# Folder holding the CSV files read below. Set the AGENT_IMPUTTER_OUTPUT_DIR
# environment variable to read them from another location; when it is unset
# the original local path is used, so existing runs behave as before.
DATA_DIR = Path(
    os.environ.get(
        "AGENT_IMPUTTER_OUTPUT_DIR",
        "/home/user/Downloads/agent_imputter_output/17_pred",
    )
)

# Full features frame plus the train / test prediction frames.
whole_input = pd.read_csv(DATA_DIR / "whole_input.csv")
train_df = pd.read_csv(DATA_DIR / "train_df.csv")
test_df = pd.read_csv(DATA_DIR / "test_df.csv")

## Util functions

In [32]:
#| export

def merge_dfs(
    whole_input: pd.DataFrame,
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Attach each row's `position` from `whole_input` to the train and test predictions.

    Args:
        whole_input: Full features frame. Must contain `match_id`, `event_id`,
            `player_id` and `position`.
        train_df: Train predictions, containing `match_id`, `event_id` and `player_id`.
        test_df: Test predictions, containing the same three key columns.

    Returns:
        `(train_df, test_df)` as new frames with a `position` column added.
        The join is a left join, so every prediction row is kept and rows
        without a match in `whole_input` get a missing `position`. The frames
        passed in are not modified.
    """
    keys = ['match_id', 'event_id', 'player_id']

    # Only the join keys and `position` are needed from the features frame.
    positions = whole_input[keys + ['position']]

    def _with_position(preds: pd.DataFrame) -> pd.DataFrame:
        # "Unnamed: 0" is presumably the index column left over from a CSV
        # round trip. errors="ignore" keeps this working for frames that do
        # not carry it (previously a KeyError).
        cleaned = preds.drop(columns="Unnamed: 0", errors="ignore")
        return cleaned.merge(positions, on=keys, how='left')

    return _with_position(train_df), _with_position(test_df)

In [33]:
# Add the `position` column from the full features frame to both prediction frames.
# NOTE: this rebinds train_df / test_df, so re-run from the load cell rather
# than re-running this cell on its own.
train_df, test_df = merge_dfs(whole_input, train_df, test_df)

## Group players by position

In [34]:
#| export

def group_positions(
    train_df: pd.DataFrame,
    test_df: pd.DataFrame,
    position_groups: Dict = POSITION_GROUPS,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Add a `group` column that maps each row's `position` to its position group.

    Args:
        train_df: Train predictions with a `position` column.
        test_df: Test predictions with a `position` column.
        position_groups: Mapping from position value to group name.

    Returns:
        The same two frames that were passed in. The `group` column is written
        onto them in place; a `position` that is not a key of
        `position_groups` gets a missing `group`.
    """
    for df in (train_df, test_df):
        df["group"] = df["position"].map(position_groups)

    return train_df, test_df

In [35]:
# Add the `group` column that the per-group error tables at the end group by.
train_df, test_df = group_positions(train_df, test_df)

## Compute error

In [37]:
def compute_error(
    df: pd.DataFrame,
    dataset: str = "train"
)-> None:
    """Add per-row error columns to `df` and print their means.

    Args:
        df: Frame with `pred_x`, `act_x`, `pred_y` and `act_y` columns. It is
            modified in place: `x_error` and `y_error` (absolute differences)
            and `xy_error` (Euclidean distance between predicted and actual
            point) are written onto it.
        dataset: Label printed above the metrics.

    Returns:
        None. The mean of each error column is printed to stdout.
    """
    df["x_error"] = (df["pred_x"] - df["act_x"]).abs()
    df["y_error"] = (df["pred_y"] - df["act_y"]).abs()
    df["xy_error"] = np.sqrt(df["x_error"] ** 2 + df["y_error"] ** 2)

    # Take the means from the columns just stored instead of recomputing every
    # difference, so the printed numbers always match the per-row columns.
    mae_x = df["x_error"].mean()
    mae_y = df["y_error"].mean()
    mae_xy = df["xy_error"].mean()

    print(dataset)
    print("-------------")
    print("MAE X:", mae_x)
    print("MAE Y:", mae_y)
    print("MAE XY:", mae_xy)


In [38]:
# Print the errors on the train predictions. compute_error also writes the
# x_error / y_error / xy_error columns onto train_df, which the per-group
# table further down reads.
compute_error(train_df,"train")

train
-------------
MAE X: 19.939218988471257
MAE Y: 11.332364427134776
MAE XY: 24.969440932312853


In [39]:
# Print the errors on the test predictions. compute_error also writes the
# x_error / y_error / xy_error columns onto test_df, which the per-group
# table further down reads.
compute_error(test_df,"test")

test
-------------
MAE X: 19.39706549647137
MAE Y: 11.03497309733208
MAE XY: 24.414308316013024


In [None]:
# Mean error per position group on the train predictions, smallest combined
# (xy) error first.
(
    train_df
    .groupby("group")[["x_error", "y_error", "xy_error"]]
    .mean()
    .sort_values(by='xy_error')
)

In [None]:
# Mean error per position group on the test predictions, smallest combined
# (xy) error first.
(
    test_df
    .groupby("group")[["x_error", "y_error", "xy_error"]]
    .mean()
    .sort_values(by='xy_error')
)

In [None]:
#| hide
# Run nbdev's export step. Kept as the last cell; the target module for this
# notebook is named by its `#| default_exp` directive.
from nbdev import nbdev_export

nbdev_export()