In [53]:
from socceraction.data.wyscout import PublicWyscoutLoader
from socceraction.spadl.wyscout import convert_to_actions
from socceraction.data.opta import OptaLoader
from socceraction.data.statsbomb import StatsBombLoader
from socceraction.spadl.config import actiontypes, bodyparts
import socceraction.spadl as spadl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, mean_squared_error
from xgboost import XGBClassifier, XGBRegressor
import math
import pickle
from mplsoccer import Pitch
import hashlib
import copy
import random

In [54]:
# DATA STRUCTURE FOR PATH TO FINAL TIMESTAMP
class Skill_Player_Values:
    def __init__(self, player_id):
        self.player_id = player_id
        self.attribute_values = {
            "height_cm" : 0,
            "weight_kgs" : 0,
            "age" : 0
        }
        self.skill_values = {
            "acceleration" : 0, "aggression" : 0, 
            "agility" : 0, "balance" : 0, 
            "ball_control" : 0,"composure" : 0, 
            "crossing" : 0, "curve" : 0, 
            "dribbling" : 0, "finishing" : 0,
            "freekick_accuracy" : 0, "heading_accuracy" : 0, 
            "interceptions" : 0, "jumping" : 0, 
            "long_passing" : 0, "long_shots" : 0, 
            "marking" : 0, "penalties" : 0, 
            "positioning" : 0, "reactions" : 0,
            "shot_power" : 0, "sliding_tackle" : 0, 
            "sprint_speed" : 0, "stamina" : 0, 
            "short_passing" : 0, "standing_tackle" : 0, 
            "strength" : 0, "vision" : 0, 
            "volleys" : 0
        }

    def get_player_id(self):
        return self.player_id
    
    def set_player_id(self, player_id):
        self.player_id = player_id

    def get_skill_value_by_key(self, key):
        return self.skill_values[key]
    
    def set_skill_value_by_key(self, key, score):
        self.skill_values[key] = score

    def get_attribute_value_by_key(self, key):
        return self.attribute_values[key]
    
    def set_attribute_value_by_key(self, key, score):
        self.attribute_values[key] = score

class Node_Decision_Making:
    def __init__(self, timestamp, player_id, coordinate_x, coordinate_y, is_home_team, include_skill):
        self.timestamp = timestamp
        self.player_id = player_id
        self.coordinate = (coordinate_x, coordinate_y)
        self.xgoal_value = None
        self.is_home_team = is_home_team
        if (include_skill):
            self.skill_and_attributes_values = Skill_Player_Values(player_id)
        else:
            self.skill_and_attributes_values = None        

    def get_timestamp(self):
        return self.timestamp
    
    def set_timestamp(self, timestamp):
        self.timestamp = timestamp
    
    def get_player_id(self):
        return self.player_id
    
    def set_player_id(self, player_id):
        self.player_id = player_id
    
    def get_xgoal_value(self):
        return self.xgoal_value
    
    def set_xgoal_value(self, xgoal_value):
        self.xgoal_value = xgoal_value

    def get_coordinate_value(self):
        return self.coordinate
    
    def set_coordinate_value(self, coordinate_x, coordinate_y):
        self.coordinate = (coordinate_x, coordinate_y)

    def get_is_home_team(self):
        return self.is_home_team
    
    def set_is_home_team(self, is_home_team):
        self.is_home_team = is_home_team

    def get_skill_values(self):
        return self.skill_and_attributes_values
    
    def set_skill_values(self, object_skill_values):
        self.skill_and_attributes_values = object_skill_values

class Xpass_or_XDribble_Value_Decision_Making:
    def __init__(self, node_start, node_end, xpass_or_xdribble_value):
        self.node_start = node_start
        self.node_end = node_end
        self.unique_id = hashlib.md5((str(node_start.get_player_id()) + '-' + str(node_start.get_timestamp()) \
                            + '-' + str(node_end.get_player_id()) + '-' + str(node_end.get_timestamp())) \
                            .encode('utf-8')).hexdigest()
        self.xpass_or_xdribble_value = xpass_or_xdribble_value

    def get_unique_id(self):
        return self.unique_id

    def get_node_start(self):
        return self.node_start
    
    def set_node_start(self, node):
        self.node_start = node

    def get_node_end(self):
        return self.node_end
    
    def set_node_end(self, node):
        self.node_end = node

    def get_xpass_or_xdribble_value(self):
        return self.xpass_or_xdribble_value
    
    def set_xpass_or_xdribble_value(self, xpass_or_xdribble_value):
        self.xpass_or_xdribble_value = xpass_or_xdribble_value

class Path_Decision_Making:
    def __init__(self):
        self.path_list = []
        self.xpass_values = []
        self.opponent_node_list = []

    def append_new_node_in_path(self, timestamp, player_id, coordinate_x, coordinate_y, is_home_team, include_skill):
        new_node = Node_Decision_Making(timestamp, player_id, coordinate_x, coordinate_y, is_home_team, include_skill)
        if is_home_team:
            if len(self.path_list) > 0:
                last_node_in_this_path = self.path_list[-1]
                xpass_object = Xpass_or_XDribble_Value_Decision_Making(last_node_in_this_path, new_node, 0)
                self.xpass_values.append(xpass_object)
            self.path_list.append(new_node)
        else:
            self.opponent_node_list.append(new_node)

    def set_player_skills_value_into_node(self, timestamp, player_id, object_skill_values):
        for path_node in self.path_list:
            if (path_node.get_player_id() == player_id) and (path_node.get_timestamp() == timestamp):
                path_node.set_skill_values(object_skill_values)
        for xpass_object in self.xpass_values:
            start_node = xpass_object.get_node_start()
            if (start_node.get_player_id() == player_id) and (start_node.get_timestamp() == timestamp):
                start_node.set_skill_values(object_skill_values)
            end_node = xpass_object.get_node_end()
            if (end_node.get_player_id() == player_id) and (end_node.get_timestamp() == timestamp):
                end_node.set_skill_values(object_skill_values)

    def get_path_list(self):
        return self.path_list
    
    def get_specific_node_by_index(self, index):
        return self.path_list[index]
    
    def get_xpass_values(self):
        return self.xpass_values
    
    def get_opponent_node_list(self):
        return self.opponent_node_list
    
    def set_opponent_node_list(self, opponent_node_list):
        self.opponent_node_list = opponent_node_list
    
    def get_opponent_node_list_by_timestamp(self, timestamp):
        list_node_opponents_by_timestamp = []
        for node_opponent in self.opponent_node_list:
            if (self.opponent_node_list.get_timestamp() == timestamp):
                list_node_opponents_by_timestamp.append(node_opponent)
        return list_node_opponents_by_timestamp
    
    def get_index_with_greatest_xg(self):
        greatest_xg = self.path_list[0].get_xgoal_value()
        idx_greatest_xg = 0
        for idx, node_class in enumerate(self.path_list):
            if node_class.get_xgoal_value() > greatest_xg:
                greatest_xg = node_class.get_xgoal_value()
                idx_greatest_xg = idx
        return idx_greatest_xg
    
    def get_xpass_value_from_start_and_end_nodes(self, node_start, node_end):
        xpass_value = 0
        unique_id_xpass_obj = hashlib.md5((str(node_start.get_player_id()) + '-' + str(node_start.get_timestamp()) \
                            + '-' + str(node_end.get_player_id()) + '-' + str(node_end.get_timestamp())) \
                            .encode('utf-8')).hexdigest()
        for _, xpass_class in enumerate(self.xpass_values):
            if xpass_class.get_unique_id() == unique_id_xpass_obj:
                xpass_value = xpass_class.get_xpass_or_xdribble_value()
        return xpass_value
    
    def set_xpass_value_from_start_and_end_nodes(self, node_start, node_end, xpass_value):
        unique_id_xpass_obj = hashlib.md5((str(node_start.get_player_id()) + '-' + str(node_start.get_timestamp()) \
                            + '-' + str(node_end.get_player_id()) + '-' + str(node_end.get_timestamp())) \
                            .encode('utf-8')).hexdigest()
        for _, xpass_class in enumerate(self.xpass_values):
            if xpass_class.get_unique_id() == unique_id_xpass_obj:
                xpass_class.set_xpass_or_xdribble_value(xpass_value)

    def __str__(self):
        string_description = ""
        index = 0
        for node in self.path_list:
            string_description += f'(Player Desc : player_id({node.get_player_id()}), timestamp({node.get_timestamp()}), xG_value({node.get_xgoal_value()}))'
            if index < (len(self.path_list)-1):
                string_description += ' ---> '
            string_description += '\n'
            index += 1
        string_description += "=========================================================="
        return string_description

In [55]:
COLUMNS_XPASS_MODEL = ["path_id", "player_id_src", "player_id_target", "timestamp_origin", "timestamp_target", "start_x", "start_y", 
                       "end_x", "end_y", "distance_pass", "is_home_team", "distance_sideline", "distance_goal", 
                       "distance_receiver_sideline", "distance_receiver_goal", "angle_pass", "distance_opponent",
                       "num_opponent_closer_goal", "distance_receiver_opponent", "num_opponent_closer_goal_receiver",
                       "num_opponent_in_path", "num_opponent_in_path_receiver"]
COLUMNS_XGOAL_MODEL = ["start_x", "start_y", "path_id", "timestamp", "player_id", "distance_to_goal", "angle_to_goal", "is_home_team",
                       "distance_opponent", "num_opponent_closer_goal", "num_opponent_in_path"]
COLUMNS_PLAYERS_SKILL = ["acceleration", "aggression", "agility", "balance", "ball_control",
                        "composure", "crossing", "curve", "dribbling", "finishing",
                        "freekick_accuracy", "heading_accuracy", "interceptions", "jumping", "long_passing",
                        "long_shots", "marking", "penalties", "positioning", "reactions",
                        "shot_power", "sliding_tackle", "sprint_speed", "stamina", "short_passing",
                        "standing_tackle", "strength", "vision", "volleys"]
COLUMNS_XDRIBBLE_MODEL = ["path_id", "player_id", "timestamp_origin", "timestamp_target", "start_x", "start_y", "end_x", "end_y", 
                          "is_take_on", "is_home_team", "distance_opponent", "num_opponent_closer_goal", "num_opponent_in_path"]
COLUMNS_PLAYERS_ATTRIBUTE = ["height_cm", "weight_kgs", "age"]

STANDARD_LENGTH_COURT = 105
STANDARD_WIDTH_COURT = 68
STANDARD_GOALLINE_WIDTH = 7.32
STANDARD_LENGTH_COURT_STATSBOMB = 120
STANDARD_WIDTH_COURT_STATSBOMB = 80
INPUT_PLAYER_SKILL_FILE_CSV_PATH = 'data/input_decision_making_model/input_skill_player.csv'
INPUT_PLAYER_ATTRIBUTE_FILE_CSV_PATH = 'data/input_decision_making_model/input_attribute_player.csv'

# Load Model from External Directory
def load_model_from_external_file_pickle(filename_path):
    return pickle.load(open(filename_path, 'rb'))

# Create Dataframe from Raw Input Coordinate Player (Mandatory)
def load_df_raw_input_coordinate_player(filename_path):
    return pd.read_csv(filename_path, delimiter=";")

# Create Dataframe from Raw Input Player Skills (Optional)
def load_df_raw_input_player_skills(filename_path):
    return pd.read_csv(filename_path)

# Create Dataframe from Raw Input Player Attributes (Mandatory)
def load_df_raw_input_player_attributes(filename_path):
    return pd.read_csv(filename_path)

# Helper to calculate distance pass
def calculate_distance_pass(coordinate_x, coordinate_y, end_x, end_y):
    distance_passing = math.sqrt((abs(end_x - coordinate_x)) ** 2 + (abs(end_y - coordinate_y)) ** 2)
    return distance_passing

# Xpass : Helper to calculate nearest distance from sideline given coordinate
def calculate_nearest_distance_from_sideline(coordinate_x, coordinate_y):
    distance_to_left = coordinate_x
    distance_to_bottom = coordinate_y
    distance_to_top = STANDARD_WIDTH_COURT - coordinate_y
    distance_to_right = STANDARD_LENGTH_COURT - coordinate_x
    return min(distance_to_top, distance_to_bottom, distance_to_left, distance_to_right)

# Xpass : Helper to calculate distance with opposing goal given coordinate
def calculate_distance_with_opposing_goal(coordinate_x, coordinate_y, is_home_team):
    distance_vertical = abs((STANDARD_WIDTH_COURT / 2) - coordinate_y)
    if (is_home_team):
        distance_horizontal = STANDARD_LENGTH_COURT - coordinate_x
    else:
        distance_horizontal = coordinate_x
    return math.sqrt(distance_vertical ** 2 + distance_horizontal ** 2)

# Xpass : Helper to calculate distance between two coordinates (x1,y1) and (x2,y2)
def calculate_distance_between_two_coordinates(x1, y1, x2, y2):
    return math.sqrt(abs(x2-x1) ** 2 + abs(y2-y1) ** 2)

# Xpass : Helper to calculate angle between passer and receiver
def calculate_angle_pass(coordinate_x, coordinate_y, end_x, end_y, is_home_team):
    if (is_home_team):
        coordinate_x_goal = STANDARD_LENGTH_COURT
    else:
        coordinate_x_goal = 0
    coordinate_y_goal = STANDARD_WIDTH_COURT / 2

    distance_passer_to_goal = calculate_distance_between_two_coordinates(coordinate_x_goal, coordinate_y_goal, coordinate_x, coordinate_y)
    distance_receiver_to_goal = calculate_distance_between_two_coordinates(coordinate_x_goal, coordinate_y_goal, end_x, end_y)
    distance_passer_to_receiver = calculate_distance_between_two_coordinates(coordinate_x, coordinate_y, end_x, end_y)

    if (coordinate_x == end_x) and (coordinate_y == end_y):
        return 0
    else:
        cosine_value = (distance_passer_to_goal ** 2 + distance_passer_to_receiver ** 2 - distance_receiver_to_goal ** 2) / (2 * distance_passer_to_goal * distance_passer_to_receiver)
        if (cosine_value > 1):
            cosine_value = 1
        elif (cosine_value < -1):
            cosine_value = -1
        return math.acos(cosine_value)
    
# Opponent Feature : Calculate nearest opponent distance
def calculate_distance_opponent(start_x, start_y, list_node_opponents):
    list_distance_opponent = []
    for opponent_obj in list_node_opponents:
        opponent_x, opponent_y = opponent_obj.get_coordinate_value()
        distance_opponent = calculate_distance_between_two_coordinates(start_x, start_y, opponent_x, opponent_y)
        list_distance_opponent.append(distance_opponent)
    return min(list_distance_opponent) if len(list_distance_opponent) > 0 else 0

# Opponent Feature : Calculate number of opponents closer to goal
def calculate_num_opponent_closer_goal(start_x, start_y, list_node_opponents, is_home_team):
    if (is_home_team):
        coordinate_x_goal = STANDARD_LENGTH_COURT
    else:
        coordinate_x_goal = 0
    coordinate_y_goal = STANDARD_WIDTH_COURT / 2

    num_opponent_closer_to_goal = 0
    for opponent_obj in list_node_opponents:
        opponent_x, opponent_y = opponent_obj.get_coordinate_value()
        distance_passer_to_goal = calculate_distance_between_two_coordinates(start_x, start_y, coordinate_x_goal, coordinate_y_goal)
        distance_opponent_to_goal = calculate_distance_between_two_coordinates(opponent_x, opponent_y, coordinate_x_goal, coordinate_y_goal)
        if (distance_opponent_to_goal < distance_passer_to_goal):
            num_opponent_closer_to_goal += 1
    return num_opponent_closer_to_goal

# Opponent Feature : calculate number of opponents in path distance (default 10 metres)
def calculate_num_opponent_in_path(start_x, start_y, list_node_opponents):
    path_distance = 10
    num_opponent_in_path = 0
    for opponent_obj in list_node_opponents:
        opponent_x, opponent_y = opponent_obj.get_coordinate_value()
        distance_with_opponent = calculate_distance_between_two_coordinates(start_x, start_y, opponent_x, opponent_y)
        if (distance_with_opponent <= path_distance):
            num_opponent_in_path += 1
    return num_opponent_in_path

# Xgoal : Helper to calculate distance to opponent's goal
def calculate_distance_to_goal(length_court, width_court, coordinate_x, coordinate_y, is_home_team):
    if is_home_team:
        distance_to_goal = math.sqrt((abs(length_court - coordinate_x)) ** 2 + (abs((0.5 * width_court) - coordinate_y)) ** 2)
    else:
        distance_to_goal = math.sqrt((coordinate_x) ** 2 + (abs((0.5 * width_court) - coordinate_y)) ** 2)
    return distance_to_goal

# Helper to calculate angle to opponent's goal
# def calculate_angle_to_goal(goalline_width, length_court, width_court, coordinate_x, coordinate_y, is_home_team):
#     if is_home_team:
#         L = abs(length_court - coordinate_x)
#     else:
#         L = coordinate_x
#     W = abs((0.5 * width_court) - coordinate_y)
#     return math.atan((goalline_width * L) / (L ** 2 + W ** 2 - (goalline_width / 2) ** 2))

# Xgoal : Helper to calculate angle to opponent's goal v2
def calculate_angle_to_goal_v2(goalline_width, length_court, width_court, coordinate_x, coordinate_y, is_home_team):
    if is_home_team:
        coordinate_x_post_1, coordinate_x_post_2 = (length_court, length_court)
    else:
        coordinate_x_post_1, coordinate_x_post_2 = (0, 0)
    coordinate_y_post_1 = (width_court / 2) + (goalline_width / 2)
    coordinate_y_post_2 = (width_court / 2) - (goalline_width / 2)

    distance_to_post_1 = math.sqrt(abs(coordinate_x - coordinate_x_post_1) ** 2 + abs(coordinate_y - coordinate_y_post_1) ** 2)
    distance_to_post_2 = math.sqrt(abs(coordinate_x - coordinate_x_post_2) ** 2 + abs(coordinate_y - coordinate_y_post_2) ** 2)

    return math.acos((distance_to_post_1 ** 2 + distance_to_post_2 ** 2 - goalline_width ** 2) / (2 * distance_to_post_1 * distance_to_post_2))

# Generate adjacency structure data for paths from coordinate player input for each timestamp
def generate_adjancency_structure_data_for_paths(df_raw_input, player_id_start, player_id_final, num_of_steps):
    unique_timestamps = sorted(df_raw_input['timestamp'].unique())
    if ((num_of_steps > len(unique_timestamps)) or (num_of_steps < 2) or (num_of_steps == None)):
        num_of_steps = len(unique_timestamps)
    unique_player_ids = df_raw_input['player_id'].unique()
    considered_timestamps = unique_timestamps[:num_of_steps]
    idx_timestamp_control = 0
    initial_start_timestamp = 1
    adjacency_matrix_maps = {}
    node_visited = [(player_id_start, initial_start_timestamp)]
    copy_of_node_visited = []
    
    while idx_timestamp_control < (len(considered_timestamps)-1):
        timestamp_next = considered_timestamps[idx_timestamp_control + 1]
        copy_of_node_visited = copy.deepcopy(node_visited)
        node_visited.clear()
        player_id_targets = [player_id_final] if (idx_timestamp_control == (len(considered_timestamps)-2)) else unique_player_ids
        for player_id_next in player_id_targets:
            node_visited.append((player_id_next, timestamp_next))
        for player_id_prev, timestamp_prev in copy_of_node_visited:
            for player_id_next in player_id_targets:
                if (player_id_prev, timestamp_prev) in adjacency_matrix_maps:
                    adjacency_matrix_maps[(player_id_prev, timestamp_prev)].append((player_id_next, timestamp_next))
                else:
                    adjacency_matrix_maps[(player_id_prev, timestamp_prev)] = [(player_id_next, timestamp_next)]
        idx_timestamp_control += 1
    return adjacency_matrix_maps

# Generate all alternate paths from player_id_start to player_id_final in certain timestamp, given adjacency matrix maps
# Example : list_path = [(player_id_1, timestamp_1), (player_id_2, timestamp_2), (player_id_3, timestamp_3)]
def generate_list_path_from_adjacency_matrix(adjacency_matrix_maps, df_raw_input, player_id_start, player_id_final, num_of_steps):
    initial_start_timestamp = 1
    unique_timestamps = sorted(df_raw_input['timestamp'].unique())
    if ((num_of_steps > len(unique_timestamps)) or (num_of_steps < 2) or (num_of_steps == None)):
        num_of_steps = len(unique_timestamps)
    considered_timestamps = unique_timestamps[:num_of_steps]

    list_all_available_paths = []
    list_all_available_paths.append([(player_id_start, initial_start_timestamp)])
    final_path_from_src_to_target = []
    while len(list_all_available_paths) > 0:
        pop_available_paths = list_all_available_paths.pop(0)
        last_nodes_pop_available_paths = pop_available_paths[-1]
        leaf_player_id, leaf_player_timestamp = last_nodes_pop_available_paths
        if (leaf_player_id == player_id_final) and (leaf_player_timestamp == considered_timestamps[-1]):
            final_path_from_src_to_target.append(pop_available_paths)
        else:
            list_neighbours_nodes = adjacency_matrix_maps[(leaf_player_id, leaf_player_timestamp)]
            for node in list_neighbours_nodes:
                list_all_available_paths.append(pop_available_paths + [node])
    return final_path_from_src_to_target

# Generate list of node opponent (Node_Decision_Making class) from input csv decision making model
def generate_list_node_opponents_from_df_raw_input(df_raw_input):
    list_node_opponents = []
    for _, row in df_raw_input.iterrows():
        if (row['is_opponent_team'] == 1):
            node_opponent_obj = Node_Decision_Making(row['timestamp'],row['player_id'],row['coordinate_x'],row['coordinate_y'],0)
            list_node_opponents.append(node_opponent_obj)
    return list_node_opponents

# Generate list of Path_Decision_Making objects from input csv decision making model
def generate_list_path_objects_from_df_raw_input(df_raw_input, player_id_start, player_id_final, num_of_steps, include_skill=False):
    adjacency_matrix_maps_for_paths = generate_adjancency_structure_data_for_paths(df_raw_input, player_id_start, player_id_final, num_of_steps)
    list_node_opponents = generate_list_node_opponents_from_df_raw_input(df_raw_input)
    list_raw_paths_from_src_to_target = generate_list_path_from_adjacency_matrix(adjacency_matrix_maps_for_paths, df_raw_input, player_id_start, player_id_final, num_of_steps)
    list_paths_objects_from_src_to_target = []
    df_raw_input_skill = None
    df_raw_input_attribute = None
    if (include_skill):
        df_raw_input_skill = load_df_raw_input_player_skills(INPUT_PLAYER_SKILL_FILE_CSV_PATH)
        df_raw_input_attribute = load_df_raw_input_player_attributes(INPUT_PLAYER_ATTRIBUTE_FILE_CSV_PATH)

    for path_route in list_raw_paths_from_src_to_target:
        path_object = Path_Decision_Making()
        # Collect data from all home team members
        for current_player_id, current_timestamp in path_route:
            coordinate_x, coordinate_y = df_raw_input.loc[(df_raw_input["player_id"] == current_player_id) & (df_raw_input["is_opponent_team"] == 0) \
                                        & (df_raw_input["timestamp"] == current_timestamp), ["coordinate_x", "coordinate_y"]].iloc[0]
            path_object.append_new_node_in_path(current_timestamp, current_player_id, coordinate_x, coordinate_y, 1, include_skill)
            if (include_skill):
                new_skill_values_object = Skill_Player_Values(current_player_id)
                for skill in COLUMNS_PLAYERS_SKILL:
                    skill_score = df_raw_input_skill.loc[(df_raw_input_skill["player_id"] == current_player_id), skill].iloc[0]
                    new_skill_values_object.set_skill_value_by_key(skill, skill_score)
                for attribute in COLUMNS_PLAYERS_ATTRIBUTE:
                    attribute_value = df_raw_input_attribute.loc[(df_raw_input_attribute["player_id"] == current_player_id), attribute].iloc[0]
                    new_skill_values_object.set_attribute_value_by_key(attribute, attribute_value)
                path_object.set_player_skills_value_into_node(current_timestamp, current_player_id, new_skill_values_object)
        # Collect data from all opponent team members
        path_object.set_opponent_node_list(list_node_opponents)
        list_paths_objects_from_src_to_target.append(path_object)
    return list_paths_objects_from_src_to_target

# Construct xpass dataset from input list_path_objects (Path_Decision_Making class)
def construct_xpass_dataset_from_list_path_objects(list_path_objects):
    empty_xpass_df = pd.DataFrame(columns=COLUMNS_XPASS_MODEL+COLUMNS_PLAYERS_SKILL+COLUMNS_PLAYERS_ATTRIBUTE, index=[0])
    for path_idx, path_object in enumerate(list_path_objects):
        xpass_objects = path_object.get_xpass_values()
        opponent_objects = path_object.get_opponent_node_list()
        for xpass_object in xpass_objects:
            node_start = xpass_object.get_node_start()
            node_end = xpass_object.get_node_end()
            start_x, start_y = node_start.get_coordinate_value()
            end_x, end_y = node_end.get_coordinate_value()
            # All these objects should be all home team
            is_home_team = True
            # Basic features
            distance_pass = calculate_distance_pass(start_x, start_y, end_x, end_y)
            distance_sideline = calculate_nearest_distance_from_sideline(start_x, start_y)
            distance_goal = calculate_distance_with_opposing_goal(start_x, start_y, is_home_team)
            distance_receiver_sideline = calculate_nearest_distance_from_sideline(end_x, end_y)
            distance_receiver_goal = calculate_distance_with_opposing_goal(end_x, end_y, is_home_team)
            angle_pass = calculate_angle_pass(start_x, start_y, end_x, end_y, is_home_team)
            # Add opponent features
            distance_opponent = calculate_distance_opponent(start_x, start_y, opponent_objects)
            num_opponent_closer_goal = calculate_num_opponent_closer_goal(start_x, start_y, opponent_objects, is_home_team)
            distance_receiver_opponent = calculate_distance_opponent(end_x, end_y, opponent_objects)
            num_opponent_closer_goal_receiver = calculate_num_opponent_closer_goal(end_x, end_y, opponent_objects, is_home_team)
            num_opponent_in_path = calculate_num_opponent_in_path(start_x, start_y, opponent_objects)
            num_opponent_in_path_receiver = calculate_num_opponent_in_path(end_x, end_y, opponent_objects)
            # Append new row to (empty) xpass dataframe
            if (node_start.get_player_id() != node_end.get_player_id()):
                maps_new_row = {
                    "path_id":path_idx, "player_id_src":node_start.get_player_id(), "player_id_target":node_end.get_player_id(),
                    "timestamp_origin":node_start.get_timestamp(), "timestamp_target":node_end.get_timestamp(), "start_x":start_x, "start_y":start_y, 
                    "end_x":end_x, "end_y":end_y, "distance_pass":distance_pass, "is_home_team":node_start.get_is_home_team(),
                    "distance_sideline": distance_sideline, "distance_goal": distance_goal, "distance_receiver_sideline": distance_receiver_sideline, 
                    "distance_receiver_goal": distance_receiver_goal, "angle_pass": angle_pass, "distance_opponent": distance_opponent,
                    "num_opponent_closer_goal": num_opponent_closer_goal, "distance_receiver_opponent":distance_receiver_opponent,
                    "num_opponent_closer_goal_receiver":num_opponent_closer_goal_receiver, "num_opponent_in_path":num_opponent_in_path,
                    "num_opponent_in_path_receiver": num_opponent_in_path_receiver
                }
                for skill in COLUMNS_PLAYERS_SKILL:
                    maps_new_row[skill] = node_start.get_skill_values().get_skill_value_by_key(skill)
                for attribute in COLUMNS_PLAYERS_ATTRIBUTE:
                    maps_new_row[attribute] = node_start.get_skill_values().get_attribute_value_by_key(attribute)
                new_row = pd.DataFrame(maps_new_row, index=[0])
                empty_xpass_df = pd.concat([new_row, empty_xpass_df.loc[:]]).reset_index(drop=True)
    return empty_xpass_df

# Construct xdribble dataset from input list_path_objects (Path_Decision_Making class)
def construct_xdribble_dataset_from_list_path_objects(list_path_objects):
    empty_xdribble_df = pd.DataFrame(columns=COLUMNS_XDRIBBLE_MODEL+COLUMNS_PLAYERS_SKILL+COLUMNS_PLAYERS_ATTRIBUTE, index=[0])
    for path_idx, path_object in enumerate(list_path_objects):
        xpass_objects = path_object.get_xpass_values()
        opponent_objects = path_object.get_opponent_node_list()
        for xpass_object in xpass_objects:
            node_start = xpass_object.get_node_start()
            node_end = xpass_object.get_node_end()
            start_x, start_y = node_start.get_coordinate_value()
            end_x, end_y = node_end.get_coordinate_value()
            # Assume is_take_on is true (surpass opponent)
            is_take_on = 1
            # Assume is_home_team is always true
            is_home_team = True
            # Add opponent features
            distance_opponent = calculate_distance_opponent(start_x, start_y, opponent_objects)
            num_opponent_closer_goal = calculate_num_opponent_closer_goal(start_x, start_y, opponent_objects, is_home_team)
            num_opponent_in_path = calculate_num_opponent_in_path(start_x, start_y, opponent_objects)
            # Append new row to (empty) xdribble dataframe
            if (node_start.get_player_id() == node_end.get_player_id()):
                maps_new_row = {"path_id":path_idx, "player_id":node_start.get_player_id(), "timestamp_origin":node_start.get_timestamp(), 
                                "timestamp_target":node_end.get_timestamp(), "start_x":start_x, "start_y":start_y, "end_x":end_x, "end_y":end_y, 
                                "is_take_on":is_take_on, "is_home_team":node_start.get_is_home_team(), "distance_opponent":distance_opponent,
                                "num_opponent_closer_goal":num_opponent_closer_goal, "num_opponent_in_path":num_opponent_in_path}
                for skill in COLUMNS_PLAYERS_SKILL:
                    maps_new_row[skill] = node_start.get_skill_values().get_skill_value_by_key(skill)
                for attribute in COLUMNS_PLAYERS_ATTRIBUTE:
                    maps_new_row[attribute] = node_start.get_skill_values().get_attribute_value_by_key(attribute)
                new_row = pd.DataFrame(maps_new_row, index=[0])
                empty_xdribble_df = pd.concat([new_row, empty_xdribble_df.loc[:]]).reset_index(drop=True)
    return empty_xdribble_df

# (NOT USED) Construct xpass dataset from input csv decision making model
def construct_xpass_dataset_from_df_raw_input(df_raw_input):
    unique_timestamps = sorted(df_raw_input['timestamp'].unique())
    empty_xpass_df = pd.DataFrame(columns=COLUMNS_XPASS_MODEL, index=[0])

    for timestamp in unique_timestamps:
        df_raw_this_timestamp = df_raw_input[df_raw_input['timestamp'] == timestamp]
        unique_id_players_this_timestamp = df_raw_this_timestamp['player_id'].unique()
        for player_id_source in unique_id_players_this_timestamp:
            for player_id_target in unique_id_players_this_timestamp:
                if player_id_source == player_id_target:
                    continue
                # Select Features
                start_x = df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id_source, "coordinate_x"].iloc[0]
                start_y = df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id_source, "coordinate_y"].iloc[0]
                end_x = df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id_target, "coordinate_x"].iloc[0]
                end_y = df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id_target, "coordinate_y"].iloc[0]
                is_home_team = 1 if (df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id_source, "is_opponent_team"].iloc[0] == 0) else 0
                # Bodypart ID is assumed to be foot (id = 0) for now
                # bodypart_id = 0
                distance_pass = calculate_distance_pass(start_x, start_y, end_x, end_y)
                distance_sideline = calculate_nearest_distance_from_sideline(start_x, start_y)
                distance_goal = calculate_distance_with_opposing_goal(start_x, start_y, is_home_team)
                distance_receiver_sideline = calculate_nearest_distance_from_sideline(end_x, end_y)
                distance_receiver_goal = calculate_distance_with_opposing_goal(end_x, end_y, is_home_team)
                angle_pass = calculate_angle_pass(start_x, start_y, end_x, end_y, is_home_team)
                # Append new row to (empty) xpass dataframe
                new_row = pd.DataFrame({"timestamp": timestamp, "player_id_src": player_id_source, "player_id_target": player_id_target, 
                           "start_x": start_x, "start_y": start_y, "end_x": end_x, "end_y": end_y, 
                           "distance_pass": distance_pass, "is_home_team": is_home_team,
                           "distance_sideline": distance_sideline, "distance_goal": distance_goal,
                           "distance_receiver_sideline": distance_receiver_sideline, "distance_receiver_goal": distance_receiver_goal,
                           "angle_pass": angle_pass}, index=[0])
                empty_xpass_df = pd.concat([new_row, empty_xpass_df.loc[:]]).reset_index(drop=True)

    return empty_xpass_df

# Construct xgoal dataset from input list_path_objects (Path_Decision_Making class)
def construct_xgoal_dataset_from_list_path_objects(list_path_objects):
    empty_xgoal_df = pd.DataFrame(columns=COLUMNS_XGOAL_MODEL+COLUMNS_PLAYERS_SKILL+COLUMNS_PLAYERS_ATTRIBUTE, index=[0])
    for path_idx, path_object in enumerate(list_path_objects):
        path_list = path_object.get_path_list()
        opponent_objects = path_object.get_opponent_node_list()
        for node in path_list:
            start_x, start_y = node.get_coordinate_value()
            is_home_team = node.get_is_home_team()
            # Basic Features
            distance_to_goal = calculate_distance_to_goal(STANDARD_LENGTH_COURT, STANDARD_WIDTH_COURT, start_x, start_y, is_home_team)
            angle_to_goal = calculate_angle_to_goal_v2(STANDARD_GOALLINE_WIDTH, STANDARD_LENGTH_COURT, STANDARD_WIDTH_COURT, start_x, start_y, is_home_team)
            # Add opponent features
            distance_opponent = calculate_distance_opponent(start_x, start_y, opponent_objects)
            num_opponent_closer_goal = calculate_num_opponent_closer_goal(start_x, start_y, opponent_objects, is_home_team)
            num_opponent_in_path = calculate_num_opponent_in_path(start_x, start_y, opponent_objects)
            # Append new row to (empty) xgoal dataframe
            maps_new_row = {"path_id":path_idx, "timestamp": node.get_timestamp(), "player_id": node.get_player_id(), 
                            "distance_to_goal": distance_to_goal, "angle_to_goal": angle_to_goal, "is_home_team": is_home_team,
                            "distance_opponent": distance_opponent, "num_opponent_closer_goal": num_opponent_closer_goal,
                            "num_opponent_in_path": num_opponent_in_path, "start_x": start_x, "start_y": start_y}
            for skill in COLUMNS_PLAYERS_SKILL:
                maps_new_row[skill] = node.get_skill_values().get_skill_value_by_key(skill)
            for attribute in COLUMNS_PLAYERS_ATTRIBUTE:
                maps_new_row[attribute] = node.get_skill_values().get_attribute_value_by_key(attribute)
            new_row = pd.DataFrame(maps_new_row, index=[0])
            empty_xgoal_df = pd.concat([new_row, empty_xgoal_df.loc[:]]).reset_index(drop=True)
    return empty_xgoal_df

# (NOT USED) Construct xgoal dataset from input csv decision making model
def construct_xgoal_dataset_from_df_raw_input(df_raw_input):
    unique_timestamps = sorted(df_raw_input['timestamp'].unique())
    empty_xgoal_df = pd.DataFrame(columns=COLUMNS_XGOAL_MODEL, index=[0])

    for timestamp in unique_timestamps:
        df_raw_this_timestamp = df_raw_input[df_raw_input['timestamp'] == timestamp]
        unique_id_players_this_timestamp = df_raw_this_timestamp['player_id'].unique()
        for player_id in unique_id_players_this_timestamp:
            # Select Features
            start_x = df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id, "coordinate_x"].iloc[0]
            start_y = df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id, "coordinate_y"].iloc[0]
            is_home_team = 1 if (df_raw_this_timestamp.loc[df_raw_this_timestamp["player_id"] == player_id, "is_opponent_team"].iloc[0] == 0) else 0
            # Bodypart ID is assumed to be foot (id = 0) for now
            # bodypart_id = 0
            distance_to_goal = calculate_distance_to_goal(STANDARD_LENGTH_COURT, STANDARD_WIDTH_COURT, start_x, start_y, is_home_team)
            angle_to_goal = calculate_angle_to_goal_v2(STANDARD_GOALLINE_WIDTH, STANDARD_LENGTH_COURT, STANDARD_WIDTH_COURT, start_x, start_y, is_home_team)
            # Append new row to (empty) xpass dataframe
            new_row = pd.DataFrame({"timestamp": timestamp, "player_id": player_id, "distance_to_goal": distance_to_goal, "angle_to_goal": angle_to_goal, "is_home_team": is_home_team}, index=[0])
            empty_xgoal_df = pd.concat([new_row, empty_xgoal_df.loc[:]]).reset_index(drop=True)

    return empty_xgoal_df

In [56]:
INPUT_RAW_PLAYER_COORDINATE_FILEPATH = "data/input_decision_making_model/input_coordinate_player.csv"

raw_input_coordinate_player_df = load_df_raw_input_coordinate_player(INPUT_RAW_PLAYER_COORDINATE_FILEPATH)
raw_input_coordinate_player_df

Unnamed: 0,timestamp,is_opponent_team,player_id,coordinate_x,coordinate_y
0,1,0,1,28,51
1,1,0,2,36,18
2,1,0,3,43,34
3,2,0,1,40,51
4,2,0,2,43,18
5,2,0,3,46,34
6,3,0,1,52,51
7,3,0,2,50,18
8,3,0,3,49,34
9,4,0,1,72,51


In [57]:
INPUT_RAW_PLAYER_SKILL_FILEPATH = "data/input_decision_making_model/input_skill_player.csv"

raw_input_player_skills_df = load_df_raw_input_player_skills(INPUT_RAW_PLAYER_SKILL_FILEPATH)
raw_input_player_skills_df

Unnamed: 0.1,Unnamed: 0,player_id,acceleration,aggression,agility,balance,ball_control,composure,crossing,curve,...,reactions,shot_power,sliding_tackle,sprint_speed,stamina,short_passing,standing_tackle,strength,vision,volleys
0,0,3,55,54,86,85,72,100,62,64,...,99,71,68,50,83,80,77,96,66,85
1,1,2,62,67,63,64,50,79,57,56,...,60,85,98,71,61,88,71,75,89,58
2,2,1,63,59,59,61,93,78,93,62,...,57,76,92,78,87,64,65,79,60,71


In [58]:
INPUT_RAW_PLAYER_ATTRIBUTE_FILEPATH = "data/input_decision_making_model/input_attribute_player.csv" 

raw_input_player_attributes_df = load_df_raw_input_player_attributes(INPUT_RAW_PLAYER_ATTRIBUTE_FILEPATH)
raw_input_player_attributes_df

Unnamed: 0.1,Unnamed: 0,player_id,height_cm,weight_kgs,age
0,0,3,163,55,22
1,1,2,177,62,27
2,2,1,155,53,27


In [59]:
# GENERATE LIST OF PATHS OBJECT FROM PLAYER ID SOURCE TO PLAYER ID TARGET
PLAYER_ID_SOURCE = 1
PLAYER_ID_TARGET = 3
NUM_OF_STEPS = 5
INCLUDE_SKILL = True

final_list_paths_obj = generate_list_path_objects_from_df_raw_input(raw_input_coordinate_player_df, PLAYER_ID_SOURCE, PLAYER_ID_TARGET, NUM_OF_STEPS, INCLUDE_SKILL)

In [60]:
# FINAL XPASS DATAFRAME
# xpass_dataframes_test = construct_xpass_dataset_from_df_raw_input(raw_input_coordinate_player_df)
xpass_dataframes_test = construct_xpass_dataset_from_list_path_objects(final_list_paths_obj)
xpass_dataframes_test.dropna(inplace=True)
xpass_dataframes_test.sort_values(by=['path_id']).reset_index()
xpass_dataframes_test

Unnamed: 0,path_id,player_id_src,player_id_target,timestamp_origin,timestamp_target,start_x,start_y,end_x,end_y,distance_pass,...,sprint_speed,stamina,short_passing,standing_tackle,strength,vision,volleys,height_cm,weight_kgs,age
0,26,1,3,1,2,28,51,46,34,24.758837,...,78,87,64,65,79,60,71,155,53,27
1,25,2,3,4,5,60,18,98,34,41.231056,...,71,61,88,71,75,89,58,177,62,27
2,25,3,2,3,4,49,34,60,18,19.416488,...,50,83,80,77,96,66,85,163,55,22
3,25,1,3,1,2,28,51,46,34,24.758837,...,78,87,64,65,79,60,71,155,53,27
4,24,1,3,4,5,72,51,98,34,31.064449,...,78,87,64,65,79,60,71,155,53,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,3,1,2,2,3,40,51,50,18,34.481879,...,78,87,64,65,79,60,71,155,53,27
68,2,1,3,3,4,52,51,55,34,17.262677,...,78,87,64,65,79,60,71,155,53,27
69,1,2,3,4,5,60,18,98,34,41.231056,...,71,61,88,71,75,89,58,177,62,27
70,1,1,2,3,4,52,51,60,18,33.955854,...,78,87,64,65,79,60,71,155,53,27


In [61]:
# FINAL XGOAL DATAFRAME
# xgoal_dataframes_test = construct_xgoal_dataset_from_df_raw_input(raw_input_coordinate_player_df)
xgoal_dataframes_test = construct_xgoal_dataset_from_list_path_objects(final_list_paths_obj)
xgoal_dataframes_test.dropna(inplace=True)
xgoal_dataframes_test.sort_values(by=['path_id']).reset_index()
xgoal_dataframes_test

Unnamed: 0,path_id,timestamp,player_id,distance_to_goal,angle_to_goal,is_home_team,distance_opponent,num_opponent_closer_goal,num_opponent_in_path,start_x,...,sprint_speed,stamina,short_passing,standing_tackle,strength,vision,volleys,height_cm,weight_kgs,age
0,26,5,3,7.000000,0.963531,1,0,0,0,98,...,50,83,80,77,96,66,85,163,55,22
1,26,4,3,50.000000,0.146139,1,0,0,0,55,...,50,83,80,77,96,66,85,163,55,22
2,26,3,3,56.000000,0.130529,1,0,0,0,49,...,50,83,80,77,96,66,85,163,55,22
3,26,2,3,59.000000,0.123909,1,0,0,0,46,...,50,83,80,77,96,66,85,163,55,22
4,26,1,1,78.854296,0.090594,1,0,0,0,28,...,78,87,64,65,79,60,71,155,53,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,0,5,3,7.000000,0.963531,1,0,0,0,98,...,50,83,80,77,96,66,85,163,55,22
131,0,4,1,37.121422,0.175203,1,0,0,0,72,...,78,87,64,65,79,60,71,155,53,27
132,0,3,1,55.659680,0.125116,1,0,0,0,52,...,78,87,64,65,79,60,71,155,53,27
133,0,2,1,67.186308,0.105328,1,0,0,0,40,...,78,87,64,65,79,60,71,155,53,27


In [62]:
# FINAL XDRIBBLE DATAFRAME
xdribble_dataframes_test = construct_xdribble_dataset_from_list_path_objects(final_list_paths_obj)
xdribble_dataframes_test.dropna(inplace=True)
xdribble_dataframes_test.sort_values(by=['path_id']).reset_index()
xdribble_dataframes_test

Unnamed: 0,path_id,player_id,timestamp_origin,timestamp_target,start_x,start_y,end_x,end_y,is_take_on,is_home_team,...,sprint_speed,stamina,short_passing,standing_tackle,strength,vision,volleys,height_cm,weight_kgs,age
0,26,3,4,5,55,34,98,34,1,1,...,50,83,80,77,96,66,85,163,55,22
1,26,3,3,4,49,34,55,34,1,1,...,50,83,80,77,96,66,85,163,55,22
2,26,3,2,3,46,34,49,34,1,1,...,50,83,80,77,96,66,85,163,55,22
3,25,3,2,3,46,34,49,34,1,1,...,50,83,80,77,96,66,85,163,55,22
4,24,3,2,3,46,34,49,34,1,1,...,50,83,80,77,96,66,85,163,55,22
5,23,3,4,5,55,34,98,34,1,1,...,50,83,80,77,96,66,85,163,55,22
6,22,2,3,4,50,18,60,18,1,1,...,71,61,88,71,75,89,58,177,62,27
7,20,3,4,5,55,34,98,34,1,1,...,50,83,80,77,96,66,85,163,55,22
8,18,1,3,4,52,51,72,51,1,1,...,78,87,64,65,79,60,71,155,53,27
9,17,3,4,5,55,34,98,34,1,1,...,50,83,80,77,96,66,85,163,55,22


In [63]:
XPASS_CASE_NUMBERS_INCLUDED_IN_CALCULATION = [1]

# FUNCTION TO PREPROCESS FINAL XPASS DATAFRAME
def preprocess_xpass_test(xpass_df):
    # 1. Change all numeric columns with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
    columns_minmax_scaler = COLUMNS_PLAYERS_SKILL + COLUMNS_PLAYERS_ATTRIBUTE + \
                            ["start_x", "start_y", "end_x", "end_y", "distance_pass", "distance_sideline", 
                            "distance_goal", "distance_receiver_sideline", "distance_receiver_goal", "angle_pass",
                            "distance_opponent", "num_opponent_closer_goal", "distance_receiver_opponent", 
                            "num_opponent_closer_goal_receiver", "num_opponent_in_path", "num_opponent_in_path_receiver"]
    xpass_df[columns_minmax_scaler] = scaler.fit_transform(xpass_df[columns_minmax_scaler])

    # 2. Change incorrect type of bodypart_id and is_home_team columns
    xpass_df['is_home_team'] = xpass_df['is_home_team'].astype("int64")

    return xpass_df

# FUNCTION TO UPDATE XPASS VALUE TO LIST FINAL PATHS OBJECTS (Path_Decision_Making class)
def update_xpass_value_to_list_path_objects(list_path_objects, xpass_dataframes_test):
    for path_idx, path_object in enumerate(list_path_objects):
        xpass_objects = path_object.get_xpass_values()
        for xpass_object in xpass_objects:
            player_id_src = xpass_object.get_node_start().get_player_id()
            player_id_target = xpass_object.get_node_end().get_player_id()
            timestamp_src = xpass_object.get_node_start().get_timestamp()
            timestamp_target = xpass_object.get_node_end().get_timestamp()
            if (player_id_src != player_id_target):
                xpass_value = xpass_dataframes_test.loc[(xpass_dataframes_test["player_id_src"] == player_id_src) & \
                                (xpass_dataframes_test["player_id_target"] == player_id_target) & (xpass_dataframes_test["timestamp_origin"] == timestamp_src) & \
                                (xpass_dataframes_test["timestamp_target"] == timestamp_target) & (xpass_dataframes_test["path_id"] == path_idx), \
                                "probability_score"].iloc[0]
                xpass_object.set_xpass_or_xdribble_value(xpass_value)
    return list_path_objects

# VERIFY XPASS PROBABILITY BASED ON MODEL
for case_number in XPASS_CASE_NUMBERS_INCLUDED_IN_CALCULATION:
    filename_model = f'xpass_model_case_{case_number}.sav'
    directory_model = "data/model_xpass/"
    xpass_model = pickle.load(open(directory_model + filename_model, 'rb'))
    filename_df_with_score = f'xpass_decision_making_result_case_{case_number}.csv'

    xpass_dataframes_test_verify = xpass_dataframes_test.copy(deep=True)
    xpass_dataframes_test_verify = preprocess_xpass_test(xpass_dataframes_test_verify)
    featured_column_based_on_model = xpass_model.get_booster().feature_names
    X_test = xpass_dataframes_test_verify[featured_column_based_on_model]
    y_verify_result = [p[1] for p in xpass_model.predict_proba(X_test)]
    # y_verify_result = xpass_model.predict(X_test)

    xpass_dataframes_test['probability_score'] = y_verify_result
    final_list_paths_obj = update_xpass_value_to_list_path_objects(final_list_paths_obj, xpass_dataframes_test)
    xpass_dataframes_test.to_csv(directory_model + filename_df_with_score)
    xpass_dataframes_test

In [64]:
# PLOT PASS PROBABILITY INTO PITCH IMAGE
def plot_passing_probability_network(raw_input_from_csv, xpass_final_df):
    fig, axs = plt.subplots(2,1)
    fig.set_figheight(80)
    fig.set_figwidth(100)
    pitchXPass_part1 = Pitch(pitch_type="custom", pitch_length=105, pitch_width=68, pitch_color="green")
    pitchXPass_part1.draw(axs[0], figsize=(200,100), constrained_layout=True, tight_layout=False)
    axs[0].set_title("XPass Distribution Part 1", fontsize=60)
    pitchXPass_part2 = Pitch(pitch_type="custom", pitch_length=105, pitch_width=68, pitch_color="green")
    pitchXPass_part2.draw(axs[1], figsize=(200,100), constrained_layout=True, tight_layout=False)
    axs[1].set_title("XPass Distribution Part 2", fontsize=60)

    timestamps_sorted = sorted(raw_input_from_csv['timestamp'].unique())
    for timestamp in timestamps_sorted:
        raw_input_this_timestamp = raw_input_from_csv[raw_input_from_csv['timestamp'] == timestamp]
        unique_player_ids_this_timestamp = raw_input_this_timestamp['player_id'].unique()

        for player_id in unique_player_ids_this_timestamp:
            start_x = raw_input_this_timestamp.loc[raw_input_this_timestamp["player_id"] == player_id, "coordinate_x"].iloc[0]
            start_y = raw_input_this_timestamp.loc[raw_input_this_timestamp["player_id"] == player_id, "coordinate_y"].iloc[0]
            idx_column = 0
            for pitch in [pitchXPass_part1, pitchXPass_part2]:
                pitch.scatter(x=start_x, y=start_y, color="red", ax=axs[idx_column], s=20000)
                axs[idx_column].annotate(text=player_id, xy=(start_x - 5, start_y), ha="center", va="center", size=100)
                idx_column += 1

        list_unique_arrows = []
        for player_id_src in unique_player_ids_this_timestamp:
            for player_id_target in unique_player_ids_this_timestamp:
                if player_id_src == player_id_target:
                    continue
                if ((player_id_src, player_id_target) not in list_unique_arrows) and ((player_id_target, player_id_src) not in list_unique_arrows):
                    list_unique_arrows.append((player_id_src, player_id_target))
                    pitch = pitchXPass_part1
                    idx_column = 0
                else:
                    pitch = pitchXPass_part2
                    idx_column = 1
                specific_row_xpass_df = xpass_final_df.loc[(xpass_final_df["player_id_src"] == player_id_src) & (xpass_final_df["player_id_target"] == player_id_target), ["start_x", "start_y", "end_x", "end_y", "probability_score"]].iloc[0]
                pitch.arrows(xstart=specific_row_xpass_df["start_x"], ystart=specific_row_xpass_df["start_y"], xend=specific_row_xpass_df["end_x"], yend=specific_row_xpass_df["end_y"], ax=axs[idx_column], color="white", width=10)
                x_line = (specific_row_xpass_df["end_x"] + specific_row_xpass_df["start_x"]) / 2
                y_line = (specific_row_xpass_df["end_y"] + specific_row_xpass_df["start_y"]) / 2   
                axs[idx_column].annotate(text=str(round(specific_row_xpass_df["probability_score"], 2)), xy=(x_line, y_line), ha="center", va="center", size=70, color="orange")
    
    plt.show()

# plot_passing_probability_network(raw_input_coordinate_player_df, xpass_dataframes_test)

In [65]:
XGOAL_CASE_NUMBERS_INCLUDED_IN_CALCULATION = [1]

# FUNCTION TO PREPROCESS FINAL XGOAL DATAFRAME
def preprocess_xgoal_test(xgoal_df):
    # 1. Change all numeric column with MinMaxScaler
    scaler = preprocessing.MinMaxScaler()
    columns_minmax_scaler = COLUMNS_PLAYERS_SKILL + COLUMNS_PLAYERS_ATTRIBUTE + \
                            ["distance_to_goal", "angle_to_goal", "distance_opponent", 
                             "num_opponent_closer_goal", "num_opponent_in_path", "start_x", "start_y"]
    xgoal_df[columns_minmax_scaler] = scaler.fit_transform(xgoal_df[columns_minmax_scaler])

    # 2. Change incorrect type of bodypart_id and is_home_team columns
    xgoal_df['is_home_team'] = xgoal_df['is_home_team'].astype("int64")

    return xgoal_df

# FUNCTION TO UPDATE XGOAL VALUE TO LIST FINAL PATHS OBJECTS (Path_Decision_Making class)
def update_xgoal_value_to_list_path_objects(list_path_objects, xgoal_dataframes_test):
    for path_idx, path_object in enumerate(list_path_objects):
        # Updates xgoal_value in list of node class
        node_objects_list = path_object.get_path_list()
        for node_object in node_objects_list:
            player_id = node_object.get_player_id()
            timestamp = node_object.get_timestamp()
            xgoal_value = xgoal_dataframes_test.loc[(xgoal_dataframes_test["player_id"] == player_id) & \
                             (xgoal_dataframes_test["timestamp"] == timestamp) & \
                             (xgoal_dataframes_test["path_id"] == path_idx), "probability_score"].iloc[0]
            node_object.set_xgoal_value(xgoal_value)
        # Updates xgoal_value in xpass class
        xpass_objects = path_object.get_xpass_values()
        for xpass_object in xpass_objects:
            player_id_src = xpass_object.get_node_start().get_player_id()
            timestamp_src = xpass_object.get_node_start().get_timestamp()
            xgoal_value_src = xgoal_dataframes_test.loc[(xgoal_dataframes_test["player_id"] == player_id_src) & \
                             (xgoal_dataframes_test["timestamp"] == timestamp_src) & \
                             (xgoal_dataframes_test["path_id"] == path_idx), "probability_score"].iloc[0]
            xpass_object.get_node_start().set_xgoal_value(xgoal_value_src)

            player_id_target = xpass_object.get_node_end().get_player_id()
            timestamp_target = xpass_object.get_node_end().get_timestamp()
            xgoal_value_target = xgoal_dataframes_test.loc[(xgoal_dataframes_test["player_id"] == player_id_target) & \
                             (xgoal_dataframes_test["timestamp"] == timestamp_target) & \
                             (xgoal_dataframes_test["path_id"] == path_idx), "probability_score"].iloc[0]
            xpass_object.get_node_end().set_xgoal_value(xgoal_value_target)
    return list_path_objects

# VERIFY XGOAL PROBABILITY BASED ON MODEL
for case_number in XGOAL_CASE_NUMBERS_INCLUDED_IN_CALCULATION:
    filename = f'xgoal_model_case_{case_number}.sav'
    directory_model = "data/model_xgoal/"
    xgoal_model = pickle.load(open(directory_model + filename, 'rb'))
    filename_df_with_score = f'xgoal_decision_making_result_case_{case_number}.csv'

    xgoal_dataframes_test_verify = xgoal_dataframes_test.copy(deep=True)
    xgoal_dataframes_test_verify = preprocess_xgoal_test(xgoal_dataframes_test_verify)
    featured_column_based_on_model = xgoal_model.get_booster().feature_names
    X_test = xgoal_dataframes_test_verify[featured_column_based_on_model]
    y_verify_result = [p[1] for p in xgoal_model.predict_proba(X_test)]
    # y_verify_result = xgoal_model.predict(X_test)

    xgoal_dataframes_test['probability_score'] = y_verify_result
    final_list_paths_obj = update_xgoal_value_to_list_path_objects(final_list_paths_obj, xgoal_dataframes_test)
    xgoal_dataframes_test.to_csv(directory_model + filename_df_with_score)
    xgoal_dataframes_test

In [66]:
# PLOT GOAL PROBABILITY INTO PITCH IMAGE
def plot_goal_probability_network(raw_input_from_csv, xgoal_final_df):
    pitchXGoal = Pitch(pitch_type="custom", pitch_length=105, pitch_width=68, pitch_color="green")
    fig, ax = pitchXGoal.draw(figsize=(100,80), constrained_layout=True, tight_layout=False)
    ax.set_title("XGoal Distribution", fontsize=100)

    timestamps_sorted = sorted(raw_input_from_csv['timestamp'].unique())
    for timestamp in timestamps_sorted:
        raw_input_this_timestamp = raw_input_from_csv[raw_input_from_csv['timestamp'] == timestamp]
        unique_player_ids_this_timestamp = raw_input_this_timestamp['player_id'].unique()

        for player_id in unique_player_ids_this_timestamp:
            start_x = raw_input_this_timestamp.loc[raw_input_this_timestamp["player_id"] == player_id, "coordinate_x"].iloc[0]
            start_y = raw_input_this_timestamp.loc[raw_input_this_timestamp["player_id"] == player_id, "coordinate_y"].iloc[0]
            pitchXGoal.scatter(x=start_x, y=start_y, color="red", ax=ax, s=20000)
            ax.annotate(text=player_id, xy=(start_x - 5, start_y), ha="center", va="center", size=100)

        for player_id in unique_player_ids_this_timestamp:
            specific_row_xgoal_df = xgoal_final_df.loc[xgoal_final_df["player_id"] == player_id, ["is_home_team", "probability_score"]].iloc[0]
            start_x = raw_input_this_timestamp.loc[raw_input_this_timestamp["player_id"] == player_id, "coordinate_x"].iloc[0]
            start_y = raw_input_this_timestamp.loc[raw_input_this_timestamp["player_id"] == player_id, "coordinate_y"].iloc[0]
            end_x = 105 if (specific_row_xgoal_df['is_home_team'] == 1) else 0
            end_y = 34

            pitchXGoal.arrows(xstart=start_x, ystart=start_y, xend=end_x, yend=end_y, ax=ax, color="white", width=10)
            x_line = (end_x + start_x) / 2
            y_line = (end_y + start_y) / 2   
            ax.annotate(text=("xG = " + str(round(specific_row_xgoal_df["probability_score"], 2))), xy=(x_line, y_line), ha="center", va="center", size=100, color="orange")
    
    plt.show()

# plot_goal_probability_network(raw_input_coordinate_player_df, xgoal_dataframes_test)

In [67]:
XDRIBBLE_CASE_NUMBERS_INCLUDED_IN_CALCULATION = [1]

# FUNCTION TO PREPROCESS FINAL XDRIBBLE DATAFRAME
def preprocess_xdribble_test(xdribble_df):
    # 1. Change all numeric columns with MinMaxScaler
    scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
    columns_minmax_scaler = COLUMNS_PLAYERS_SKILL + COLUMNS_PLAYERS_ATTRIBUTE + \
                            ["start_x", "start_y", "end_x", "end_y", "distance_opponent", 
                             "num_opponent_closer_goal", "num_opponent_in_path"]
    xdribble_df[columns_minmax_scaler] = scaler.fit_transform(xdribble_df[columns_minmax_scaler])

    # 2. Change incorrect type of is_home_team and is_take_on column
    xdribble_df['is_home_team'] = xdribble_df['is_home_team'].astype("int64")
    xdribble_df['is_take_on'] = xdribble_df['is_take_on'].astype("int64")
    return xdribble_df

# FUNCTION TO UPDATE XPASS VALUE TO LIST FINAL PATHS OBJECTS (Path_Decision_Making class)
def update_xdribble_value_to_list_path_objects(list_path_objects, xdribble_dataframes_test):
    for path_idx, path_object in enumerate(list_path_objects):
        xpass_objects = path_object.get_xpass_values()
        for xpass_object in xpass_objects:
            player_id_src = xpass_object.get_node_start().get_player_id()
            player_id_target = xpass_object.get_node_end().get_player_id()
            timestamp_src = xpass_object.get_node_start().get_timestamp()
            timestamp_target = xpass_object.get_node_end().get_timestamp()
            if (player_id_src == player_id_target):
                xdribble_value = xdribble_dataframes_test.loc[(xdribble_dataframes_test["player_id"] == player_id_src) & \
                                (xdribble_dataframes_test["timestamp_origin"] == timestamp_src) & \
                                (xdribble_dataframes_test["timestamp_target"] == timestamp_target) & \
                                (xdribble_dataframes_test["path_id"] == path_idx), \
                                "probability_score"].iloc[0]
                xpass_object.set_xpass_or_xdribble_value(xdribble_value)
    return list_path_objects

# VERIFY XDRIBBLE PROBABILITY BASED ON MODEL
for case_number in XDRIBBLE_CASE_NUMBERS_INCLUDED_IN_CALCULATION:
    filename_model = f'xdribble_model_case_{case_number}.sav'
    directory_model = "data/model_xdribble/"
    xdribble_model = pickle.load(open(directory_model + filename_model, 'rb'))
    filename_df_with_score = f'xdribble_decision_making_result_case_{case_number}.csv'

    xdribble_dataframes_test_verify = xdribble_dataframes_test.copy(deep=True)
    xdribble_dataframes_test_verify = preprocess_xdribble_test(xdribble_dataframes_test_verify)
    featured_column_based_on_model = xdribble_model.get_booster().feature_names
    X_test = xdribble_dataframes_test_verify[featured_column_based_on_model]
    y_verify_result = [p[1] for p in xdribble_model.predict_proba(X_test)]
    # y_verify_result = xdribble_model.predict(X_test)

    xdribble_dataframes_test['probability_score'] = y_verify_result
    final_list_paths_obj = update_xdribble_value_to_list_path_objects(final_list_paths_obj, xdribble_dataframes_test)
    xdribble_dataframes_test.to_csv(directory_model + filename_df_with_score)
    xdribble_dataframes_test

In [68]:
# PLOT ALL PATH DECISION MAKING FROM SOURCE TO TARGET
def plot_all_path_decision_making(final_list_paths_obj):
    if (len(final_list_paths_obj) > 1):
        ncolumn = 2
        if (len(final_list_paths_obj) % ncolumn == 0):
            nrow = len(final_list_paths_obj) // ncolumn
        else:
            nrow = (len(final_list_paths_obj) // ncolumn) + 1
        fig, axs = plt.subplots(nrow, ncolumn)
        fig.set_figheight(500)
        fig.set_figwidth(100)

        maps_path_id_with_idx_subplots = {}
        idx_row, idx_column = 0, 0
        counter_size = 0
        while (counter_size < len(final_list_paths_obj)):
            if idx_column == ncolumn:
                idx_column = 0
                idx_row += 1
            maps_path_id_with_idx_subplots[counter_size] = (idx_row, idx_column)
            idx_column += 1
            counter_size += 1

        unique_player_ids = raw_input_coordinate_player_df['player_id'].unique()
        maps_player_id_and_color_node = {}
        for player_id in unique_player_ids:
            random_color = random.randrange(0, 2**24)
            hex_random_color = hex(random_color)
            final_color = "#" + hex_random_color[2:]
            maps_player_id_and_color_node[player_id] = final_color
        
        for path_idx, path_obj in enumerate(final_list_paths_obj):
            idx_row, idx_column = maps_path_id_with_idx_subplots[path_idx]
            pitch = Pitch(pitch_type="custom", pitch_length=105, pitch_width=68, pitch_color="green")
            pitch.draw(axs[idx_row, idx_column], constrained_layout=True, tight_layout=False)
            axs[idx_row, idx_column].set_title(f'Path Steps {path_idx}', fontsize=150)

            path_nodes_list = path_obj.get_path_list()
            for path_node in path_nodes_list:
                start_x, start_y = path_node.get_coordinate_value()
                player_id = path_node.get_player_id()
                timestamp = path_node.get_timestamp()
                pitch.scatter(x=start_x, y=start_y, c=maps_player_id_and_color_node[player_id], ax=axs[idx_row, idx_column], s=10000)
                axs[idx_row, idx_column].annotate(text=f'{player_id}({timestamp})', xy=(start_x - 5, start_y), ha="center", va="center", size=100)

            xpass_obj_list = path_obj.get_xpass_values()
            for xpass_obj in xpass_obj_list:
                start_x, start_y = xpass_obj.get_node_start().get_coordinate_value()
                end_x, end_y = xpass_obj.get_node_end().get_coordinate_value()
                pitch.arrows(xstart=start_x, ystart=start_y, xend=end_x, yend=end_y, ax=axs[idx_row, idx_column], color="white", width=10)
                # axs[idx_column].annotate(text=str(round(specific_row_xpass_df["probability_score"], 2)), xy=(x_line, y_line), ha="center", va="center", size=70, color="orange")
    else:
        pitch = Pitch(pitch_type="custom", pitch_length=105, pitch_width=68, pitch_color="green")
        fig, ax = pitch.draw(figsize=(100,80), constrained_layout=True, tight_layout=False)

        for path_idx, path_obj in enumerate(final_list_paths_obj):
            pitch = Pitch(pitch_type="custom", pitch_length=105, pitch_width=68, pitch_color="green")
            pitch.draw(ax, constrained_layout=True, tight_layout=False)
            ax.set_title(f'Path Steps {path_idx}', fontsize=150)

            path_nodes_list = path_obj.get_path_list()
            for path_node in path_nodes_list:
                start_x, start_y = path_node.get_coordinate_value()
                player_id = path_node.get_player_id()
                timestamp = path_node.get_timestamp()
                pitch.scatter(x=start_x, y=start_y, c=maps_player_id_and_color_node[player_id], ax=ax, s=10000)
                ax.annotate(text=f'{player_id}({timestamp})', xy=(start_x - 5, start_y), ha="center", va="center", size=100)

            xpass_obj_list = path_obj.get_xpass_values()
            for xpass_obj in xpass_obj_list:
                start_x, start_y = xpass_obj.get_node_start().get_coordinate_value()
                end_x, end_y = xpass_obj.get_node_end().get_coordinate_value()
                pitch.arrows(xstart=start_x, ystart=start_y, xend=end_x, yend=end_y, ax=ax, color="white", width=10)
                # axs[idx_column].annotate(text=str(round(specific_row_xpass_df["probability_score"], 2)), xy=(x_line, y_line), ha="center", va="center", size=70, color="orange")


# plot_all_path_decision_making(final_list_paths_obj)