In [1]:
from socceraction.data.wyscout import PublicWyscoutLoader
from socceraction.spadl.wyscout import convert_to_actions as convert_to_actions_wyscout
from socceraction.spadl.statsbomb import convert_to_actions as convert_to_actions_statsbomb
from socceraction.data.opta import OptaLoader
from socceraction.data.statsbomb import StatsBombLoader
from socceraction.spadl.config import actiontypes, bodyparts
import socceraction.spadl as spadl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, roc_auc_score, brier_score_loss, log_loss, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, balanced_accuracy_score
from xgboost import XGBClassifier, XGBRegressor
from catboost import CatBoostClassifier
import math
import pickle
import os
from name_matching.name_matcher import NameMatcher
from rapidfuzz import fuzz
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.feature_selection import r_regression, SelectKBest, chi2, mutual_info_classif, SequentialFeatureSelector, RFECV, SelectFromModel
from scipy.stats import pearsonr, chisquare
from mrmr import mrmr_classif
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lasso, LogisticRegression

In [2]:
# CONFIG FOR EXPERIMENTS SCENARIO
USE_EVALUATION_METRIC_CLASSIFICATION = True
SAMPLING_OPTIONS = [
    "none",
    "random_oversampled",
    "random_undersampled",
    "smote_oversampled"
]
FEATURE_SELECTION_OPTIONS = [
    "pearson",
    "chisquare",
    "mutualinf",
    "mrmr",
    "rfembedded",
    "lasso"
]
if USE_EVALUATION_METRIC_CLASSIFICATION:
    MODEL_ALGORITHM_OPTIONS = [
        "xgbclassifier",
        "catboostclassifier",
        "rfclassifier"
    ]
else:
    MODEL_ALGORITHM_OPTIONS = [
        "xgbregressor",
        # "rfregressor",
        # "logregression"
    ]
CONFIG_EXPERIMENTS_SCENARIO_MAP = {}

def construct_config_experiments_scenario_map():
    index_counter = 1
    for sampling_opt in SAMPLING_OPTIONS:
        for feature_selection_opt in FEATURE_SELECTION_OPTIONS:
            for algorithm_opt in MODEL_ALGORITHM_OPTIONS:
                CONFIG_EXPERIMENTS_SCENARIO_MAP[index_counter] = \
                    {"sampling_opt" : sampling_opt, \
                    "feature_selection_opt" : feature_selection_opt, \
                    "algorithm_opt" : algorithm_opt}
                index_counter += 1

construct_config_experiments_scenario_map()

In [3]:
# COLUMNS FOR TEST EXPERIMENT RESULT
COLUMNS_EVALUATION_METRIC_CLASSIFICATION = [
    "rec_score",
    "prec_score",
    "F1_score",
    "acc_score",
    "auc_score",
    "mcc_score",
    "brier_score",
    "log_loss_score",
    "balanced_acc_score"
]
COLUMNS_EVALUATION_METRIC_REGRESSION = [
    "mean_squared_error_score",
    "root_mean_squared_error_score",
    "auc_score",
    "brier_score",
    "log_loss_score",
    "mean_absolute_error_score",
    "r_squared_score",
    "mean_absolute_percentage_error_score"
]
COLUMNS_SCENARIO_NAME = [
    "sampling_opt",
    "feature_selection_opt",
    "algorithm_opt"
]
if (USE_EVALUATION_METRIC_CLASSIFICATION):
    COLUMNS_EXPERIMENT_RESULT = ["case_number"] + COLUMNS_SCENARIO_NAME + COLUMNS_EVALUATION_METRIC_CLASSIFICATION
else:
    COLUMNS_EXPERIMENT_RESULT = ["case_number"] + COLUMNS_SCENARIO_NAME + COLUMNS_EVALUATION_METRIC_REGRESSION

In [4]:
api_wyscout = PublicWyscoutLoader(root="data/wyscout")
api_opta = OptaLoader(root="data/opta")
api_statsbomb = StatsBombLoader(root="data/statsbomb", getter="local")

In [5]:
for idx, action_name in enumerate(actiontypes):
    print(f'action_id : {idx}   action_name : {action_name}')

action_id : 0   action_name : pass
action_id : 1   action_name : cross
action_id : 2   action_name : throw_in
action_id : 3   action_name : freekick_crossed
action_id : 4   action_name : freekick_short
action_id : 5   action_name : corner_crossed
action_id : 6   action_name : corner_short
action_id : 7   action_name : take_on
action_id : 8   action_name : foul
action_id : 9   action_name : tackle
action_id : 10   action_name : interception
action_id : 11   action_name : shot
action_id : 12   action_name : shot_penalty
action_id : 13   action_name : shot_freekick
action_id : 14   action_name : keeper_save
action_id : 15   action_name : keeper_claim
action_id : 16   action_name : keeper_punch
action_id : 17   action_name : keeper_pick_up
action_id : 18   action_name : clearance
action_id : 19   action_name : bad_touch
action_id : 20   action_name : non_action
action_id : 21   action_name : dribble
action_id : 22   action_name : goalkick


In [6]:
for idx, bodypart_name in enumerate(bodyparts):
    print(f'bodypart_id : {idx}   bodypart_name : {bodypart_name}')

bodypart_id : 0   bodypart_name : foot
bodypart_id : 1   bodypart_name : head
bodypart_id : 2   bodypart_name : other
bodypart_id : 3   bodypart_name : head/other
bodypart_id : 4   bodypart_name : foot_left
bodypart_id : 5   bodypart_name : foot_right


In [7]:
def convert_events_df_to_spadl(events_df, home_team_id, source):
    if (source == "Statsbomb"):
        spadl_events_df = convert_to_actions_statsbomb(events_df, home_team_id)
    else:
        spadl_events_df = convert_to_actions_wyscout(events_df, home_team_id)
    spadl_events_df['time_seconds'] = spadl_events_df['time_seconds'].astype('float64')
    spadl_events_df['timestamp'] = pd.to_datetime(spadl_events_df['time_seconds'], unit='s')
    return spadl_events_df

In [8]:
# FUNCTION TO ADD ADDITIONAL INFO IN RAW SPADL DATAFRAME
STANDARD_LENGTH_COURT = 105
STANDARD_WIDTH_COURT = 68
STANDARD_GOALLINE_WIDTH = 7.32
STANDARD_LENGTH_COURT_STATSBOMB = 120
STANDARD_WIDTH_COURT_STATSBOMB = 80

# Helper Functions
def filter_out_is_home_team_apply_df(row, home_team_id):
    return 1 if row['team_id'] == home_team_id else 0

def filter_out_take_on_or_dribble_apply_df(row, take_on_action_id):
    return 1 if row['action_id'] == take_on_action_id else 0

# Helper functions specific to statsbomb opponent data
def calculate_distance_between_two_coordinates(x1, y1, x2, y2):
    return math.sqrt(abs(x2-x1) ** 2 + abs(y2-y1) ** 2)

def filter_out_non_opponent_coordinate_freeze_frame(freeze_frame_360_list):
    if (freeze_frame_360_list == None or not isinstance(freeze_frame_360_list, list)):
        return []
    return [x for x in freeze_frame_360_list if x['teammate'] == False and x['actor'] == False]

def convert_statsbomb_coordinate_to_spadl_coordinate(coordinate_x, coordinate_y):
    converted_coordinate_x = (STANDARD_LENGTH_COURT / STANDARD_LENGTH_COURT_STATSBOMB) * coordinate_x
    converted_coordinate_y = (STANDARD_WIDTH_COURT / STANDARD_WIDTH_COURT_STATSBOMB) * coordinate_y
    return (converted_coordinate_x, converted_coordinate_y)

def calculate_distance_opponent_apply_df(row):
    freeze_frame_360_opponents = filter_out_non_opponent_coordinate_freeze_frame(row['freeze_frame_360'])
    list_distance_opponent = []
    for object_loc in freeze_frame_360_opponents:
        opponent_x, opponent_y = convert_statsbomb_coordinate_to_spadl_coordinate(object_loc['location'][0], object_loc['location'][1])
        distance_opponent = calculate_distance_between_two_coordinates(row['start_x'], row['start_y'], opponent_x, opponent_y)
        list_distance_opponent.append(distance_opponent)
    return min(list_distance_opponent) if len(list_distance_opponent) > 0 else 0

def calculate_num_opponent_closer_goal(start_x, start_y, freeze_frame_360, is_home_team):
    freeze_frame_360_opponents = filter_out_non_opponent_coordinate_freeze_frame(freeze_frame_360)
    if (is_home_team):
        coordinate_x_goal = STANDARD_LENGTH_COURT
    else:
        coordinate_x_goal = 0
    coordinate_y_goal = STANDARD_WIDTH_COURT / 2

    num_opponent_closer_to_goal = 0
    for object_loc in freeze_frame_360_opponents:
        opponent_x, opponent_y = convert_statsbomb_coordinate_to_spadl_coordinate(object_loc['location'][0], object_loc['location'][1])
        distance_passer_to_goal = calculate_distance_between_two_coordinates(start_x, start_y, coordinate_x_goal, coordinate_y_goal)
        distance_opponent_to_goal = calculate_distance_between_two_coordinates(opponent_x, opponent_y, coordinate_x_goal, coordinate_y_goal)
        if (distance_opponent_to_goal < distance_passer_to_goal):
            num_opponent_closer_to_goal += 1
    return num_opponent_closer_to_goal

def calculate_num_opponent_closer_goal_apply_df(row, home_team_id):
    return calculate_num_opponent_closer_goal(row['start_x'], row['start_y'], row['freeze_frame_360'], (row['team_id'] == home_team_id))

def calculate_num_opponent_in_path(start_x, start_y, freeze_frame_360):
    path_distance = 10
    freeze_frame_360_opponents = filter_out_non_opponent_coordinate_freeze_frame(freeze_frame_360)
    num_opponent_in_path = 0
    for object_loc in freeze_frame_360_opponents:
        opponent_x, opponent_y = convert_statsbomb_coordinate_to_spadl_coordinate(object_loc['location'][0], object_loc['location'][1])
        distance_with_opponent = calculate_distance_between_two_coordinates(start_x, start_y, opponent_x, opponent_y)
        if (distance_with_opponent <= path_distance):
            num_opponent_in_path += 1
    return num_opponent_in_path

def calculate_num_opponent_in_path_apply_df(row):
    return calculate_num_opponent_in_path(row['start_x'], row['start_y'], row['freeze_frame_360'])

# Add is_home_team column (boolean 0/1)
def add_is_home_team_column_to_spadl_df(spadl_df, home_team_id):
    spadl_df['is_home_team'] = spadl_df.apply(lambda x : filter_out_is_home_team_apply_df(x, home_team_id), axis=1)
    return spadl_df

# Add is_take_on column (boolean 0/1)
def add_is_take_on_column_to_spadl_df(spadl_df, take_on_action_id):
    spadl_df['is_take_on'] = spadl_df.apply(lambda x : filter_out_take_on_or_dribble_apply_df(x, take_on_action_id), axis=1)
    return spadl_df

# Opponent Feature 1 : distance opponent
def add_distance_opponent_column_to_spadl_df(spadl_df):
    spadl_df['distance_opponent'] = spadl_df.apply(calculate_distance_opponent_apply_df, axis=1)
    return spadl_df

# Opponent Feature 2 : opponents closer to goal
def add_num_opponent_closer_goal_column_to_spadl_df(spadl_df, home_team_id):
    spadl_df['num_opponent_closer_goal'] = spadl_df.apply(lambda x : calculate_num_opponent_closer_goal_apply_df(x, home_team_id), axis=1)
    return spadl_df

# Opponent Feature 3 : opponents in path
def add_num_opponent_in_path_column_to_spadl_df(spadl_df):
    spadl_df['num_opponent_in_path'] = spadl_df.apply(calculate_num_opponent_in_path_apply_df, axis=1)
    return spadl_df

In [9]:
# Collect all dataset action specific type, export them to csv files
# Take_on (action_id = 7), Dribble (action_id = 21)
DRIBBLE_ACTION_ID = [7, 21] 
TAKE_ON_ACTION_ID = 7

def collect_raw_dribble_spadl_df(source="Wyscout", period=1):
    if source == "Statsbomb":
        api = api_statsbomb
    else:
        api = api_wyscout
    list_competitions_ids = []
    list_game_ids = []

    competitions_df = api.competitions()
    for _, row in competitions_df.iterrows():
        if source == "Statsbomb":
            if row['competition_gender'] == 'male':
                list_competitions_ids.append((row['competition_id'], row['season_id']))
        else:
            list_competitions_ids.append((row['competition_id'], row['season_id']))
        
    for competition_id, season_id in list_competitions_ids:
        games_df = api.games(competition_id, season_id)
        for _, row in games_df.iterrows():
            list_game_ids.append((row['game_id'], row['home_team_id'], row['away_team_id']))
            
    for game_id, home_team_id, away_team_id in list_game_ids:
        try:
            if (source == "Statsbomb"):
                this_game_events_df = api.events(game_id, load_360=True)
            else:
                this_game_events_df = api.events(game_id)
            this_game_events_spadl_df = convert_events_df_to_spadl(this_game_events_df, home_team_id, source)

            # Add column 360 data into events spadl data (Statsbomb)
            if (source == "Statsbomb"):
                this_game_events_spadl_df = pd.merge(this_game_events_spadl_df, this_game_events_df[["event_id", "visible_area_360", "freeze_frame_360"]], how="inner", left_on="original_event_id", right_on="event_id")
                this_game_events_spadl_df.dropna(subset=["freeze_frame_360"])
            
            # Filter action id with type dribble only, pick only data from first period
            this_game_events_spadl_df = this_game_events_spadl_df[this_game_events_spadl_df['type_id'].isin(DRIBBLE_ACTION_ID)]
            if (period != None):
                this_game_events_spadl_df = this_game_events_spadl_df[this_game_events_spadl_df['period_id'] == period]
            else:
                this_game_events_spadl_df = this_game_events_spadl_df[this_game_events_spadl_df['period_id'] == 1]
            
            # Add additional computed column to support xDribble model
            this_game_events_spadl_df = add_is_home_team_column_to_spadl_df(this_game_events_spadl_df, home_team_id)
            this_game_events_spadl_df = add_is_take_on_column_to_spadl_df(this_game_events_spadl_df, TAKE_ON_ACTION_ID)
            if (source == "Statsbomb"):
                this_game_events_spadl_df = add_distance_opponent_column_to_spadl_df(this_game_events_spadl_df)
                this_game_events_spadl_df = add_num_opponent_closer_goal_column_to_spadl_df(this_game_events_spadl_df, home_team_id)
                this_game_events_spadl_df = add_num_opponent_in_path_column_to_spadl_df(this_game_events_spadl_df)
                
            # Export to external csv iteratively
            this_game_events_spadl_df.to_csv(f'data/training_data_xdribble/{game_id}_{home_team_id}_{away_team_id}_xdribble_data.csv')
        
        except FileNotFoundError:
            print(f'File 360 data not found {game_id}-{home_team_id}-{away_team_id}') 

In [10]:
# FUNCTIONS TO CREATE ALL DATASET PLAYERS
def collect_raw_all_players_df(source="Wyscout"):
    if (source == "Statsbomb"):
        api = api_statsbomb
    else:
        api = api_wyscout
    list_competitions_ids = []
    list_game_ids = []

    competitions_df = api.competitions()
    for _, row in competitions_df.iterrows():
        if (source == "Statsbomb"):
            if (row['competition_gender'] == 'male'):
                list_competitions_ids.append((row['competition_id'], row['season_id']))
        else:
            list_competitions_ids.append((row['competition_id'], row['season_id']))
        
    for competition_id, season_id in list_competitions_ids:
        games_df = api.games(competition_id, season_id)
        for _, row in games_df.iterrows():
            list_game_ids.append((row['game_id'], row['home_team_id'], row['away_team_id']))

    for game_id, home_team_id, away_team_id in list_game_ids:
        players_df = api.players(game_id)
        if (source == "Statsbomb"):
            players_df.to_csv(f'data/training_data_players_statsbomb/{game_id}_{home_team_id}_{away_team_id}_players_data.csv')
        else:
            players_df.to_csv(f'data/training_data_players_wyscout/{game_id}_{home_team_id}_{away_team_id}_players_data.csv')

def load_and_concat_players_df_from_csv(path_to_raw_players_df):
    list_raw_players_df = []
    for filename in os.listdir(path_to_raw_players_df):
        f = os.path.join(path_to_raw_players_df, filename)
        if os.path.isfile(f):
            players_df = pd.read_csv(f)
            list_raw_players_df.append(players_df)
    merged_players_df = pd.concat(list_raw_players_df)
    merged_players_df = merged_players_df.drop_duplicates(subset='player_id').reset_index()
    return merged_players_df

def load_csv_players_data_sofifa(path_to_sofifa_file):
    return pd.read_csv(path_to_sofifa_file)

In [11]:
# Comment it if players dataset already loaded
# collect_raw_all_players_df(source="Statsbomb")

In [12]:
# Merge wyscout player datasets with sofifa datasets by matching string name
def create_maps_for_name_matching_scores(list_unique_names_df_1, list_unique_names_df_2):
    maps_name_matching_score = {}
    for name_1 in list_unique_names_df_1:
        for name_2 in list_unique_names_df_2:
            maps_name_matching_score[(name_1, name_2)] = fuzz.ratio(name_1, name_2)
    return maps_name_matching_score

def filter_out_maps_for_name_matching_scores(maps_name_matching, threshold):
    filtered_maps_name_matching = {}
    for name_1, name_2 in maps_name_matching:
        if (maps_name_matching[(name_1, name_2)] >= threshold):
            filtered_maps_name_matching[(name_1, name_2)] = maps_name_matching[(name_1, name_2)]
    return filtered_maps_name_matching

def merge_big_dataframe_wyscout_with_sofifa(big_dataframe_players, sofifa_players_dataset, maps_name_matching_score):
    # Preprocess both dataframes and add prefix 1- and 2- to all column names to avoid duplicate column names
    big_dataframe_players.dropna(subset=['player_name'], inplace=True)
    big_dataframe_players.rename(columns=lambda x: '1-'+x, inplace=True)
    sofifa_players_dataset.dropna(subset=['full_name'], inplace=True)
    sofifa_players_dataset.rename(columns=lambda x: '2-'+x, inplace=True)
    # Merge into new empty dataframe one by one by iterating maps name matching score
    big_dataframe_players_with_sofifa = pd.DataFrame(columns=list(big_dataframe_players.columns)+list(sofifa_players_dataset.columns), index=[0])
    big_dataframe_players_with_sofifa.reset_index(inplace=True)
    for name_1, name_2 in maps_name_matching_score:
        row_from_big_dataframe_players = big_dataframe_players[big_dataframe_players['1-player_name'] == name_1].iloc[0]
        row_from_sofifa_players_dataset = sofifa_players_dataset[sofifa_players_dataset['2-full_name'] == name_2].iloc[0]
        new_row = pd.concat([row_from_big_dataframe_players, row_from_sofifa_players_dataset], axis=0, ignore_index=False)
        new_row = pd.DataFrame([new_row]).reset_index()
        big_dataframe_players_with_sofifa = pd.concat([big_dataframe_players_with_sofifa, new_row])
    # Remove prefix 1- and 2- from final big datasets
    big_dataframe_players_with_sofifa.rename(columns=lambda x: x[2:], inplace=True)
    return big_dataframe_players_with_sofifa

SOURCE_DATA = "Statsbomb"
if (SOURCE_DATA == "Statsbomb"):
    DIRECTORY_PLAYERS_CSV_DATAS = "data/training_data_players_statsbomb"
else:
    DIRECTORY_PLAYERS_CSV_DATAS = "data/training_data_players_wyscout"
DIRECTORY_SOFIFA_CSV_DATAS = "data/players_skill_dataset/sofifa_dataset_cleaned.csv"
DIRECTORY_WYSCOUT_CSV_DATAS = "data/players_skill_dataset/wyscout_dataset_cleaned.csv"
DIRECTORY_STATSBOMB_CSV_DATAS = "data/players_skill_dataset/statsbomb_dataset_cleaned.csv"
DIRECTORY_FINAL_PLAYERS_CSV_DATAS = "data/players_skill_dataset/final_players_skill_dataset.csv"

# COMMENT BELOW SNIPPET CODES IF FINAL PLAYER DATASETS WITH SKILL ALREADY GENERATED !!
# big_dataframe_players = load_and_concat_players_df_from_csv(DIRECTORY_PLAYERS_CSV_DATAS)
# if (SOURCE_DATA == "Statsbomb"):
#     big_dataframe_players.to_csv(DIRECTORY_STATSBOMB_CSV_DATAS)
# else:
#     big_dataframe_players.to_csv(DIRECTORY_WYSCOUT_CSV_DATAS)
# sofifa_players_dataset = load_csv_players_data_sofifa(DIRECTORY_SOFIFA_CSV_DATAS)

# maps_name_matching_score = create_maps_for_name_matching_scores(big_dataframe_players['player_name'].unique(), sofifa_players_dataset['full_name'].unique())
# maps_name_matching_score = filter_out_maps_for_name_matching_scores(maps_name_matching_score, threshold=80)

# big_dataframe_players_with_sofifa = merge_big_dataframe_wyscout_with_sofifa(big_dataframe_players, sofifa_players_dataset, maps_name_matching_score)
# big_dataframe_players_with_sofifa.reset_index(inplace=True)
# big_dataframe_players_with_sofifa = big_dataframe_players_with_sofifa.drop_duplicates(subset='player_id')
# big_dataframe_players_with_sofifa.to_csv(DIRECTORY_FINAL_PLAYERS_CSV_DATAS)

In [13]:
# MAIN DRIVER (comment it if csv files already loaded)
# collect_raw_dribble_spadl_df(source="Statsbomb")

In [14]:
# Load csv datas already retrieved then concat them into one big dataframe
DIRECTORY_XDRIBBLE_CSV_DATAS = "data/training_data_xdribble"

def load_and_concat_xdribble_df_from_csv():
    list_pass_event_df = []
    for filename in os.listdir(DIRECTORY_XDRIBBLE_CSV_DATAS):
        f = os.path.join(DIRECTORY_XDRIBBLE_CSV_DATAS, filename)
        if os.path.isfile(f):
            pass_event_df = pd.read_csv(f)
            list_pass_event_df.append(pass_event_df)
    return pd.concat(list_pass_event_df)

In [15]:
# JOIN ALREADY CONSTRUCTED PLAYER SKILLS DATASET WITH ORIGIN EVENT DATASET WYSCOUT
player_skills_dataset = pd.read_csv(DIRECTORY_FINAL_PLAYERS_CSV_DATAS)
big_dataframe_xdribble_model = load_and_concat_xdribble_df_from_csv()
big_dataframe_xdribble_model = big_dataframe_xdribble_model.merge(player_skills_dataset, how='inner',on='player_id')
big_dataframe_xdribble_model.head()

Unnamed: 0,Unnamed: 0_x,game_id_x,original_event_id,period_id,time_seconds,team_id_x,player_id,start_x,start_y,end_x,...,LWB,LDM,CDM,RDM,RWB,LB,LCB,CB,RCB,RB
0,1,3788741,bea4235d-7e40-461c-bb82-6d473f5bb324,1,2.0,909,8963.0,27.794118,44.070886,31.058824,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
1,71,3788741,d4a29d8a-01f6-4ddb-87e1-05d429d81662,1,203.0,909,8963.0,23.558824,47.944304,24.264706,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
2,288,3788741,ce7d0f67-d9af-495f-83fe-d498f0ce0aed,1,657.0,909,8963.0,14.735294,40.8,13.235294,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
3,328,3788741,e43c65e9-33f1-4cda-93fe-9a918eb480ca,1,761.0,909,8963.0,43.323529,60.167089,48.0,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
4,332,3788741,c3bcba92-1a6f-492e-aced-8a64df6a52ea,1,767.0,909,8963.0,44.117647,62.491139,42.176471,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2


In [16]:
# SELECT ONLY FEATURED COLUMN FROM BIG DATASETS
features_column_included = ["start_x", "start_y", "end_x", "end_y", "is_take_on", "distance_opponent", 
                            "num_opponent_closer_goal", "num_opponent_in_path", "result_id"]
player_skills_column_included = ["acceleration", "aggression", "agility", "balance", "ball_control",
                                 "composure", "crossing", "curve", "dribbling", "finishing",
                                 "freekick_accuracy", "heading_accuracy", "interceptions", "jumping", "long_passing",
                                 "long_shots", "marking", "penalties", "positioning", "reactions",
                                 "shot_power", "sliding_tackle", "sprint_speed", "stamina", "short_passing",
                                 "standing_tackle", "strength", "vision", "volleys"]
player_attribute_column_included = ["height_cm", "weight_kgs", "age"]

big_dataframe_xdribble_model = big_dataframe_xdribble_model[[c for c in big_dataframe_xdribble_model.columns if c in (features_column_included + player_skills_column_included + player_attribute_column_included)]]
big_dataframe_xdribble_model.head()

Unnamed: 0,start_x,start_y,end_x,end_y,result_id,is_take_on,distance_opponent,num_opponent_closer_goal,num_opponent_in_path,age,...,long_shots,aggression,interceptions,positioning,vision,penalties,composure,marking,standing_tackle,sliding_tackle
0,27.794118,44.070886,31.058824,42.693671,1,0,17.081977,10,0,22.0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
1,23.558824,47.944304,24.264706,49.063291,1,0,14.231871,5,0,22.0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
2,14.735294,40.8,13.235294,48.202532,1,0,11.364664,3,0,22.0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
3,43.323529,60.167089,48.0,62.663291,1,0,22.928909,7,0,22.0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
4,44.117647,62.491139,42.176471,62.491139,1,0,29.146285,8,0,22.0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0


In [17]:
# CASE 1 : Random Oversample Function
def training_data_random_oversampled(X_train, Y_train):
    ros = RandomOverSampler(random_state=42)
    X_resampled, Y_resampled = ros.fit_resample(X_train, Y_train)
    return (X_resampled, Y_resampled)

# CASE 2 : Random Undersample Function
def training_data_random_undersampled(X_train, Y_train):
    rus = RandomUnderSampler(random_state=42)
    X_resampled, Y_resampled = rus.fit_resample(X_train, Y_train)
    return (X_resampled, Y_resampled)

# CASE 3 : Random SMOTE Oversample Function
def training_data_smote_oversampled(X_train, Y_train):
    X_resampled, Y_resampled = SMOTE().fit_resample(X_train, Y_train)
    return (X_resampled, Y_resampled)

# V CASE 1 : Feature Selection - Pearson Coefficient
def filter_columns_feature_selection_pearson(X_train, Y_train, columns_considered, threshold):
    new_columns_after_selection = []
    for _, skill in enumerate(columns_considered):
        correlation_value, _ = pearsonr(X_train[skill], Y_train)
        if correlation_value >= threshold:
            new_columns_after_selection.append(skill)
    return new_columns_after_selection

def training_data_feature_selection_pearson(X_train, Y_train, columns_considered, threshold):
    columns_selected = filter_columns_feature_selection_pearson(X_train, Y_train, columns_considered, threshold)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 2 : Feature Selection - Chi Square
def filter_columns_feature_selection_chisquare(X_train, Y_train, columns_considered, num_of_features):
    chi2_selector = SelectKBest(chi2, k=num_of_features) 
    df_feature = X_train[columns_considered]
    chi2_selector.fit(df_feature, Y_train)
    cols = chi2_selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_chisquare(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_chisquare(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 3 : Feature Selection - Mutual Information
def filter_columns_feature_selection_mutualinf(X_train, Y_train, columns_considered, num_of_features):
    mi_selector = SelectKBest(mutual_info_classif, k=num_of_features) 
    df_feature = X_train[columns_considered]
    mi_selector.fit(df_feature, Y_train)
    cols = mi_selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_mutualinf(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_mutualinf(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 4 : Feature Selection - mRMR Selection
def filter_columns_feature_selection_mrmr(X_train, Y_train, columns_considered, num_of_features):
    df_feature = X_train[columns_considered]
    selected_features = mrmr_classif(X=df_feature, y=Y_train, K=num_of_features)
    return selected_features

def training_data_feature_selection_mrmr(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_mrmr(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# X CASE 5 : Feature Selection - Sequential Forward Selection (SFS)
def filter_columns_feature_selection_sfs(X_train, Y_train, columns_considered, num_of_features):
    rf = RandomForestClassifier()
    sfs = SequentialFeatureSelector(rf, n_features_to_select=num_of_features, direction='forward')
    df_feature = X_train[columns_considered]
    sfs.fit(df_feature, Y_train)
    cols = sfs.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_sfs(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_sfs(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# X CASE 6 : Feature Selection - Sequential Backward Elimination (SBE)
def filter_columns_feature_selection_sbe(X_train, Y_train, columns_considered, num_of_features):
    rf = RandomForestClassifier()
    sfs = SequentialFeatureSelector(rf, n_features_to_select=num_of_features, direction='backward')
    df_feature = X_train[columns_considered]
    sfs.fit(df_feature, Y_train)
    cols = sfs.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_sbe(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_sbe(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# X CASE 7 : Feature Selection - Recursive Feature Elimination
def filter_columns_feature_selection_rfe(X_train, Y_train, columns_considered, num_of_features):
    estimator = LinearSVR()
    selector = RFECV(estimator, step=1, cv=num_of_features)
    df_feature = X_train[columns_considered]
    selector.fit(df_feature, Y_train)
    cols = selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_rfe(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_rfe(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 8 : Feature Selection - Random Forest Embedded (rfembedded)
def filter_columns_feature_selection_rfembedded(X_train, Y_train, columns_considered, num_of_features):
    estimator = RandomForestClassifier()
    selector = SelectFromModel(estimator=estimator, max_features=num_of_features)
    df_feature = X_train[columns_considered]
    selector.fit(df_feature, Y_train)
    cols = selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_rfembedded(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_rfembedded(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 9 : Feature Selection - LASSO
def filter_columns_feature_selection_lasso(X_train, Y_train, columns_considered, num_of_features):
    estimator = LogisticRegression(penalty='l2', C=0.5, solver='newton-cholesky')
    selector = SelectFromModel(estimator=estimator, max_features=num_of_features)
    df_feature = X_train[columns_considered]
    selector.fit(df_feature, Y_train)
    cols = selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_lasso(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_lasso(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# CASE 1 : Train with model XGBRegressor
def fit_and_train_with_model_xgbregressor(X_train, Y_train):
    model = XGBRegressor(objective="reg:logistic")
    model.fit(X_train, Y_train)
    return model

# CASE 2 : Train with model RandomForestRegressor
def fit_and_train_with_model_rfregressor(X_train, Y_train):
    model = RandomForestRegressor()
    model.fit(X_train, Y_train)
    return model

# CASE 3 : Train with model LogisticRegression
def fit_and_train_with_model_logregression(X_train, Y_train):
    model = LogisticRegression()
    model.fit(X_train, Y_train)
    return model

# CASE 4 : Train with model XGBClassifier
def fit_and_train_with_model_xgbclassifier(X_train, Y_train):
    model = XGBClassifier(n_estimators=50, max_depth=3, n_jobs=-3, verbosity=1, enable_categorical=True)
    model.fit(X_train, Y_train)
    return model

# CASE 5 : Train with model Catboost Classifier 
def fit_and_train_with_model_catboostclassifier(X_train, Y_train):
    model = CatBoostClassifier(n_estimators=50, max_depth=3, verbose=1)
    model.fit(X_train, Y_train)
    return model

# CASE 6 : Train with model RandomForest Classifier
def fit_and_train_with_model_rfclassifier(X_train, Y_train):
    model = RandomForestClassifier(n_estimators=50, max_depth=3, n_jobs=-3, verbose=1)
    model.fit(X_train, Y_train)
    return model

In [18]:
# FEATURE PREPROCESSING BIG DATASETS AND CREATE XGBOOST MODEL
# 1. Change all numeric columns with MinMaxScaler
scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
columns_minmax_scaler = player_skills_column_included + player_attribute_column_included + \
                        ["start_x", "start_y", "end_x", "end_y", "distance_opponent", 
                         "num_opponent_closer_goal", "num_opponent_in_path"]
big_dataframe_xdribble_model[columns_minmax_scaler] = scaler.fit_transform(big_dataframe_xdribble_model[columns_minmax_scaler])

# 2. Check if data is unbalanced. If it is unbalanced, then do method to oversize the sample
print(big_dataframe_xdribble_model['result_id'].value_counts())

# 3. Change result_id label into float64 type
# big_dataframe_xdribble_model['result_id'] = big_dataframe_xdribble_model['result_id'].astype('float64')

# 4. Remove dataframe instead of having result_id (0,1) --> (fail, success)
big_dataframe_xdribble_model = big_dataframe_xdribble_model[big_dataframe_xdribble_model['result_id'].isin([0,1])]
print(big_dataframe_xdribble_model['result_id'].value_counts())

# 5. Split train data and test data from Big Datasets
all_feature_columns = columns_minmax_scaler + ["is_take_on"]
X_train = big_dataframe_xdribble_model[all_feature_columns]
Y_train = big_dataframe_xdribble_model["result_id"]

# Empty dataframe for saving test result
empty_test_result = pd.DataFrame(columns=COLUMNS_EXPERIMENT_RESULT, index=[0])

for case_number in sorted(list(CONFIG_EXPERIMENTS_SCENARIO_MAP.keys())):
    sampling_opt = CONFIG_EXPERIMENTS_SCENARIO_MAP[case_number]["sampling_opt"]
    feature_selection_opt = CONFIG_EXPERIMENTS_SCENARIO_MAP[case_number]["feature_selection_opt"]
    algorithm_opt = CONFIG_EXPERIMENTS_SCENARIO_MAP[case_number]["algorithm_opt"]

    # 6. Do oversampling/undersampling and feature selection at same time
    if sampling_opt == "none":
        X_resampled, Y_resampled = X_train, Y_train
    else:
        X_resampled, Y_resampled = globals()["training_data_" + sampling_opt](X_train, Y_train)
    if feature_selection_opt == "pearson":
        threshold = 0.5
        X_feature_sel, Y_feature_sel = globals()["training_data_feature_selection_" + feature_selection_opt](X_resampled, Y_resampled, player_skills_column_included, threshold)
    else:
        num_of_features = 10
        X_feature_sel, Y_feature_sel = globals()["training_data_feature_selection_" + feature_selection_opt](X_resampled, Y_resampled, player_skills_column_included, num_of_features)

    # 7. Do train_test_split on training data
    X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(X_feature_sel, Y_feature_sel, test_size=0.2, random_state=42)

    # 8. Train Model
    model = globals()["fit_and_train_with_model_" + algorithm_opt](X_train_split, y_train_split)

    # 9. Predict Testing Data
    y_predict = model.predict(X_test_split)

    # 10. Save test result experiment
    if (USE_EVALUATION_METRIC_CLASSIFICATION):
        rec_score = recall_score(y_test_split, y_predict)
        prec_score = precision_score(y_test_split, y_predict)
        F1_score = f1_score(y_test_split, y_predict)
        acc_score = accuracy_score(y_test_split, y_predict)
        auc_score = roc_auc_score(y_test_split, y_predict)
        mcc_score = matthews_corrcoef(y_test_split, y_predict)
        brier_score = brier_score_loss(y_test_split, y_predict)
        log_loss_score = log_loss(y_test_split, y_predict)
        balanced_acc_score = balanced_accuracy_score(y_test_split, y_predict)
    else:
        mean_squared_error_score = mean_squared_error(y_test_split, y_predict)
        root_mean_squared_error_score = mean_squared_error(y_test_split, y_predict, squared=False)
        auc_score = roc_auc_score(y_test_split, y_predict)
        brier_score = brier_score_loss(y_test_split, y_predict)
        log_loss_score = log_loss(y_test_split, y_predict)
        mean_absolute_error_score = mean_absolute_error(y_test_split, y_predict)
        r_squared_score = r2_score(y_test_split, y_predict)
        mean_absolute_percentage_error_score = mean_absolute_percentage_error(y_test_split, y_predict)

    maps_new_row = {}
    if USE_EVALUATION_METRIC_CLASSIFICATION:
        eval_metrics_column = COLUMNS_EVALUATION_METRIC_CLASSIFICATION
    else:
        eval_metrics_column = COLUMNS_EVALUATION_METRIC_REGRESSION
    for column in COLUMNS_EXPERIMENT_RESULT:
        if column not in eval_metrics_column:
            if column == "case_number":
                maps_new_row["case_number"] = case_number
            elif column in COLUMNS_SCENARIO_NAME:
                maps_new_row[column] = globals()[column]
        else:
            maps_new_row[column] = globals()[column]     
    new_row = pd.DataFrame(maps_new_row, index=[0])
    empty_test_result = pd.concat([new_row, empty_test_result.loc[:]]).reset_index(drop=True)

    # 11. Save model to external file
    filename = f'xdribble_model_case_{case_number}.sav'
    directory_model = "data/model_xdribble/"
    pickle.dump(model, open(directory_model + filename, 'wb'))

# 12. Save test result experiment to external file
filename = 'xdribble_test_model_experiment_result.csv'
directory_model = "data/model_xdribble/"
empty_test_result.to_csv(directory_model + filename)

1    40194
0      542
Name: result_id, dtype: int64
1    40194
0      542
Name: result_id, dtype: int64
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.3332593	total: 103ms	remaining: 5.04s
1:	learn: 0.2003319	total: 110ms	remaining: 2.65s
2:	learn: 0.1355954	total: 116ms	remaining: 1.82s
3:	learn: 0.0962761	total: 122ms	remaining: 1.41s
4:	learn: 0.0779829	total: 128ms	remaining: 1.15s
5:	learn: 0.0684673	total: 136ms	remaining: 997ms
6:	learn: 0.0640005	total: 142ms	remaining: 870ms
7:	learn: 0.0614631	total: 147ms	remaining: 769ms
8:	learn: 0.0597923	total: 151ms	remaining: 690ms
9:	learn: 0.0571168	total: 158ms	remaining: 631ms
10:	learn: 0.0564550	total: 162ms	remaining: 57

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1358114	total: 35.3ms	remaining: 1.73s
1:	learn: 0.0791825	total: 54.6ms	remaining: 1.31s
2:	learn: 0.0638389	total: 70.2ms	remaining: 1.1s
3:	learn: 0.0614810	total: 84.2ms	remaining: 969ms
4:	learn: 0.0606027	total: 97.2ms	remaining: 875ms
5:	learn: 0.0590289	total: 112ms	remaining: 820ms
6:	learn: 0.0573161	total: 126ms	remaining: 774ms
7:	lear

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'crossing', 'finishing', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'crossing', 'finishing', 'heading_accuracy', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1358114	total: 10.2ms	remaining: 501ms
1:	learn: 0.0789256	total: 19.8ms	remaining: 476ms
2:	learn: 0.0682773	total: 29.4ms	remaining: 461ms
3:	learn: 0.0658346	total: 38.4ms	remaining: 442ms
4:	learn: 0.0568748	total: 47.1ms	remaining: 424ms
5:	learn: 0.0547306	total: 56.9ms	remaining: 417ms
6:	learn: 0.0540808	total: 65.7ms	remaining: 404

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
100%|██████████| 10/10 [00:00<00:00, 10.60it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']


100%|██████████| 10/10 [00:00<00:00, 10.83it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1351647	total: 11.4ms	remaining: 560ms
1:	learn: 0.0775605	total: 20.9ms	remaining: 502ms
2:	learn: 0.0679938	total: 30.8ms	remaining: 483ms
3:	learn: 0.0650366	total: 40.1ms	remaining: 462ms
4:	learn: 0.0570334	total: 49.4ms	remaining: 445ms
5:	learn: 0.0557959	total: 58.3ms	remaining: 428ms
6:	learn: 0.0541229	total: 68.6ms	remaining: 421ms
7:	learn: 0.0536387	total: 78.2ms	remaining: 411ms
8:	learn: 0.0527452	total: 88.3ms	remaining: 402ms
9:	learn: 0.0524259	total: 97ms	remaining: 388ms
10:	learn: 0.0521285	total: 107ms	remaining: 378ms
11:	learn: 0.0514862	total: 117ms	remaining: 369ms
12:	learn: 0.0510970	total: 126ms	remaining: 357ms
13:	l

100%|██████████| 10/10 [00:00<00:00, 10.26it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']


[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'dribbling', 'finishing', 'freekick_accuracy', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'freekick_accuracy', 'interceptions', 'marking', 'sliding_tackle', 'standing_tackle', 'strength', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1358114	total: 10.4ms	remaining: 508ms
1:	learn: 0.0789256	total: 20.6ms	remaining: 495ms
2:	learn: 0.0646908	total: 29.5ms	remaining: 462ms
3:	learn: 0.0609110	total: 39.4ms	remaining: 453ms
4:	learn: 0.0600453	total: 49.7ms	remaining: 448ms
5:	learn: 0.0589076	total: 59.7ms	remaining: 438ms
6:	learn: 0.0571896	total: 70ms	remaining: 430ms
7:	

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'aggression', 'crossing', 'dribbling', 'freekick_accuracy', 'interceptions', 'marking', 'reactions', 'stamina', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'aggression', 'crossing', 'dribbling', 'freekick_accuracy', 'interceptions', 'marking', 'reactions', 'stamina', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1356887	total: 14.5ms	remaining: 709ms
1:	learn: 0.0791993	total: 27.3ms	remaining: 654ms
2:	learn: 0.0678574	total: 38ms	remaining: 596ms
3:	learn: 0.0653192	total: 51.9ms	remaining: 597ms
4:	learn: 0.0640739	total: 73.8ms	remaining: 665ms
5:	learn: 0.0633460	total: 85.2ms	remaining: 625ms
6:	learn: 0.0625686	total: 104ms	remaining: 642ms
7:	learn: 0.0617608

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6016985	total: 6.36ms	remaining: 311ms
1:	learn: 0.5734223	total: 12.3ms	remaining: 294ms
2:	learn: 0.5554318	total: 18ms	remaining: 281ms
3:	learn: 0.5421125	total: 24ms	remaining: 276ms
4:	learn: 0.5300425	total: 30.1ms	remaining: 271ms
5:	learn: 0.5040014	total: 36.3ms	remaining: 267ms
6:	learn: 0.4943938	total: 42.8ms	remaining: 263ms
7:	learn: 0.4850209	total: 48.7ms	remaining: 256ms
8:	learn: 0.4757131	total: 55.2ms	remaining: 251ms
9:	learn: 0.4680002	total: 60.7ms	remaining: 243ms
10:	learn: 0.4643054	total: 66.8ms	remaining: 237ms
11:	learn: 0.4566392	total: 73.4ms	remaining: 233ms
12:	learn: 0.4493689	total: 78.7ms	remain

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.4s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6272655	total: 13.9ms	remaining: 682ms
1:	learn: 0.5504842	total: 27.9ms	remaining: 670ms
2:	learn: 0.5246722	total: 41.2ms	remaining: 646ms
3:	learn: 0.5120492	total: 56.8ms	remaining: 653ms
4:	learn: 0.5012200	total: 70.6ms	remaining: 635ms
5:	learn: 0.4922661	total: 84ms	remaining: 616ms
6:	learn: 0.4787744	total: 95.6ms	remaining: 587ms
7:	lea

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.4s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'dribbling', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'dribbling', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6245520	total: 15.6ms	remaining: 762ms
1:	learn: 0.5926142	total: 29.8ms	remaining: 715ms
2:	learn: 0.5687628	total: 40.2ms	remaining: 629ms
3:	learn: 0.5474386	total: 52.6ms	remaining: 605ms
4:	learn: 0.5369333	total: 63.9ms	remaining: 575ms
5:	learn: 0.5118656	total: 75ms	remaining: 550ms
6:	learn: 0.4979991	total: 86.9ms	remaining: 534ms
7:	learn

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.4s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
100%|██████████| 10/10 [00:02<00:00,  4.74it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']


100%|██████████| 10/10 [00:02<00:00,  4.79it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6272655	total: 14.3ms	remaining: 700ms
1:	learn: 0.5504842	total: 26.5ms	remaining: 635ms
2:	learn: 0.5246722	total: 39.8ms	remaining: 623ms
3:	learn: 0.5121315	total: 52.1ms	remaining: 599ms
4:	learn: 0.4999498	total: 63.1ms	remaining: 568ms
5:	learn: 0.4892225	total: 75.1ms	remaining: 551ms
6:	learn: 0.4736606	total: 87.3ms	remaining: 536ms
7:	learn: 0.4517469	total: 99.8ms	remaining: 524ms
8:	learn: 0.4473388	total: 113ms	remaining: 517ms
9:	learn: 0.4415842	total: 126ms	remaining: 502ms
10:	learn: 0.4314275	total: 138ms	remaining: 491ms
11:	learn: 0.4271902	total: 148ms	remaining: 470ms
12:	learn: 0.4120801	total: 159ms	remaining: 454ms
13:	l

100%|██████████| 10/10 [00:02<00:00,  4.79it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']


[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.3s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'crossing', 'finishing', 'heading_accuracy', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'heading_accuracy', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6276248	total: 14.9ms	remaining: 732ms
1:	learn: 0.5935770	total: 31.4ms	remaining: 753ms
2:	learn: 0.5013446	total: 47.1ms	remaining: 739ms
3:	learn: 0.4784176	total: 59.3ms	remaining: 682ms
4:	learn: 0.4619078	total: 73.7ms	remaining: 664ms
5:	learn: 0.4431100	total: 87.6ms	remaining: 642ms
6:	learn: 0.4349112	total: 101ms	rem

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.4s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['aggression', 'ball_control', 'crossing', 'curve', 'dribbling', 'marking', 'reactions', 'stamina', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['aggression', 'ball_control', 'crossing', 'curve', 'dribbling', 'marking', 'reactions', 'stamina', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6309868	total: 32.3ms	remaining: 1.58s
1:	learn: 0.6047161	total: 57ms	remaining: 1.37s
2:	learn: 0.5778781	total: 77.8ms	remaining: 1.22s
3:	learn: 0.5608970	total: 95.1ms	remaining: 1.09s
4:	learn: 0.5422959	total: 112ms	remaining: 1.01s
5:	learn: 0.5166873	total: 128ms	remaining: 940ms
6:	learn: 0.5007302	total: 143ms	remaining: 880ms
7:	learn: 0.4847725	total: 156ms	remaining: 820ms
8:	learn: 0.4772029	total: 16

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.4s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.151186
0:	learn: 0.6671805	total: 1.21ms	remaining: 59.5ms
1:	learn: 0.6460357	total: 2.28ms	remaining: 54.7ms
2:	learn: 0.6400293	total: 3.35ms	remaining: 52.5ms
3:	learn: 0.6220983	total: 4.8ms	remaining: 55.2ms
4:	learn: 0.6068546	total: 6.02ms	remaining: 54.2ms
5:	learn: 0.6034946	total: 6.94ms	remaining: 50.9ms
6:	learn: 0.5991608	total: 7.79ms	remaining: 47.9ms
7:	learn: 0.5946991	total: 8.69ms	remaining: 45.6ms
8:	learn: 0.5910837	total: 9.75ms	remaining: 44.4ms
9:	learn: 0.5886159	total: 10.9ms	remaining: 43.4ms
10:	learn: 0.5844085	total: 11.8ms	remaining: 42ms
11:	learn: 0.5811150	total: 12.7ms	remaining: 40.2ms
12:	learn: 0.5782236	to

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.151186
0:	learn: 0.6706342	total: 2.64ms	remaining: 130ms
1:	learn: 0.6551115	total: 5.72ms	remaining: 137ms
2:	learn: 0.6417822	total: 8.32ms	remaining: 130ms
3:	learn: 0.6300173	total: 10.5ms	remaining: 121ms
4:	learn: 0.6045412	total: 12.8ms	remaining: 115ms
5:	learn: 0.5807123	total: 15.3ms	remaining: 112ms
6:	learn: 0.5735208	total: 17.6ms	remaining: 108ms

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'dribbling', 'finishing', 'interceptions', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'balance', 'dribbling', 'finishing', 'freekick_accuracy', 'interceptions', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.151186
0:	learn: 0.6742299	total: 2.52ms	remaining: 124ms
1:	learn: 0.6578025	total: 5.02ms	remaining: 121ms
2:	learn: 0.6226506	total: 7.54ms	remaining: 118ms
3:	learn: 0.6123764	total: 9.85ms	remaining: 113ms
4:	learn: 0.6012897	total: 12.9ms	remaining: 116ms
5:	learn: 0.5927993	total: 15.7ms	remaining: 115ms
6:	learn: 0.5839849	total: 18.6ms	rem

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
100%|██████████| 10/10 [00:00<00:00, 27.77it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'jumping', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']


100%|██████████| 10/10 [00:00<00:00, 27.66it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'jumping', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.151186
0:	learn: 0.6706342	total: 2.8ms	remaining: 137ms
1:	learn: 0.6551115	total: 5.41ms	remaining: 130ms
2:	learn: 0.6417822	total: 8ms	remaining: 125ms
3:	learn: 0.6300173	total: 10.8ms	remaining: 124ms
4:	learn: 0.6045412	total: 13.7ms	remaining: 124ms
5:	learn: 0.5807123	total: 16.2ms	remaining: 119ms
6:	learn: 0.5748267	total: 19.3ms	remaining: 119ms
7:	learn: 0.5663381	total: 21.8ms	remaining: 115ms
8:	learn: 0.5616188	total: 24.4ms	remaining: 111ms
9:	learn: 0.5543960	total: 27.1ms	remaining: 109ms
10:	learn: 0.5507074	total: 29.7ms	remaining: 105ms
11:	learn: 0.5416162	total: 32.5ms	remaining: 103ms
12:	learn: 0.5389668	total: 35ms	remaining: 99.7ms
13:	l

100%|██████████| 10/10 [00:00<00:00, 26.97it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'interceptions', 'jumping', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['agility', 'finishing', 'heading_accuracy', 'interceptions', 'jumping', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'heading_accuracy', 'interceptions', 'jumping', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.151186
0:	learn: 0.6704819	total: 3.87ms	remaining

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'aggression', 'curve', 'dribbling', 'interceptions', 'jumping', 'reactions', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'aggression', 'curve', 'dribbling', 'interceptions', 'jumping', 'reactions', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.151186
0:	learn: 0.6727982	total: 3.17ms	remaining: 155ms
1:	learn: 0.6581404	total: 5.51ms	remaining: 132ms
2:	learn: 0.6420840	total: 7.65ms	remaining: 120ms
3:	learn: 0.6096812	total: 9.93ms	remaining: 114ms
4:	learn: 0.5858028	total: 12.1ms	remaining: 109ms
5:	learn: 0.5811324	total: 14.5ms	remaining: 106ms
6:	learn: 0.5728137	total: 17.2ms	remaining: 106ms
7:	learn: 0

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6449693	total: 7.72ms	remaining: 378ms
1:	learn: 0.6148959	total: 16.2ms	remaining: 390ms
2:	learn: 0.5423283	total: 23.1ms	remaining: 363ms
3:	learn: 0.5262783	total: 30.2ms	remaining: 347ms
4:	learn: 0.5093886	total: 38.2ms	remaining: 343ms
5:	learn: 0.4825850	total: 45.9ms	remaining: 337ms
6:	learn: 0.4680641	total: 53.7ms	remaining: 330ms
7:	learn: 0.4553023	total: 60.6ms	remaining: 318ms
8:	learn: 0.4437783	total: 68ms	remaining: 310ms
9:	learn: 0.4213482	total: 73.5ms	remaining: 294ms
10:	learn: 0.4125722	total: 79.3ms	remaining: 281ms
11:	learn: 0.3906642	total: 84.9ms	remaining: 269ms
12:	learn: 0.3793938	total: 92.1ms	rema

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.7s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6089404	total: 12.6ms	remaining: 619ms
1:	learn: 0.5477111	total: 27ms	remaining: 649ms
2:	learn: 0.5242808	total: 39.1ms	remaining: 612ms
3:	learn: 0.4985760	total: 50.5ms	remaining: 581ms
4:	learn: 0.4864122	total: 62.9ms	remaining: 566ms
5:	learn: 0.4245552	total: 80.1ms	remaining: 587ms
6:	learn: 0.3983610	total: 111ms	remaining: 681ms
7:	lear

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    1.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['finishing', 'freekick_accuracy', 'heading_accuracy', 'interceptions', 'long_shots', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['curve', 'finishing', 'freekick_accuracy', 'interceptions', 'long_shots', 'marking', 'penalties', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6144798	total: 14.8ms	remaining: 727ms
1:	learn: 0.5706796	total: 31.1ms	remaining: 747ms
2:	learn: 0.5191506	total: 47.8ms	remaining: 748ms
3:	learn: 0.4881575	total: 63.5ms	remaining: 730ms
4:	learn: 0.4738689	total: 78.6ms	remaining: 707ms
5:	learn: 0.4607325	total: 93ms	remaining: 682ms
6:	learn: 0.4179941	total: 107ms	remainin

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
100%|██████████| 10/10 [00:02<00:00,  4.58it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']


100%|██████████| 10/10 [00:01<00:00,  5.06it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6079631	total: 14.6ms	remaining: 717ms
1:	learn: 0.5750164	total: 28.5ms	remaining: 685ms
2:	learn: 0.5440332	total: 42.3ms	remaining: 662ms
3:	learn: 0.4974549	total: 55.7ms	remaining: 641ms
4:	learn: 0.4271364	total: 67.1ms	remaining: 604ms
5:	learn: 0.4100291	total: 80.6ms	remaining: 591ms
6:	learn: 0.4003386	total: 93.1ms	remaining: 572ms
7:	learn: 0.3910228	total: 107ms	remaining: 563ms
8:	learn: 0.3751521	total: 122ms	remaining: 557ms
9:	learn: 0.3655714	total: 136ms	remaining: 543ms
10:	learn: 0.3593936	total: 148ms	remaining: 526ms
11:	learn: 0.3415689	total: 163ms	remaining: 515ms
12:	learn: 0.3357073	total: 175ms	remaining: 499ms
13:	le

100%|██████████| 10/10 [00:02<00:00,  4.93it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.


['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']


[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    1.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'heading_accuracy', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['acceleration', 'agility', 'finishing', 'heading_accuracy', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6163875	total: 15.1ms	remaining: 741ms
1:	learn: 0.5805982	total: 31.2ms	remaining: 749ms
2:	learn: 0.5467584	total: 47.4ms	remaining: 743ms
3:	learn: 0.5000534	total: 65.5ms	remaining: 753ms
4:	learn: 0.4830597	total: 81.3ms	remaining: 732ms
5:	learn: 0.4548487	total: 98.1ms	remaining: 720ms
6:	learn: 0.4473268	total: 115ms	remaining: 705ms
7:

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['aggression', 'ball_control', 'crossing', 'dribbling', 'finishing', 'freekick_accuracy', 'long_passing', 'reactions', 'stamina', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
['aggression', 'ball_control', 'crossing', 'curve', 'dribbling', 'long_passing', 'reactions', 'stamina', 'short_passing', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.6239742	total: 11.8ms	remaining: 580ms
1:	learn: 0.5840119	total: 25.4ms	remaining: 610ms
2:	learn: 0.5308076	total: 39.1ms	remaining: 613ms
3:	learn: 0.4445997	total: 50.5ms	remaining: 581ms
4:	learn: 0.4157644	total: 62.6ms	remaining: 564ms
5:	learn: 0.3919004	total: 75.4ms	remaining: 553ms
6:	learn: 0.3791936	total: 87.9ms	remaining: 540ms
7:	learn: 0.3624751	to

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
