In [1]:
from socceraction.data.wyscout import PublicWyscoutLoader
from socceraction.spadl.wyscout import convert_to_actions as convert_to_actions_wyscout
from socceraction.spadl.statsbomb import convert_to_actions as convert_to_actions_statsbomb
from socceraction.data.opta import OptaLoader
from socceraction.data.statsbomb import StatsBombLoader
from socceraction.spadl.config import actiontypes, bodyparts
import socceraction.spadl as spadl
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, roc_auc_score, brier_score_loss, log_loss, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, balanced_accuracy_score
from xgboost import XGBClassifier, XGBRegressor
from catboost import CatBoostClassifier
import math
import pickle
import os
from name_matching.name_matcher import NameMatcher
from rapidfuzz import fuzz
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.feature_selection import r_regression, SelectKBest, chi2, mutual_info_classif, SequentialFeatureSelector, RFECV, SelectFromModel
from scipy.stats import pearsonr, chisquare
from mrmr import mrmr_classif
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lasso, LogisticRegression

In [2]:
# CONFIG FOR EXPERIMENTS SCENARIO
USE_EVALUATION_METRIC_CLASSIFICATION = True
SAMPLING_OPTIONS = [
    "none",
    "random_oversampled",
    "random_undersampled",
    "smote_oversampled"
]
FEATURE_SELECTION_OPTIONS = [
    "pearson",
    "chisquare",
    "mutualinf",
    "mrmr",
    "rfembedded",
    "lasso"
]
if USE_EVALUATION_METRIC_CLASSIFICATION:
    MODEL_ALGORITHM_OPTIONS = [
        "xgbclassifier",
        "catboostclassifier",
        "rfclassifier"
    ]
else:
    MODEL_ALGORITHM_OPTIONS = [
        "xgbregressor",
        # "rfregressor",
        # "logregression"
    ]
CONFIG_EXPERIMENTS_SCENARIO_MAP = {}

def construct_config_experiments_scenario_map():
    index_counter = 1
    for sampling_opt in SAMPLING_OPTIONS:
        for feature_selection_opt in FEATURE_SELECTION_OPTIONS:
            for algorithm_opt in MODEL_ALGORITHM_OPTIONS:
                CONFIG_EXPERIMENTS_SCENARIO_MAP[index_counter] = \
                    {"sampling_opt" : sampling_opt, \
                    "feature_selection_opt" : feature_selection_opt, \
                    "algorithm_opt" : algorithm_opt}
                index_counter += 1

construct_config_experiments_scenario_map()

In [3]:
# COLUMNS FOR TEST EXPERIMENT RESULT
COLUMNS_EVALUATION_METRIC_CLASSIFICATION = [
    "rec_score",
    "prec_score",
    "F1_score",
    "acc_score",
    "auc_score",
    "mcc_score",
    "brier_score",
    "log_loss_score",
    "balanced_acc_score"
]
COLUMNS_EVALUATION_METRIC_REGRESSION = [
    "mean_squared_error_score",
    "root_mean_squared_error_score",
    "auc_score",
    "brier_score",
    "log_loss_score",
    "mean_absolute_error_score",
    "r_squared_score",
    "mean_absolute_percentage_error_score"
]
COLUMNS_SCENARIO_NAME = [
    "sampling_opt",
    "feature_selection_opt",
    "algorithm_opt"
]
if (USE_EVALUATION_METRIC_CLASSIFICATION):
    COLUMNS_EXPERIMENT_RESULT = ["case_number"] + COLUMNS_SCENARIO_NAME + COLUMNS_EVALUATION_METRIC_CLASSIFICATION
else:
    COLUMNS_EXPERIMENT_RESULT = ["case_number"] + COLUMNS_SCENARIO_NAME + COLUMNS_EVALUATION_METRIC_REGRESSION

In [4]:
api_wyscout = PublicWyscoutLoader(root="data/wyscout")
api_opta = OptaLoader(root="data/opta")
api_statsbomb = StatsBombLoader(root="data/statsbomb", getter="local")

In [5]:
for idx, action_name in enumerate(actiontypes):
    print(f'action_id : {idx}   action_name : {action_name}')

action_id : 0   action_name : pass
action_id : 1   action_name : cross
action_id : 2   action_name : throw_in
action_id : 3   action_name : freekick_crossed
action_id : 4   action_name : freekick_short
action_id : 5   action_name : corner_crossed
action_id : 6   action_name : corner_short
action_id : 7   action_name : take_on
action_id : 8   action_name : foul
action_id : 9   action_name : tackle
action_id : 10   action_name : interception
action_id : 11   action_name : shot
action_id : 12   action_name : shot_penalty
action_id : 13   action_name : shot_freekick
action_id : 14   action_name : keeper_save
action_id : 15   action_name : keeper_claim
action_id : 16   action_name : keeper_punch
action_id : 17   action_name : keeper_pick_up
action_id : 18   action_name : clearance
action_id : 19   action_name : bad_touch
action_id : 20   action_name : non_action
action_id : 21   action_name : dribble
action_id : 22   action_name : goalkick


In [6]:
for idx, bodypart_name in enumerate(bodyparts):
    print(f'bodypart_id : {idx}   bodypart_name : {bodypart_name}')

bodypart_id : 0   bodypart_name : foot
bodypart_id : 1   bodypart_name : head
bodypart_id : 2   bodypart_name : other
bodypart_id : 3   bodypart_name : head/other
bodypart_id : 4   bodypart_name : foot_left
bodypart_id : 5   bodypart_name : foot_right


In [7]:
def convert_events_df_to_spadl(events_df, home_team_id, source):
    if (source == "Statsbomb"):
        spadl_events_df = convert_to_actions_statsbomb(events_df, home_team_id)
    else:
        spadl_events_df = convert_to_actions_wyscout(events_df, home_team_id)
    spadl_events_df['time_seconds'] = spadl_events_df['time_seconds'].astype('float64')
    spadl_events_df['timestamp'] = pd.to_datetime(spadl_events_df['time_seconds'], unit='s')
    return spadl_events_df

In [8]:
# FUNCTION TO ADD ADDITIONAL INFO IN RAW SPADL DATAFRAME
STANDARD_LENGTH_COURT = 105
STANDARD_WIDTH_COURT = 68
STANDARD_GOALLINE_WIDTH = 7.32
STANDARD_LENGTH_COURT_STATSBOMB = 120
STANDARD_WIDTH_COURT_STATSBOMB = 80

# Helper Functions
def filter_out_is_home_team_apply_df(row, home_team_id):
    return 1 if row['team_id'] == home_team_id else 0

def filter_out_take_on_or_dribble_apply_df(row, take_on_action_id):
    return 1 if row['action_id'] == take_on_action_id else 0

# Helper functions specific to statsbomb opponent data
def calculate_distance_between_two_coordinates(x1, y1, x2, y2):
    return math.sqrt(abs(x2-x1) ** 2 + abs(y2-y1) ** 2)

def filter_out_non_opponent_coordinate_freeze_frame(freeze_frame_360_list):
    if (freeze_frame_360_list == None or not isinstance(freeze_frame_360_list, list)):
        return []
    return [x for x in freeze_frame_360_list if x['teammate'] == False and x['actor'] == False]

def convert_statsbomb_coordinate_to_spadl_coordinate(coordinate_x, coordinate_y):
    converted_coordinate_x = (STANDARD_LENGTH_COURT / STANDARD_LENGTH_COURT_STATSBOMB) * coordinate_x
    converted_coordinate_y = (STANDARD_WIDTH_COURT / STANDARD_WIDTH_COURT_STATSBOMB) * coordinate_y
    return (converted_coordinate_x, converted_coordinate_y)

def calculate_distance_opponent_apply_df(row):
    freeze_frame_360_opponents = filter_out_non_opponent_coordinate_freeze_frame(row['freeze_frame_360'])
    list_distance_opponent = []
    for object_loc in freeze_frame_360_opponents:
        opponent_x, opponent_y = convert_statsbomb_coordinate_to_spadl_coordinate(object_loc['location'][0], object_loc['location'][1])
        distance_opponent = calculate_distance_between_two_coordinates(row['start_x'], row['start_y'], opponent_x, opponent_y)
        list_distance_opponent.append(distance_opponent)
    return min(list_distance_opponent) if len(list_distance_opponent) > 0 else 0

def calculate_num_opponent_closer_goal(start_x, start_y, freeze_frame_360, is_home_team):
    freeze_frame_360_opponents = filter_out_non_opponent_coordinate_freeze_frame(freeze_frame_360)
    if (is_home_team):
        coordinate_x_goal = STANDARD_LENGTH_COURT
    else:
        coordinate_x_goal = 0
    coordinate_y_goal = STANDARD_WIDTH_COURT / 2

    num_opponent_closer_to_goal = 0
    for object_loc in freeze_frame_360_opponents:
        opponent_x, opponent_y = convert_statsbomb_coordinate_to_spadl_coordinate(object_loc['location'][0], object_loc['location'][1])
        distance_passer_to_goal = calculate_distance_between_two_coordinates(start_x, start_y, coordinate_x_goal, coordinate_y_goal)
        distance_opponent_to_goal = calculate_distance_between_two_coordinates(opponent_x, opponent_y, coordinate_x_goal, coordinate_y_goal)
        if (distance_opponent_to_goal < distance_passer_to_goal):
            num_opponent_closer_to_goal += 1
    return num_opponent_closer_to_goal

def calculate_num_opponent_closer_goal_apply_df(row, home_team_id):
    return calculate_num_opponent_closer_goal(row['start_x'], row['start_y'], row['freeze_frame_360'], (row['team_id'] == home_team_id))

def calculate_num_opponent_in_path(start_x, start_y, freeze_frame_360):
    path_distance = 10
    freeze_frame_360_opponents = filter_out_non_opponent_coordinate_freeze_frame(freeze_frame_360)
    num_opponent_in_path = 0
    for object_loc in freeze_frame_360_opponents:
        opponent_x, opponent_y = convert_statsbomb_coordinate_to_spadl_coordinate(object_loc['location'][0], object_loc['location'][1])
        distance_with_opponent = calculate_distance_between_two_coordinates(start_x, start_y, opponent_x, opponent_y)
        if (distance_with_opponent <= path_distance):
            num_opponent_in_path += 1
    return num_opponent_in_path

def calculate_num_opponent_in_path_apply_df(row):
    return calculate_num_opponent_in_path(row['start_x'], row['start_y'], row['freeze_frame_360'])

def calculate_distance_dribble(coordinate_x, coordinate_y, end_x, end_y):
    distance_passing = math.sqrt((abs(end_x - coordinate_x)) ** 2 + (abs(end_y - coordinate_y)) ** 2)
    return distance_passing

def calculate_distance_dribble_apply_df(row):
    return calculate_distance_dribble(row['start_x'], row['start_y'], row['end_x'], row['end_y'])

# Add is_home_team column (boolean 0/1)
def add_is_home_team_column_to_spadl_df(spadl_df, home_team_id):
    spadl_df['is_home_team'] = spadl_df.apply(lambda x : filter_out_is_home_team_apply_df(x, home_team_id), axis=1)
    return spadl_df

# Add is_take_on column (boolean 0/1)
def add_is_take_on_column_to_spadl_df(spadl_df, take_on_action_id):
    spadl_df['is_take_on'] = spadl_df.apply(lambda x : filter_out_take_on_or_dribble_apply_df(x, take_on_action_id), axis=1)
    return spadl_df

# Feature 1 : distance dribble
def add_distance_dribble_to_spadl_df(spadl_df):
    spadl_df['distance_dribble'] = spadl_df.apply(calculate_distance_dribble_apply_df, axis=1)
    return spadl_df

# Opponent Feature 1 : distance opponent
def add_distance_opponent_column_to_spadl_df(spadl_df):
    spadl_df['distance_opponent'] = spadl_df.apply(calculate_distance_opponent_apply_df, axis=1)
    return spadl_df

# Opponent Feature 2 : opponents closer to goal
def add_num_opponent_closer_goal_column_to_spadl_df(spadl_df, home_team_id):
    spadl_df['num_opponent_closer_goal'] = spadl_df.apply(lambda x : calculate_num_opponent_closer_goal_apply_df(x, home_team_id), axis=1)
    return spadl_df

# Opponent Feature 3 : opponents in path
def add_num_opponent_in_path_column_to_spadl_df(spadl_df):
    spadl_df['num_opponent_in_path'] = spadl_df.apply(calculate_num_opponent_in_path_apply_df, axis=1)
    return spadl_df

In [9]:
# Collect all dataset action specific type, export them to csv files
# Take_on (action_id = 7), Dribble (action_id = 21)
DRIBBLE_ACTION_ID = [7, 21] 
TAKE_ON_ACTION_ID = 7

def collect_raw_dribble_spadl_df(source="Wyscout", period=1):
    if source == "Statsbomb":
        api = api_statsbomb
    else:
        api = api_wyscout
    list_competitions_ids = []
    list_game_ids = []

    competitions_df = api.competitions()
    for _, row in competitions_df.iterrows():
        if source == "Statsbomb":
            if row['competition_gender'] == 'male':
                list_competitions_ids.append((row['competition_id'], row['season_id']))
        else:
            list_competitions_ids.append((row['competition_id'], row['season_id']))
        
    for competition_id, season_id in list_competitions_ids:
        games_df = api.games(competition_id, season_id)
        for _, row in games_df.iterrows():
            list_game_ids.append((row['game_id'], row['home_team_id'], row['away_team_id']))
            
    for game_id, home_team_id, away_team_id in list_game_ids:
        try:
            if (source == "Statsbomb"):
                this_game_events_df = api.events(game_id, load_360=True)
            else:
                this_game_events_df = api.events(game_id)
            this_game_events_spadl_df = convert_events_df_to_spadl(this_game_events_df, home_team_id, source)

            # Add column 360 data into events spadl data (Statsbomb)
            if (source == "Statsbomb"):
                this_game_events_spadl_df = pd.merge(this_game_events_spadl_df, this_game_events_df[["event_id", "visible_area_360", "freeze_frame_360"]], how="inner", left_on="original_event_id", right_on="event_id")
                this_game_events_spadl_df.dropna(subset=["freeze_frame_360"])
            
            # Filter action id with type dribble only, pick only data from first period
            this_game_events_spadl_df = this_game_events_spadl_df[this_game_events_spadl_df['type_id'].isin(DRIBBLE_ACTION_ID)]
            # if (period != None):
            #     this_game_events_spadl_df = this_game_events_spadl_df[this_game_events_spadl_df['period_id'] == period]
            # else:
            #     this_game_events_spadl_df = this_game_events_spadl_df[this_game_events_spadl_df['period_id'] == 1]
            
            # Add additional computed column to support xDribble model
            this_game_events_spadl_df = add_is_home_team_column_to_spadl_df(this_game_events_spadl_df, home_team_id)
            this_game_events_spadl_df = add_is_take_on_column_to_spadl_df(this_game_events_spadl_df, TAKE_ON_ACTION_ID)
            this_game_events_spadl_df = add_distance_dribble_to_spadl_df(this_game_events_spadl_df)
            if (source == "Statsbomb"):
                this_game_events_spadl_df = add_distance_opponent_column_to_spadl_df(this_game_events_spadl_df)
                this_game_events_spadl_df = add_num_opponent_closer_goal_column_to_spadl_df(this_game_events_spadl_df, home_team_id)
                this_game_events_spadl_df = add_num_opponent_in_path_column_to_spadl_df(this_game_events_spadl_df)
                
            # Export to external csv iteratively
            this_game_events_spadl_df.to_csv(f'data/training_data_xdribble/{game_id}_{home_team_id}_{away_team_id}_xdribble_data.csv')
        
        except FileNotFoundError:
            print(f'File 360 data not found {game_id}-{home_team_id}-{away_team_id}') 

In [10]:
# FUNCTIONS TO CREATE ALL DATASET PLAYERS
def collect_raw_all_players_df(source="Wyscout"):
    if (source == "Statsbomb"):
        api = api_statsbomb
    else:
        api = api_wyscout
    list_competitions_ids = []
    list_game_ids = []

    competitions_df = api.competitions()
    for _, row in competitions_df.iterrows():
        if (source == "Statsbomb"):
            if (row['competition_gender'] == 'male'):
                list_competitions_ids.append((row['competition_id'], row['season_id']))
        else:
            list_competitions_ids.append((row['competition_id'], row['season_id']))
        
    for competition_id, season_id in list_competitions_ids:
        games_df = api.games(competition_id, season_id)
        for _, row in games_df.iterrows():
            list_game_ids.append((row['game_id'], row['home_team_id'], row['away_team_id']))

    for game_id, home_team_id, away_team_id in list_game_ids:
        players_df = api.players(game_id)
        if (source == "Statsbomb"):
            players_df.to_csv(f'data/training_data_players_statsbomb/{game_id}_{home_team_id}_{away_team_id}_players_data.csv')
        else:
            players_df.to_csv(f'data/training_data_players_wyscout/{game_id}_{home_team_id}_{away_team_id}_players_data.csv')

def load_and_concat_players_df_from_csv(path_to_raw_players_df):
    list_raw_players_df = []
    for filename in os.listdir(path_to_raw_players_df):
        f = os.path.join(path_to_raw_players_df, filename)
        if os.path.isfile(f):
            players_df = pd.read_csv(f)
            list_raw_players_df.append(players_df)
    merged_players_df = pd.concat(list_raw_players_df)
    merged_players_df = merged_players_df.drop_duplicates(subset='player_id').reset_index()
    return merged_players_df

def load_csv_players_data_sofifa(path_to_sofifa_file):
    return pd.read_csv(path_to_sofifa_file)

In [11]:
# Comment it if players dataset already loaded
# collect_raw_all_players_df(source="Statsbomb")

In [12]:
# Merge wyscout player datasets with sofifa datasets by matching string name
def create_maps_for_name_matching_scores(list_unique_names_df_1, list_unique_names_df_2):
    maps_name_matching_score = {}
    for name_1 in list_unique_names_df_1:
        for name_2 in list_unique_names_df_2:
            maps_name_matching_score[(name_1, name_2)] = fuzz.ratio(name_1, name_2)
    return maps_name_matching_score

def filter_out_maps_for_name_matching_scores(maps_name_matching, threshold):
    filtered_maps_name_matching = {}
    for name_1, name_2 in maps_name_matching:
        if (maps_name_matching[(name_1, name_2)] >= threshold):
            filtered_maps_name_matching[(name_1, name_2)] = maps_name_matching[(name_1, name_2)]
    return filtered_maps_name_matching

def merge_big_dataframe_wyscout_with_sofifa(big_dataframe_players, sofifa_players_dataset, maps_name_matching_score):
    # Preprocess both dataframes and add prefix 1- and 2- to all column names to avoid duplicate column names
    big_dataframe_players.dropna(subset=['player_name'], inplace=True)
    big_dataframe_players.rename(columns=lambda x: '1-'+x, inplace=True)
    sofifa_players_dataset.dropna(subset=['full_name'], inplace=True)
    sofifa_players_dataset.rename(columns=lambda x: '2-'+x, inplace=True)
    # Merge into new empty dataframe one by one by iterating maps name matching score
    big_dataframe_players_with_sofifa = pd.DataFrame(columns=list(big_dataframe_players.columns)+list(sofifa_players_dataset.columns), index=[0])
    big_dataframe_players_with_sofifa.reset_index(inplace=True)
    for name_1, name_2 in maps_name_matching_score:
        row_from_big_dataframe_players = big_dataframe_players[big_dataframe_players['1-player_name'] == name_1].iloc[0]
        row_from_sofifa_players_dataset = sofifa_players_dataset[sofifa_players_dataset['2-full_name'] == name_2].iloc[0]
        new_row = pd.concat([row_from_big_dataframe_players, row_from_sofifa_players_dataset], axis=0, ignore_index=False)
        new_row = pd.DataFrame([new_row]).reset_index()
        big_dataframe_players_with_sofifa = pd.concat([big_dataframe_players_with_sofifa, new_row])
    # Remove prefix 1- and 2- from final big datasets
    big_dataframe_players_with_sofifa.rename(columns=lambda x: x[2:], inplace=True)
    return big_dataframe_players_with_sofifa

SOURCE_DATA = "Statsbomb"
if (SOURCE_DATA == "Statsbomb"):
    DIRECTORY_PLAYERS_CSV_DATAS = "data/training_data_players_statsbomb"
else:
    DIRECTORY_PLAYERS_CSV_DATAS = "data/training_data_players_wyscout"
DIRECTORY_SOFIFA_CSV_DATAS = "data/players_skill_dataset/sofifa_dataset_cleaned.csv"
DIRECTORY_WYSCOUT_CSV_DATAS = "data/players_skill_dataset/wyscout_dataset_cleaned.csv"
DIRECTORY_STATSBOMB_CSV_DATAS = "data/players_skill_dataset/statsbomb_dataset_cleaned.csv"
DIRECTORY_FINAL_PLAYERS_CSV_DATAS = "data/players_skill_dataset/final_players_skill_dataset.csv"

# COMMENT BELOW SNIPPET CODES IF FINAL PLAYER DATASETS WITH SKILL ALREADY GENERATED !!
# big_dataframe_players = load_and_concat_players_df_from_csv(DIRECTORY_PLAYERS_CSV_DATAS)
# if (SOURCE_DATA == "Statsbomb"):
#     big_dataframe_players.to_csv(DIRECTORY_STATSBOMB_CSV_DATAS)
# else:
#     big_dataframe_players.to_csv(DIRECTORY_WYSCOUT_CSV_DATAS)
# sofifa_players_dataset = load_csv_players_data_sofifa(DIRECTORY_SOFIFA_CSV_DATAS)

# maps_name_matching_score = create_maps_for_name_matching_scores(big_dataframe_players['player_name'].unique(), sofifa_players_dataset['full_name'].unique())
# maps_name_matching_score = filter_out_maps_for_name_matching_scores(maps_name_matching_score, threshold=80)

# big_dataframe_players_with_sofifa = merge_big_dataframe_wyscout_with_sofifa(big_dataframe_players, sofifa_players_dataset, maps_name_matching_score)
# big_dataframe_players_with_sofifa.reset_index(inplace=True)
# big_dataframe_players_with_sofifa = big_dataframe_players_with_sofifa.drop_duplicates(subset='player_id')
# big_dataframe_players_with_sofifa.to_csv(DIRECTORY_FINAL_PLAYERS_CSV_DATAS)

In [13]:
# MAIN DRIVER (comment it if csv files already loaded)
# collect_raw_dribble_spadl_df(source="Statsbomb")

In [14]:
# Load csv datas already retrieved then concat them into one big dataframe
DIRECTORY_XDRIBBLE_CSV_DATAS = "data/training_data_xdribble"

def load_and_concat_xdribble_df_from_csv():
    list_pass_event_df = []
    for filename in os.listdir(DIRECTORY_XDRIBBLE_CSV_DATAS):
        f = os.path.join(DIRECTORY_XDRIBBLE_CSV_DATAS, filename)
        if os.path.isfile(f):
            pass_event_df = pd.read_csv(f)
            list_pass_event_df.append(pass_event_df)
    return pd.concat(list_pass_event_df)

In [15]:
# JOIN ALREADY CONSTRUCTED PLAYER SKILLS DATASET WITH ORIGIN EVENT DATASET WYSCOUT
player_skills_dataset = pd.read_csv(DIRECTORY_FINAL_PLAYERS_CSV_DATAS)
big_dataframe_xdribble_model = load_and_concat_xdribble_df_from_csv()
big_dataframe_xdribble_model = big_dataframe_xdribble_model.merge(player_skills_dataset, how='inner',on='player_id')
big_dataframe_xdribble_model.head()

Unnamed: 0,Unnamed: 0_x,game_id_x,original_event_id,period_id,time_seconds,team_id_x,player_id,start_x,start_y,end_x,...,LWB,LDM,CDM,RDM,RWB,LB,LCB,CB,RCB,RB
0,1,3788741,bea4235d-7e40-461c-bb82-6d473f5bb324,1,2.0,909,8963.0,27.794118,44.070886,31.058824,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
1,71,3788741,d4a29d8a-01f6-4ddb-87e1-05d429d81662,1,203.0,909,8963.0,23.558824,47.944304,24.264706,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
2,288,3788741,ce7d0f67-d9af-495f-83fe-d498f0ce0aed,1,657.0,909,8963.0,14.735294,40.8,13.235294,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
3,328,3788741,e43c65e9-33f1-4cda-93fe-9a918eb480ca,1,761.0,909,8963.0,43.323529,60.167089,48.0,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2
4,332,3788741,c3bcba92-1a6f-492e-aced-8a64df6a52ea,1,767.0,909,8963.0,44.117647,62.491139,42.176471,...,65+2,67+2,67+2,67+2,65+2,68+2,73+2,73+2,73+2,68+2


In [16]:
# SELECT ONLY FEATURED COLUMN FROM BIG DATASETS
features_column_included = ["start_x", "start_y", "end_x", "end_y", "is_take_on", "distance_opponent", 
                            "num_opponent_closer_goal", "num_opponent_in_path", "result_id", "distance_dribble"]
player_skills_column_included = ["acceleration", "aggression", "agility", "balance", "ball_control",
                                 "composure", "crossing", "curve", "dribbling", "finishing",
                                 "freekick_accuracy", "heading_accuracy", "interceptions", "jumping", "long_passing",
                                 "long_shots", "marking", "penalties", "positioning", "reactions",
                                 "shot_power", "sliding_tackle", "sprint_speed", "stamina", "short_passing",
                                 "standing_tackle", "strength", "vision", "volleys"]
player_attribute_column_included = ["height_cm", "weight_kgs", "age"]

big_dataframe_xdribble_model = big_dataframe_xdribble_model[[c for c in big_dataframe_xdribble_model.columns if c in (features_column_included + player_skills_column_included + player_attribute_column_included)]]
big_dataframe_xdribble_model.head()

Unnamed: 0,start_x,start_y,end_x,end_y,result_id,is_take_on,distance_dribble,distance_opponent,num_opponent_closer_goal,num_opponent_in_path,...,long_shots,aggression,interceptions,positioning,vision,penalties,composure,marking,standing_tackle,sliding_tackle
0,27.794118,44.070886,31.058824,42.693671,1,0,3.543307,17.081977,10,0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
1,23.558824,47.944304,24.264706,49.063291,1,0,1.323028,14.231871,5,0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
2,14.735294,40.8,13.235294,48.202532,1,0,7.552978,11.364664,3,0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
3,43.323529,60.167089,48.0,62.663291,1,0,5.300981,22.928909,7,0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0
4,44.117647,62.491139,42.176471,62.491139,1,0,1.941176,29.146285,8,0,...,42.0,81.0,71.0,42.0,49.0,52.0,59.0,69.0,82.0,79.0


In [17]:
# CASE 1 : Random Oversample Function
def training_data_random_oversampled(X_train, Y_train):
    ros = RandomOverSampler(random_state=42)
    X_resampled, Y_resampled = ros.fit_resample(X_train, Y_train)
    return (X_resampled, Y_resampled)

# CASE 2 : Random Undersample Function
def training_data_random_undersampled(X_train, Y_train):
    rus = RandomUnderSampler(random_state=42)
    X_resampled, Y_resampled = rus.fit_resample(X_train, Y_train)
    return (X_resampled, Y_resampled)

# CASE 3 : Random SMOTE Oversample Function
def training_data_smote_oversampled(X_train, Y_train):
    X_resampled, Y_resampled = SMOTE().fit_resample(X_train, Y_train)
    return (X_resampled, Y_resampled)

# V CASE 1 : Feature Selection - Pearson Coefficient
def filter_columns_feature_selection_pearson(X_train, Y_train, columns_considered, threshold):
    new_columns_after_selection = []
    for _, skill in enumerate(columns_considered):
        correlation_value, _ = pearsonr(X_train[skill], Y_train)
        if correlation_value >= threshold:
            new_columns_after_selection.append(skill)
    return new_columns_after_selection

def training_data_feature_selection_pearson(X_train, Y_train, columns_considered, threshold):
    columns_selected = filter_columns_feature_selection_pearson(X_train, Y_train, columns_considered, threshold)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 2 : Feature Selection - Chi Square
def filter_columns_feature_selection_chisquare(X_train, Y_train, columns_considered, num_of_features):
    chi2_selector = SelectKBest(chi2, k=num_of_features) 
    df_feature = X_train[columns_considered]
    chi2_selector.fit(df_feature, Y_train)
    cols = chi2_selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_chisquare(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_chisquare(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 3 : Feature Selection - Mutual Information
def filter_columns_feature_selection_mutualinf(X_train, Y_train, columns_considered, num_of_features):
    mi_selector = SelectKBest(mutual_info_classif, k=num_of_features) 
    df_feature = X_train[columns_considered]
    mi_selector.fit(df_feature, Y_train)
    cols = mi_selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_mutualinf(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_mutualinf(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 4 : Feature Selection - mRMR Selection
def filter_columns_feature_selection_mrmr(X_train, Y_train, columns_considered, num_of_features):
    df_feature = X_train[columns_considered]
    selected_features = mrmr_classif(X=df_feature, y=Y_train, K=num_of_features)
    return selected_features

def training_data_feature_selection_mrmr(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_mrmr(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# X CASE 5 : Feature Selection - Sequential Forward Selection (SFS)
def filter_columns_feature_selection_sfs(X_train, Y_train, columns_considered, num_of_features):
    rf = RandomForestClassifier()
    sfs = SequentialFeatureSelector(rf, n_features_to_select=num_of_features, direction='forward')
    df_feature = X_train[columns_considered]
    sfs.fit(df_feature, Y_train)
    cols = sfs.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_sfs(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_sfs(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# X CASE 6 : Feature Selection - Sequential Backward Elimination (SBE)
def filter_columns_feature_selection_sbe(X_train, Y_train, columns_considered, num_of_features):
    rf = RandomForestClassifier()
    sfs = SequentialFeatureSelector(rf, n_features_to_select=num_of_features, direction='backward')
    df_feature = X_train[columns_considered]
    sfs.fit(df_feature, Y_train)
    cols = sfs.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_sbe(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_sbe(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# X CASE 7 : Feature Selection - Recursive Feature Elimination
def filter_columns_feature_selection_rfe(X_train, Y_train, columns_considered, num_of_features):
    estimator = LinearSVR()
    selector = RFECV(estimator, step=1, cv=num_of_features)
    df_feature = X_train[columns_considered]
    selector.fit(df_feature, Y_train)
    cols = selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_rfe(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_rfe(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 8 : Feature Selection - Random Forest Embedded (rfembedded)
def filter_columns_feature_selection_rfembedded(X_train, Y_train, columns_considered, num_of_features):
    estimator = RandomForestClassifier()
    selector = SelectFromModel(estimator=estimator, max_features=num_of_features)
    df_feature = X_train[columns_considered]
    selector.fit(df_feature, Y_train)
    cols = selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_rfembedded(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_rfembedded(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# V CASE 9 : Feature Selection - LASSO
def filter_columns_feature_selection_lasso(X_train, Y_train, columns_considered, num_of_features):
    estimator = LogisticRegression(penalty='l2', C=0.5, solver='newton-cholesky')
    selector = SelectFromModel(estimator=estimator, max_features=num_of_features)
    df_feature = X_train[columns_considered]
    selector.fit(df_feature, Y_train)
    cols = selector.get_support(indices=True)
    df_selected_features = df_feature.iloc[:,cols]
    return df_selected_features.columns

def training_data_feature_selection_lasso(X_train, Y_train, columns_considered, num_of_features):
    columns_selected = filter_columns_feature_selection_lasso(X_train, Y_train, columns_considered, num_of_features)
    columns_omitted = [x for x in columns_considered if x not in columns_selected]
    final_columns = [x for x in list(X_train.columns) if x not in columns_omitted]
    print(final_columns)
    return (X_train[final_columns], Y_train)

# CASE 1 : Train with model XGBRegressor
def fit_and_train_with_model_xgbregressor(X_train, Y_train):
    model = XGBRegressor(objective="reg:logistic")
    model.fit(X_train, Y_train)
    return model

# CASE 2 : Train with model RandomForestRegressor
def fit_and_train_with_model_rfregressor(X_train, Y_train):
    model = RandomForestRegressor()
    model.fit(X_train, Y_train)
    return model

# CASE 3 : Train with model LogisticRegression
def fit_and_train_with_model_logregression(X_train, Y_train):
    model = LogisticRegression()
    model.fit(X_train, Y_train)
    return model

# CASE 4 : Train with model XGBClassifier
def fit_and_train_with_model_xgbclassifier(X_train, Y_train):
    model = XGBClassifier(n_estimators=50, max_depth=3, n_jobs=-3, verbosity=1, enable_categorical=True)
    model.fit(X_train, Y_train)
    return model

# CASE 5 : Train with model Catboost Classifier 
def fit_and_train_with_model_catboostclassifier(X_train, Y_train):
    model = CatBoostClassifier(n_estimators=50, max_depth=3, verbose=1)
    model.fit(X_train, Y_train)
    return model

# CASE 6 : Train with model RandomForest Classifier
def fit_and_train_with_model_rfclassifier(X_train, Y_train):
    model = RandomForestClassifier(n_estimators=50, max_depth=3, n_jobs=-3, verbose=1)
    model.fit(X_train, Y_train)
    return model

In [18]:
# FEATURE PREPROCESSING BIG DATASETS AND CREATE XGBOOST MODEL
# 1. Change all numeric columns with MinMaxScaler
scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
columns_minmax_scaler = player_skills_column_included + player_attribute_column_included + \
                        ["start_x", "start_y", "end_x", "end_y", "distance_opponent", 
                         "num_opponent_closer_goal", "num_opponent_in_path", "distance_dribble"]
# big_dataframe_xdribble_model[columns_minmax_scaler] = scaler.fit_transform(big_dataframe_xdribble_model[columns_minmax_scaler])

# 2. Check if data is unbalanced. If it is unbalanced, then do method to oversize the sample
print(big_dataframe_xdribble_model['result_id'].value_counts())

# 3. Change result_id label into float64 type
# big_dataframe_xdribble_model['result_id'] = big_dataframe_xdribble_model['result_id'].astype('float64')

# 4. Remove dataframe instead of having result_id (0,1) --> (fail, success)
big_dataframe_xdribble_model = big_dataframe_xdribble_model[big_dataframe_xdribble_model['result_id'].isin([0,1])]
print(big_dataframe_xdribble_model['result_id'].value_counts())

# 5. Split train data and test data from Big Datasets
all_feature_columns = columns_minmax_scaler + ["is_take_on"]
X_train = big_dataframe_xdribble_model[all_feature_columns]
Y_train = big_dataframe_xdribble_model["result_id"]

# Empty dataframe for saving test result
empty_test_result = pd.DataFrame(columns=COLUMNS_EXPERIMENT_RESULT, index=[0])

for case_number in sorted(list(CONFIG_EXPERIMENTS_SCENARIO_MAP.keys())):
    sampling_opt = CONFIG_EXPERIMENTS_SCENARIO_MAP[case_number]["sampling_opt"]
    feature_selection_opt = CONFIG_EXPERIMENTS_SCENARIO_MAP[case_number]["feature_selection_opt"]
    algorithm_opt = CONFIG_EXPERIMENTS_SCENARIO_MAP[case_number]["algorithm_opt"]

    # 6. Do oversampling/undersampling and feature selection at same time
    if sampling_opt == "none":
        X_resampled, Y_resampled = X_train, Y_train
    else:
        X_resampled, Y_resampled = globals()["training_data_" + sampling_opt](X_train, Y_train)
    if feature_selection_opt == "pearson":
        threshold = 0.5
        X_feature_sel, Y_feature_sel = globals()["training_data_feature_selection_" + feature_selection_opt](X_resampled, Y_resampled, player_skills_column_included, threshold)
    else:
        num_of_features = 10
        X_feature_sel, Y_feature_sel = globals()["training_data_feature_selection_" + feature_selection_opt](X_resampled, Y_resampled, player_skills_column_included, num_of_features)

    # 7. Do train_test_split on training data
    X_train_split, X_test_split, y_train_split, y_test_split = train_test_split(X_feature_sel, Y_feature_sel, test_size=0.2, random_state=42)

    # 8. Train Model
    model = globals()["fit_and_train_with_model_" + algorithm_opt](X_train_split, y_train_split)

    # 9. Predict Testing Data
    y_predict = model.predict(X_test_split)

    # 10. Save test result experiment
    if (USE_EVALUATION_METRIC_CLASSIFICATION):
        rec_score = recall_score(y_test_split, y_predict)
        prec_score = precision_score(y_test_split, y_predict)
        F1_score = f1_score(y_test_split, y_predict)
        acc_score = accuracy_score(y_test_split, y_predict)
        auc_score = roc_auc_score(y_test_split, y_predict)
        mcc_score = matthews_corrcoef(y_test_split, y_predict)
        brier_score = brier_score_loss(y_test_split, y_predict)
        log_loss_score = log_loss(y_test_split, y_predict)
        balanced_acc_score = balanced_accuracy_score(y_test_split, y_predict)
    else:
        mean_squared_error_score = mean_squared_error(y_test_split, y_predict)
        root_mean_squared_error_score = mean_squared_error(y_test_split, y_predict, squared=False)
        auc_score = roc_auc_score(y_test_split, y_predict)
        brier_score = brier_score_loss(y_test_split, y_predict)
        log_loss_score = log_loss(y_test_split, y_predict)
        mean_absolute_error_score = mean_absolute_error(y_test_split, y_predict)
        r_squared_score = r2_score(y_test_split, y_predict)
        mean_absolute_percentage_error_score = mean_absolute_percentage_error(y_test_split, y_predict)

    maps_new_row = {}
    if USE_EVALUATION_METRIC_CLASSIFICATION:
        eval_metrics_column = COLUMNS_EVALUATION_METRIC_CLASSIFICATION
    else:
        eval_metrics_column = COLUMNS_EVALUATION_METRIC_REGRESSION
    for column in COLUMNS_EXPERIMENT_RESULT:
        if column not in eval_metrics_column:
            if column == "case_number":
                maps_new_row["case_number"] = case_number
            elif column in COLUMNS_SCENARIO_NAME:
                maps_new_row[column] = globals()[column]
        else:
            maps_new_row[column] = globals()[column]     
    new_row = pd.DataFrame(maps_new_row, index=[0])
    empty_test_result = pd.concat([new_row, empty_test_result.loc[:]]).reset_index(drop=True)

    # 11. Save model to external file
    filename = f'xdribble_model_case_{case_number}.sav'
    directory_model = "data/model_xdribble/"
    pickle.dump(model, open(directory_model + filename, 'wb'))

# 12. Save test result experiment to external file
filename = 'xdribble_test_model_experiment_result.csv'
directory_model = "data/model_xdribble/"
empty_test_result.to_csv(directory_model + filename)

1    79648
0     1162
Name: result_id, dtype: int64
1    79648
0     1162
Name: result_id, dtype: int64
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.3356371	total: 148ms	remaining: 7.25s
1:	learn: 0.2016124	total: 151ms	remaining: 3.62s
2:	learn: 0.1375035	total: 155ms	remaining: 2.42s
3:	learn: 0.0997867	total: 157ms	remaining: 1.81s
4:	learn: 0.0777750	total: 160ms	remaining: 1.44s
5:	learn: 0.0645328	total: 163ms	remaining: 1.2s
6:	learn: 0.0562663	total: 166ms	remaining: 1.02s
7:	learn: 0.0478056	total: 169ms	remaining: 886ms
8:	learn: 0.0442258	total: 171ms	remaining: 781ms
9:	learn: 0.0420309	total: 174ms	remaining: 697ms
10:	lear

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['curve', 'dribbling', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['curve', 'dribbling', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1382095	total: 10.7ms	remaining: 527ms
1:	learn: 0.0489578	total: 21.1ms	remaining: 506ms
2:	learn: 0.0460732	total: 27.9ms	remaining: 437ms
3:	learn: 0.0451848	total: 35ms	remaining: 402ms
4:	learn: 0.0443536	total: 41.2ms	remaining: 370ms
5:	learn: 0.0408088	total: 48.6ms	remaining: 356ms
6:	learn: 0.0400112	total: 

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'ball_control', 'dribbling', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'crossing', 'dribbling', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1382095	total: 7.25ms	remaining: 355ms
1:	learn: 0.0489236	total: 14.5ms	remaining: 347ms
2:	learn: 0.0458848	total: 20.7ms	remaining: 324ms
3:	learn: 0.0449128	total: 26.3ms	remaining: 302ms
4:	learn: 0.0442282	total: 32ms	remaining: 288ms
5:	learn: 0.0401637	total: 38.9ms	remaining: 285ms
6:	learn: 0.0

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.1s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
100%|██████████| 10/10 [00:02<00:00,  4.92it/s]


['acceleration', 'finishing', 'interceptions', 'long_shots', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'strength', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


100%|██████████| 10/10 [00:01<00:00,  5.53it/s]


['acceleration', 'finishing', 'interceptions', 'long_shots', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'strength', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1407343	total: 10.7ms	remaining: 525ms
1:	learn: 0.0505039	total: 19.9ms	remaining: 477ms
2:	learn: 0.0458579	total: 28.4ms	remaining: 444ms
3:	learn: 0.0429841	total: 35.9ms	remaining: 413ms
4:	learn: 0.0420521	total: 41.7ms	remaining: 376ms
5:	learn: 0.0415029	total: 47.9ms	remaining: 351ms
6:	learn: 0.0383065	total: 55ms	remaining: 338ms
7:	learn: 0.0379242	total: 61.6ms	remaining: 323ms
8:	learn: 0.0374112	total: 67ms	remaining: 305ms
9:	learn: 0.0371880	total: 73ms	remaining: 292ms
10:	learn: 0.0369182	total: 78.3ms	remaining: 278ms
11:	learn: 0.0368009	total: 85.1ms	remaining: 270ms
12:	learn: 0.0366083	total: 90.8ms	rem

100%|██████████| 10/10 [00:01<00:00,  5.42it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s


['acceleration', 'finishing', 'interceptions', 'long_shots', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'strength', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'finishing', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'stamina', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'dribbling', 'finishing', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1412094	total: 11.5ms	remaining: 564ms
1:	learn: 0.0493912	total: 24.8ms	remaining: 595ms
2:	learn: 0.0460727	total: 37.8ms	remaining: 593ms
3:	learn: 0.0425641	total: 47.2ms	remaining: 543ms
4:	learn: 0.0415127	total: 55.5ms	remaining: 499ms
5:	learn: 0.0411572	total: 62ms	remaining: 454ms
6:	learn: 0.0383976	total: 69.7ms	remaining: 428ms
7:	learn: 0.

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'crossing', 'dribbling', 'freekick_accuracy', 'long_passing', 'positioning', 'reactions', 'sliding_tackle', 'short_passing', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'crossing', 'dribbling', 'freekick_accuracy', 'long_passing', 'positioning', 'reactions', 'sliding_tackle', 'short_passing', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.1404013	total: 7.31ms	remaining: 358ms
1:	learn: 0.0504079	total: 14.8ms	remaining: 354ms
2:	learn: 0.0459279	total: 21.1ms	remaining: 330ms
3:	learn: 0.0421774	total: 28ms	remaining: 323ms
4:	learn: 0.0412879	total: 34.1ms	remaining: 307ms
5:	learn: 0.0382732	total: 41.8ms	remaining: 306ms
6:	

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.4130528	total: 6.03ms	remaining: 295ms
1:	learn: 0.3129966	total: 11.7ms	remaining: 282ms
2:	learn: 0.2676322	total: 17.1ms	remaining: 268ms
3:	learn: 0.2436042	total: 22.7ms	remaining: 261ms
4:	learn: 0.2295996	total: 28.6ms	remaining: 258ms
5:	learn: 0.2212242	total: 33.8ms	remaining: 248ms
6:	learn: 0.2031474	total: 40.4ms	remaining: 248ms
7:	learn: 0.1975909	total: 45.9ms	remaining: 241ms
8:	learn: 0.1955964	total: 51.2ms	remaining: 233ms
9:	learn: 0.1928121	total: 57.9ms	remaining: 231ms
10:	learn: 0.1906205	total: 62.9ms	remaining: 223ms
11:	learn: 0.1851469	total: 69.3ms	remaining: 220

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.5s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['curve', 'dribbling', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['curve', 'dribbling', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2493514	total: 10.7ms	remaining: 522ms
1:	learn: 0.2135668	total: 20.2ms	remaining: 485ms
2:	learn: 0.2000012	total: 30.1ms	remaining: 472ms
3:	learn: 0.1936872	total: 38.8ms	remaining: 447ms
4:	learn: 0.1764913	total: 48.7ms	remaining: 438ms
5:	learn: 0.1714659	total: 58.3ms	remaining: 427ms
6:	learn: 0.1691989	total

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.6s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'dribbling', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'agility', 'dribbling', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2594614	total: 9.69ms	remaining: 475ms
1:	learn: 0.2153049	total: 19.7ms	remaining: 473ms
2:	learn: 0.2033365	total: 29.1ms	remaining: 457ms
3:	learn: 0.1985681	total: 39.5ms	remaining: 455ms
4:	learn: 0.1916189	total: 51.4ms	remaining: 462ms
5:	learn: 0.1896178	total: 60.6ms	remaining: 444ms
6:	learn: 0.18543

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.5s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
100%|██████████| 10/10 [00:01<00:00,  5.70it/s]


['acceleration', 'aggression', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


100%|██████████| 10/10 [00:01<00:00,  5.59it/s]


['acceleration', 'aggression', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2594614	total: 10.2ms	remaining: 499ms
1:	learn: 0.2158958	total: 20.2ms	remaining: 485ms
2:	learn: 0.2044334	total: 28.4ms	remaining: 446ms
3:	learn: 0.1978528	total: 37ms	remaining: 426ms
4:	learn: 0.1907494	total: 45.1ms	remaining: 406ms
5:	learn: 0.1795258	total: 53.6ms	remaining: 393ms
6:	learn: 0.1775166	total: 62.6ms	remaining: 384ms
7:	learn: 0.1762021	total: 72ms	remaining: 378ms
8:	learn: 0.1736527	total: 81.6ms	remaining: 372ms
9:	learn: 0.1716980	total: 89.4ms	remaining: 358ms
10:	learn: 0.1665152	total: 97.3ms	remaining: 345ms
11:	learn: 0.1654343	total: 106ms	remaining: 336ms
12:	learn: 0.1643928	total: 115ms	

100%|██████████| 10/10 [00:01<00:00,  5.64it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.


['acceleration', 'aggression', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.5s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'crossing', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2528144	total: 10.2ms	remaining: 500ms
1:	learn: 0.2133158	total: 19.5ms	remaining: 468ms
2:	learn: 0.1889540	total: 28.5ms	remaining: 446ms
3:	learn: 0.1833686	total: 37.2ms	remaining: 427ms
4:	learn: 0.1763956	total: 46.3ms	remaining: 417ms
5:	learn: 0.1743782	total: 55.3ms	remaining: 405ms
6:	learn: 0.1718701	t

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.5s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['aggression', 'ball_control', 'crossing', 'dribbling', 'long_passing', 'positioning', 'reactions', 'sliding_tackle', 'short_passing', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['aggression', 'ball_control', 'crossing', 'dribbling', 'long_passing', 'positioning', 'reactions', 'sliding_tackle', 'short_passing', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2502073	total: 10.1ms	remaining: 495ms
1:	learn: 0.2158460	total: 19.1ms	remaining: 458ms
2:	learn: 0.2050627	total: 27.4ms	remaining: 429ms
3:	learn: 0.2005801	total: 34.9ms	remaining: 401ms
4:	learn: 0.1932818	total: 42.8ms	remaining: 385ms
5:	learn: 0.1869965	total: 50.2ms	remaining: 368ms
6:	learn: 0.183

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.3s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.5s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.209392
0:	learn: 0.5536707	total: 844us	remaining: 41.4ms
1:	learn: 0.4627609	total: 1.54ms	remaining: 36.9ms
2:	learn: 0.4064469	total: 2.19ms	remaining: 34.4ms
3:	learn: 0.3600303	total: 2.82ms	remaining: 32.5ms
4:	learn: 0.3247995	total: 3.44ms	remaining: 31ms
5:	learn: 0.2974647	total: 4.01ms	remaining: 29.4ms
6:	learn: 0.2774628	total: 4.57ms	remaining: 28.1ms
7:	learn: 0.2618608	total: 5.14ms	remaining: 27ms
8:	learn: 0.2502677	total: 5.84ms	remaining: 26.6ms
9:	learn: 0.2392708	total: 7.16ms	remaining: 28.6ms
10:	learn: 0.2315274	total: 7.8ms	remaining: 27.7ms
11:	learn: 0.2214625	total: 8.61ms	rema

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


Learning rate set to 0.209392
0:	learn: 0.4080848	total: 1.65ms	remaining: 80.9ms
1:	learn: 0.3038124	total: 3.24ms	remaining: 77.7ms
2:	learn: 0.2496843	total: 4.53ms	remaining: 70.9ms
3:	learn: 0.2257591	total: 5.78ms	remaining: 66.4ms
4:	learn: 0.2101475	total: 7.09ms	remaining: 63.8ms
5:	learn: 0.2020379	total: 8.39ms	remaining: 61.5ms
6:	learn: 0.1947711	total: 9.66ms	remaining: 59.3ms
7:	learn: 0.1909709	total: 11ms	remaining: 57.6ms
8:	learn: 0.1805366	total: 12.8ms	remaining: 58.3ms
9:	learn: 0.1783230	total: 14.3ms	remaining: 57.1ms
10:	learn: 0.1746977	total: 15.7ms	remaining: 55.5ms
11:	learn: 0.1698843	total: 17ms	remaining: 53.9ms
12:	learn: 0.1686405	total: 18.4ms	remaining: 52.3ms
13:	learn: 0.1639204	total: 19.6ms	remaining: 50.4ms
14:	learn: 0.1633413	total: 20.9ms	remaining: 48.7ms
15:	learn: 0.1613254	total: 22.1ms	remaining: 46.9ms
16:	learn: 0.1602830	total: 23.4ms	remaining: 45.5ms
17:	learn: 0.1589939	total: 24.8ms	remaining: 44.1ms
18:	learn: 0.1584951	total: 26

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'crossing', 'dribbling', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'agility', 'crossing', 'dribbling', 'finishing', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.209392
0:	learn: 0.4080848	total: 1.37ms	remaining: 67ms
1:	learn: 0.3038124	total: 2.65ms	remaining: 63.5ms
2:	learn: 0.2496843	total: 3.75ms	remaining: 58.8ms
3:	learn: 0.2257591	total: 4.77ms	remaining: 54.8ms
4:	learn: 0.2101475	total: 5.96ms	remaining: 53.6ms
5:	learn: 0.2020379	total: 7.03ms	remaining: 51.5ms
6:	learn: 0.

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'balance', 'dribbling', 'finishing', 'interceptions', 'marking', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


100%|██████████| 10/10 [00:00<00:00, 40.85it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'jumping', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


100%|██████████| 10/10 [00:00<00:00, 31.91it/s]


['acceleration', 'agility', 'finishing', 'interceptions', 'jumping', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.209392
0:	learn: 0.4080848	total: 1.48ms	remaining: 72.5ms
1:	learn: 0.3038124	total: 2.97ms	remaining: 71.2ms
2:	learn: 0.2488519	total: 4.63ms	remaining: 72.5ms
3:	learn: 0.2229313	total: 5.79ms	remaining: 66.7ms
4:	learn: 0.2063181	total: 6.95ms	remaining: 62.6ms
5:	learn: 0.1986307	total: 8.06ms	remaining: 59.1ms
6:	learn: 0.1916147	total: 9.13ms	remaining: 56.1ms
7:	learn: 0.1878090	total: 10.2ms	remaining: 53.4ms
8:	learn: 0.1798967	total: 11.5ms	remaining: 52.6ms
9:	learn: 0.1773761	total: 12.7ms	remaining: 51ms
10:	learn: 0.1733366	total: 14ms	remaining: 49.5ms
11:	learn: 0.1666579	total: 15.1ms	remaining: 47.8ms
12:	learn: 0.166109

100%|██████████| 10/10 [00:00<00:00, 42.18it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'interceptions', 'jumping', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'agility', 'crossing', 'dribbling', 'finishing', 'interceptions', 'marking', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.20

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


14:	learn: 0.1660592	total: 17.8ms	remaining: 41.5ms
15:	learn: 0.1646557	total: 19.1ms	remaining: 40.6ms
16:	learn: 0.1592600	total: 20.4ms	remaining: 39.7ms
17:	learn: 0.1584491	total: 21.8ms	remaining: 38.7ms
18:	learn: 0.1565251	total: 23ms	remaining: 37.5ms
19:	learn: 0.1549662	total: 24.3ms	remaining: 36.4ms
20:	learn: 0.1536330	total: 25.4ms	remaining: 35.1ms
21:	learn: 0.1526038	total: 26.7ms	remaining: 33.9ms
22:	learn: 0.1514940	total: 27.9ms	remaining: 32.7ms
23:	learn: 0.1495231	total: 29.1ms	remaining: 31.5ms
24:	learn: 0.1475724	total: 30.4ms	remaining: 30.4ms
25:	learn: 0.1473600	total: 31.6ms	remaining: 29.2ms
26:	learn: 0.1439455	total: 32.8ms	remaining: 28ms
27:	learn: 0.1425090	total: 34ms	remaining: 26.7ms
28:	learn: 0.1416465	total: 35.2ms	remaining: 25.5ms
29:	learn: 0.1405651	total: 36.7ms	remaining: 24.5ms
30:	learn: 0.1387317	total: 38ms	remaining: 23.3ms
31:	learn: 0.1382159	total: 39.4ms	remaining: 22.1ms
32:	learn: 0.1367558	total: 40.7ms	remaining: 21ms
33:

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.4069201	total: 6.36ms	remaining: 312ms
1:	learn: 0.3023270	total: 10.3ms	remaining: 248ms
2:	learn: 0.2537475	total: 14.5ms	remaining: 228ms
3:	learn: 0.2291273	total: 19ms	remaining: 219ms
4:	learn: 0.2150771	total: 24ms	remaining: 216ms
5:	learn: 0.2055539	total: 27.6ms	remaining: 203ms
6:	learn: 0.1977461	total: 31ms	remaining: 190ms
7:	learn: 0.1935256	total: 36.3ms	remaining: 190ms
8:	learn: 0.1886735	total: 42.7ms	remaining: 195ms
9:	learn: 0.1829972	total: 46.8ms	remaining: 187ms
10:	learn: 0.1696481	total: 52.2ms	remaining: 185ms
11:	learn: 0.1671014	total: 56.6ms	remaining: 179ms
12:

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['curve', 'dribbling', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['curve', 'dribbling', 'finishing', 'interceptions', 'long_shots', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2447881	total: 12.5ms	remaining: 613ms
1:	learn: 0.2064001	total: 27.5ms	remaining: 661ms
2:	learn: 0.1927752	total: 40.7ms	remaining: 638ms
3:	learn: 0.1879067	total: 56.9ms	remaining: 655ms
4:	learn: 0.1812180	total: 99.4ms	remaining: 894ms
5:	learn: 0.1772414	total: 126ms	remaining: 921ms
6:	learn: 0.1747238	total:

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.7s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    1.2s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'crossing', 'finishing', 'freekick_accuracy', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'agility', 'crossing', 'finishing', 'freekick_accuracy', 'interceptions', 'marking', 'sliding_tackle', 'standing_tackle', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2460452	total: 8.95ms	remaining: 439ms
1:	learn: 0.2071820	total: 17.6ms	remaining: 422ms
2:	learn: 0.1934867	total: 25.1ms	remaining: 394ms
3:	learn: 0.1858625	total: 34.7ms	remaining: 399ms
4:	learn: 0.1820421	total: 41.9ms	remaining: 377ms
5:	learn: 0.1684461	total: 49.3ms	remaining: 362ms
6:	

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.8s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
100%|██████████| 10/10 [00:01<00:00,  5.61it/s]


['acceleration', 'aggression', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


100%|██████████| 10/10 [00:01<00:00,  5.29it/s]


['acceleration', 'aggression', 'agility', 'finishing', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2485223	total: 11.1ms	remaining: 542ms
1:	learn: 0.2064748	total: 20.5ms	remaining: 493ms
2:	learn: 0.1923875	total: 30.8ms	remaining: 483ms
3:	learn: 0.1862458	total: 39.9ms	remaining: 459ms
4:	learn: 0.1829554	total: 49.4ms	remaining: 445ms
5:	learn: 0.1695097	total: 58.3ms	remaining: 428ms
6:	learn: 0.1639866	total: 67.9ms	remaining: 417ms
7:	learn: 0.1593753	total: 77.9ms	remaining: 409ms
8:	learn: 0.1577716	total: 86.3ms	remaining: 393ms
9:	learn: 0.1543565	total: 97.2ms	remaining: 389ms
10:	learn: 0.1502415	total: 110ms	remaining: 391ms
11:	learn: 0.1423054	total: 120ms	remaining: 381ms
12:	learn: 0.1410157	total: 131

100%|██████████| 10/10 [00:01<00:00,  5.53it/s]
[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.


['acceleration', 'agility', 'finishing', 'interceptions', 'jumping', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']


[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.6s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    1.0s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['acceleration', 'agility', 'finishing', 'heading_accuracy', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['acceleration', 'agility', 'finishing', 'heading_accuracy', 'interceptions', 'marking', 'positioning', 'sliding_tackle', 'sprint_speed', 'standing_tackle', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2459217	total: 8.5ms	remaining: 416ms
1:	learn: 0.2058050	total: 16.3ms	remaining: 391ms
2:	learn: 0.1919659	total: 24.6ms	remaining: 385ms
3:	learn: 0.1864423	total: 33.1ms	remaining: 381ms
4:	learn: 0.1831706	total: 41ms	remaining: 369ms
5:	learn: 0.1767160	total: 51.1ms	remaining: 37

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.8s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished


['aggression', 'ball_control', 'crossing', 'dribbling', 'long_passing', 'positioning', 'reactions', 'sliding_tackle', 'short_passing', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
['aggression', 'ball_control', 'crossing', 'dribbling', 'long_passing', 'positioning', 'reactions', 'sliding_tackle', 'short_passing', 'volleys', 'height_cm', 'weight_kgs', 'age', 'start_x', 'start_y', 'end_x', 'end_y', 'distance_opponent', 'num_opponent_closer_goal', 'num_opponent_in_path', 'distance_dribble', 'is_take_on']
Learning rate set to 0.5
0:	learn: 0.2505837	total: 9.18ms	remaining: 450ms
1:	learn: 0.2049320	total: 18.5ms	remaining: 445ms
2:	learn: 0.1929809	total: 25.8ms	remaining: 405ms
3:	learn: 0.1884906	total: 34.4ms	remaining: 396ms
4:	learn: 0.1814109	total: 43.9ms	remaining: 395ms
5:	learn: 0.1740828	total: 53.4ms	remaining: 392ms
6:	learn: 0.170

[Parallel(n_jobs=-3)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=-3)]: Done  30 tasks      | elapsed:    0.5s
[Parallel(n_jobs=-3)]: Done  50 out of  50 | elapsed:    0.9s finished
[Parallel(n_jobs=10)]: Using backend ThreadingBackend with 10 concurrent workers.
[Parallel(n_jobs=10)]: Done  30 tasks      | elapsed:    0.0s
[Parallel(n_jobs=10)]: Done  50 out of  50 | elapsed:    0.0s finished
