# Engine Training

In [1]:
# Importing Dependencies
from sklearn.preprocessing import RobustScaler, MinMaxScaler, MaxAbsScaler, StandardScaler, PowerTransformer, QuantileTransformer
from sklearn.ensemble import VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from epl_pipeline import build_pipeline
from lightgbm import LGBMClassifier
from epl_datasets import load_data

import pandas as pd
import numpy as np
import warnings
import pickle
import skopt
import yaml

In [2]:
# Silencing every warning for the remainder of the session
warnings.filterwarnings("ignore")

# Rendering all columns whenever a DataFrame is displayed
pd.set_option("display.max_columns", None)

# Seeding NumPy's global random number generator
np.random.seed(42)

Let's load the YAML file, extract the dependent variables, and assign them to global variables.

In [3]:
# Loading the yaml file
# The encoding is given explicitly so the parsed result does not depend on the
# platform's default text encoding
with open(file = "../Configuration/config.yml", encoding = "utf-8") as yaml_file:
    config = yaml.safe_load(stream = yaml_file)

# The targets are read with direct subscripting rather than chained .get() calls:
# a missing "target" section or model key now raises a KeyError in this cell
# instead of silently assigning None and failing later when the column is used

# Loading the dependent variable for Win modeling
WIN_TARGET = config["target"]["win_model"]

# Loading the dependent variable for Loss modeling
LOSS_TARGET = config["target"]["loss_model"]

# Loading the dependent variable for Draw modeling
DRAW_TARGET = config["target"]["draw_model"]

Let's call the function to load the dataset.

In [4]:
# Fetching the dataset through the project's loader
df = load_data()

# Previewing the top five rows of the dataset
# NOTE(review): the rendered preview contains an "Unnamed: 0" column, which looks
# like a saved index - confirm whether it is dropped before modeling
df.head(n = 5)

Unnamed: 0,season,match_week,match_date,month,day,weekday,referee,home_team,away_team,stadium,attendance,derby_name,club_tier_h,club_tier_a,h_position,a_position,streak_h,streak_a,goals_h,goals_a,possession_h,possession_a,shots_on_target_h,shots_on_target_a,shots_h,shots_a,touches_h,touches_a,passes_h,passes_a,tackles_h,tackles_a,clearances_h,clearances_a,corners_h,corners_a,offsides_h,offsides_a,yellow_cards_h,yellow_cards_a,red_cards_h,red_cards_a,fouls_conceded_h,fouls_conceded_a,result_h,result_a,points_h,points_a,n_epls_h,n_epls_a,n_ucls_h,n_ucls_a,total_n_matches_played_h,total_n_matches_played_a,total_max_points_h,total_max_points_a,total_points_h_cum,total_avg_acc_points_h,total_avg_points_accumulated_last_3_h,total_avg_points_accumulated_last_5_h,total_points_a_cum,total_avg_acc_points_a,total_avg_points_accumulated_last_3_a,total_avg_points_accumulated_last_5_a,total_points_dropped_h,total_avg_dropped_points_h,total_avg_points_dropped_last_3_h,total_avg_points_dropped_last_5_h,total_points_dropped_a,total_avg_dropped_points_a,total_avg_points_dropped_last_3_a,total_avg_points_dropped_last_5_a,total_goals_scored_h,total_avg_goals_scored_h,total_avg_goals_scored_last_3_h,total_avg_goals_scored_last_5_h,total_goals_scored_a,total_avg_goals_scored_a,total_avg_goals_scored_last_3_a,total_avg_goals_scored_last_5_a,total_goals_conceded_h,total_avg_goals_conceded_h,total_avg_goals_conceded_last_3_h,total_avg_goals_conceded_last_5_h,total_goals_conceded_a,total_avg_goals_conceded_a,total_avg_goals_conceded_last_3_a,total_avg_goals_conceded_last_5_a,total_avg_possession_h,total_avg_possession_a,total_avg_possession_last_3_h,total_avg_possession_last_3_a,total_avg_possession_last_5_h,total_avg_possession_last_5_a,total_shots_on_target_cum_h,total_shots_on_target_cum_a,total_avg_shots_on_target_h,total_avg_shots_on_target_a,total_avg_shots_on_target_last_3_h,total_avg_shots_on_target_last_3_a,total_avg_shots_on_target_last_5_h,total_avg_shots_on_target_last_5_a,total_
shots_cum_h,total_shots_cum_a,total_avg_shots_h,total_avg_shots_a,total_avg_shots_last_3_h,total_avg_shots_last_3_a,total_avg_shots_last_5_h,total_avg_shots_last_5_a,total_avg_touches_h,total_avg_touches_a,total_avg_touches_last_3_h,total_avg_touches_last_3_a,total_avg_touches_last_5_h,total_avg_touches_last_5_a,total_avg_passes_h,total_avg_passes_a,total_avg_passes_last_3_h,total_avg_passes_last_3_a,total_avg_passes_last_5_h,total_avg_passes_last_5_a,total_avg_tackles_h,total_avg_tackles_a,total_avg_tackles_last_3_h,total_avg_tackles_last_3_a,total_avg_tackles_last_5_h,total_avg_tackles_last_5_a,total_avg_clearances_h,total_avg_clearances_a,total_avg_clearances_last_3_h,total_avg_clearances_last_3_a,total_avg_clearances_last_5_h,total_avg_clearances_last_5_a,total_avg_corners_h,total_avg_corners_a,total_avg_corners_last_3_h,total_avg_corners_last_3_a,total_avg_corners_last_5_h,total_avg_corners_last_5_a,total_avg_offsides_h,total_avg_offsides_a,total_avg_offsides_last_3_h,total_avg_offsides_last_3_a,total_avg_offsides_last_5_h,total_avg_offsides_last_5_a,total_avg_yellow_cards_h,total_avg_yellow_cards_a,total_avg_yellow_cards_last_3_h,total_avg_yellow_cards_last_3_a,total_avg_yellow_cards_last_5_h,total_avg_yellow_cards_last_5_a,total_avg_fouls_conceded_h,total_avg_fouls_conceded_a,total_avg_fouls_conceded_last_3_h,total_avg_fouls_conceded_last_3_a,total_avg_fouls_conceded_last_5_h,total_avg_fouls_conceded_last_5_a,total_s2g_cum_ratio_h,total_s2g_ratio_last_3_h,total_s2g_ratio_last_5_h,total_s2g_cum_ratio_a,total_s2g_ratio_last_3_a,total_s2g_ratio_last_5_a,total_s2s_cum_ratio_h,total_s2s_ratio_last_3_h,total_s2s_ratio_last_5_h,total_s2s_cum_ratio_a,total_s2s_ratio_last_3_a,total_s2s_ratio_last_5_a,n_matches_played_h,n_matches_played_a,max_points_h,max_points_a,points_h_cum,avg_acc_points_h,avg_points_accumulated_last_3_h,avg_points_accumulated_last_5_h,points_a_cum,avg_acc_points_a,avg_points_accumulated_last_3_a,avg_points_accumulated_last_5_a,points_dropped_h,avg
_dropped_points_h,avg_points_dropped_last_3_h,avg_points_dropped_last_5_h,points_dropped_a,avg_dropped_points_a,avg_points_dropped_last_3_a,avg_points_dropped_last_5_a,goals_scored_h_cum,avg_goals_scored_h,avg_goals_scored_last_3_h,avg_goals_scored_last_5_h,goals_scored_a_cum,avg_goals_scored_a,avg_goals_scored_last_3_a,avg_goals_scored_last_5_a,goals_conceded_h_cum,avg_goals_conceded_h,avg_goals_conceded_last_3_h,avg_goals_conceded_last_5_h,goals_conceded_a_cum,avg_goals_conceded_a,avg_goals_conceded_last_3_a,avg_goals_conceded_last_5_a,avg_possession_h,avg_possession_a,avg_possession_last_3_h,avg_possession_last_3_a,avg_possession_last_5_h,avg_possession_last_5_a,shots_on_target_h_cum,avg_shots_on_target_h,shots_on_target_a_cum,avg_shots_on_target_a,avg_shots_on_target_last_3_h,avg_shots_on_target_last_3_a,avg_shots_on_target_last_5_h,avg_shots_on_target_last_5_a,shots_h_cum,avg_shots_h,shots_a_cum,avg_shots_a,avg_shots_last_3_h,avg_shots_last_3_a,avg_shots_last_5_h,avg_shots_last_5_a,avg_touches_h,avg_touches_a,avg_touches_last_3_h,avg_touches_last_3_a,avg_touches_last_5_h,avg_touches_last_5_a,avg_passes_h,avg_passes_a,avg_passes_last_3_h,avg_passes_last_3_a,avg_passes_last_5_h,avg_passes_last_5_a,avg_tackles_h,avg_tackles_a,avg_tackles_last_3_h,avg_tackles_last_3_a,avg_tackles_last_5_h,avg_tackles_last_5_a,avg_clearances_h,avg_clearances_a,avg_clearances_last_3_h,avg_clearances_last_3_a,avg_clearances_last_5_h,avg_clearances_last_5_a,avg_corners_h,avg_corners_a,avg_corners_last_3_h,avg_corners_last_3_a,avg_corners_last_5_h,avg_corners_last_5_a,avg_offsides_h,avg_offsides_a,avg_offsides_last_3_h,avg_offsides_last_3_a,avg_offsides_last_5_h,avg_offsides_last_5_a,avg_yellow_cards_h,avg_yellow_cards_a,avg_yellow_cards_last_3_h,avg_yellow_cards_last_3_a,avg_yellow_cards_last_5_h,avg_yellow_cards_last_5_a,avg_fouls_conceded_h,avg_fouls_conceded_a,avg_fouls_conceded_last_3_h,avg_fouls_conceded_last_3_a,avg_fouls_conceded_last_5_h,avg_fouls_conceded_last_5_a,s2g_cum_rati
o_h,s2g_ratio_last_3_h,s2g_ratio_last_5_h,s2g_cum_ratio_a,s2g_ratio_last_3_a,s2g_ratio_last_5_a,s2s_cum_ratio_h,s2s_ratio_last_3_h,s2s_ratio_last_5_h,s2s_cum_ratio_a,s2s_ratio_last_3_a,s2s_ratio_last_5_a,is_boxing_day,finished_top_4_last_season_h,finished_top_4_last_season_a,won_carabao_cup_last_season_h,won_carabao_cup_last_season_a,won_fa_cup_last_season_h,won_fa_cup_last_season_a,won_epl_last_season_h,won_epl_last_season_a,was_in_ucl_last_season_h,was_in_ucl_last_season_a,was_in_uel_last_season_h,was_in_uel_last_season_a,is_in_ucl_this_season_h,is_in_ucl_this_season_a,is_in_uel_this_season_h,is_in_uel_this_season_a,traditional_top_6_h,traditional_top_6_a,newly_promoted_h,newly_promoted_a,total_goal_difference_h,total_goal_difference_a,goal_difference_h,goal_difference_a,positive_total_goal_difference_h,positive_total_goal_difference_a,positive_goal_difference_h,positive_goal_difference_a,has_been_a_ucl_winner_h,has_been_a_ucl_winner_a,has_been_an_epl_winner_h,has_been_an_epl_winner_a,is_derby,ground_truth,home_win,draw,away_win,link
0,2006/07,1,2006-08-19,8,19,6,Phil Dowd,Bolton,Spurs,University of Bolton Stadium,22899.0,No Derby,Small Club,Considered as a Big Club,4,17,Out of Interval,Out of Interval,2,0,37.8,62.2,4,2,13,10,411,591,243,427,28,17,20,43,6,3,0,0,3,1,0,0,22,22,Win,Defeat,3,0,0,2,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,https://www.premierleague.com/match/5568
1,2006/07,1,2006-08-19,8,19,6,Mark Halsey,Reading,Middlesbrough,Madejski Stadium,23802.0,No Derby,Small Club,Small Club,15,12,Out of Interval,Out of Interval,3,2,52.1,47.9,8,7,14,11,515,441,335,328,23,28,9,18,8,2,0,1,1,3,0,0,7,16,Win,Defeat,3,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,https://www.premierleague.com/match/5572
2,2006/07,1,2006-08-19,8,19,6,Alan Wiley,Portsmouth,Blackburn,Fratton Park,19502.0,No Derby,Considered as a Big Club,Considered as a Big Club,14,3,Out of Interval,Out of Interval,3,0,44.3,55.7,11,3,21,8,415,527,279,327,23,19,15,28,6,2,5,0,2,0,0,1,22,17,Win,Defeat,3,0,2,3,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,https://www.premierleague.com/match/5571
3,2006/07,1,2006-08-19,8,19,6,Martin Atkinson,Newcastle,Wigan,Sports Direct Arena,52227.0,No Derby,Considered as a Big Club,Small Club,13,20,Out of Interval,Out of Interval,2,1,55.3,44.7,5,4,8,13,519,448,352,278,27,33,15,16,4,11,6,3,1,2,0,0,18,19,Win,Defeat,3,0,4,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,https://www.premierleague.com/match/5570
4,2006/07,1,2006-08-19,8,19,6,Peter Walton,Everton,Watford,Goodison Park,39691.0,No Derby,Big Club,Small Club,7,18,Out of Interval,Out of Interval,2,1,47.0,53.0,2,7,8,13,460,493,288,321,17,24,61,32,0,6,5,1,2,2,0,0,12,15,Win,Defeat,3,0,9,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0.0,0.0,50.0,50.0,50.0,50.0,50.0,50.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,https://www.premierleague.com/match/5569


Let's create train features and labels for each of the models.

In [5]:
# Each model gets a feature frame (all columns except its own target) and a label
# series (its target column), built from the same DataFrame
# NOTE(review): only the model's own target is removed here, while the preview of
# the dataset lists other outcome-derived columns (e.g. result_h, ground_truth,
# home_win, draw, away_win). Presumably build_pipeline restricts the features it
# uses - confirm, otherwise these columns leak the label

# Win model: train features and labels
X_train_win, y_train_win = df.drop(columns = WIN_TARGET), df[WIN_TARGET]

# Loss model: train features and labels
X_train_loss, y_train_loss = df.drop(columns = LOSS_TARGET), df[LOSS_TARGET]

# Draw model: train features and labels
X_train_draw, y_train_draw = df.drop(columns = DRAW_TARGET), df[DRAW_TARGET]

***
### Win Modeling

Let's call the function to build a classifier pipeline using **Logistic Regression** algorithm.

In [6]:
# Resetting the global NumPy seed before the search starts
np.random.seed(42)

# Candidate feature scalers offered to the search (this list is reused by later cells)
feature_scalers = [
    RobustScaler(),
    MinMaxScaler(),
    MaxAbsScaler(),
    StandardScaler(),
    PowerTransformer(),
    QuantileTransformer(),
]

# Search space keyed by pipeline parameter name: scaler choice, percentile of
# features kept by the selector, and the classifier's C and tol on a log scale
search_spaces = {
    "feature_transformer__numeric_pipeline__feature_scaler": feature_scalers,
    "feature_selector__percentile": skopt.space.Categorical(categories = list(range(10, 91, 10))),
    "classifier__C": skopt.space.Real(low = 1e-6, high = 100, prior = "log-uniform"),
    "classifier__tol": skopt.space.Real(low = 1e-6, high = 100, prior = "log-uniform"),
}

# Building the Logistic Regression pipeline for the Win model
lr_model_win = build_pipeline(
    classifier = LogisticRegression(solver = "liblinear", random_state = 42),
    apply_feature_scaling = True,
    feature_scaler_type = "robust",
    winsorize_outliers = True,
    apply_feature_selection = True,
    feature_selection_type = "mutual_info",
    apply_bayesian_optimization = True,
    hyperparameters = search_spaces,
    n_iterations = 100,
    train_features = X_train_win,
    train_labels = y_train_win,
    verbosity = 1,
)

# Showing the resulting classifier
lr_model_win

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

Let's call the function to build a classifier pipeline using **Multi Layer Perceptron** algorithm.

In [7]:
# Defining an operating level seed
np.random.seed(seed = 42)

# Creating a dictionary of hyperparameters
# NOTE(review): per the scikit-learn documentation, MLPClassifier's `learning_rate`
# schedule is only used when solver = "sgd". No solver is passed below, so this
# dimension presumably has no effect on the fit - confirm or add the solver
search_spaces = {}
search_spaces["feature_transformer__numeric_pipeline__feature_scaler"] = feature_scalers
search_spaces["feature_selector__percentile"] = skopt.space.Categorical(categories = list(range(10, 91, 10)))
search_spaces["classifier__alpha"] = skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform")
search_spaces["classifier__learning_rate"] = skopt.space.Categorical(categories = ["constant", "invscaling", "adaptive"])
search_spaces["classifier__learning_rate_init"] = skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform")
search_spaces["classifier__tol"] = skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform")

# Building a classifier
# random_state is pinned on the estimator itself, matching the Logistic Regression
# and LightGBM cells: the global NumPy seed above may not carry over into worker
# processes when the model is cloned and refitted in parallel (n_jobs = -1), which
# would make weight initialisation and the early-stopping split non-reproducible
mlp_model_win = build_pipeline(classifier = MLPClassifier(learning_rate = "adaptive", 
                                                          max_iter = 1000, 
                                                          shuffle = False, 
                                                          early_stopping = True,
                                                          n_iter_no_change = 3,
                                                          random_state = 42),  
                               apply_feature_scaling = True,
                               feature_scaler_type = "standard",
                               apply_feature_selection = True,
                               feature_selection_type = "mutual_info",
                               apply_bayesian_optimization = True,
                               hyperparameters = search_spaces,
                               n_iterations = 100,
                               train_features = X_train_win, 
                               train_labels = y_train_win, 
                               verbosity = 1)

# Displaying the architecture of the classifier
mlp_model_win

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

Let's call the function to build a classifier pipeline using **Light Gradient Boosted Machine (LightGBM)** algorithm.

In [8]:
# Resetting the global NumPy seed before the search starts
np.random.seed(42)

# Search space keyed by pipeline parameter name: selector threshold plus the
# booster's depth, learning rate, number of trees and L1/L2 regularisation
search_spaces = {
    "feature_selector__threshold": skopt.space.Categorical(categories = [None, "median"]),
    "classifier__max_depth": skopt.space.Integer(low = 1, high = 6),
    "classifier__learning_rate": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
    "classifier__n_estimators": skopt.space.Integer(low = 100, high = 300),
    "classifier__reg_alpha": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
    "classifier__reg_lambda": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
}

# Building the LightGBM pipeline for the Win model
lgbm_model_win = build_pipeline(
    classifier = LGBMClassifier(objective = "binary", random_state = 42, n_jobs = -1),
    apply_feature_selection = True,
    feature_selection_type = "meta",
    apply_bayesian_optimization = True,
    hyperparameters = search_spaces,
    n_iterations = 100,
    train_features = X_train_win,
    train_labels = y_train_win,
    verbosity = 1,
)

# Showing the resulting classifier
lgbm_model_win

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

Let's create a **stacked** model from the three previously trained models.

In [9]:
# Named base learners that feed the stacking ensemble
win_models = [
    ("Logistic Regression", lr_model_win),
    ("Light Gradient Boosted Machine", lgbm_model_win),
    ("Multi Layer Perceptron", mlp_model_win),
]

# Assembling the stacked classifier and fitting it on the Win train set;
# no final_estimator is passed, so the library default meta-learner is used
win_model = StackingClassifier(estimators = win_models, n_jobs = -1).fit(X = X_train_win, y = y_train_win)

# Showing the fitted ensemble
win_model

***
### Loss Modeling

Let's call the function to build a classifier pipeline using **Logistic Regression** algorithm.

In [10]:
# Resetting the global NumPy seed before the search starts
np.random.seed(42)

# Search space keyed by pipeline parameter name: scaler choice, percentile of
# features kept by the selector, and the classifier's C and tol on a log scale
search_spaces = {
    "feature_transformer__numeric_pipeline__feature_scaler": feature_scalers,
    "feature_selector__percentile": skopt.space.Categorical(categories = list(range(10, 91, 10))),
    "classifier__C": skopt.space.Real(low = 1e-6, high = 100, prior = "log-uniform"),
    "classifier__tol": skopt.space.Real(low = 1e-6, high = 100, prior = "log-uniform"),
}

# Building the Logistic Regression pipeline for the Loss model
lr_model_loss = build_pipeline(
    classifier = LogisticRegression(solver = "liblinear", random_state = 42),
    apply_feature_scaling = True,
    feature_scaler_type = "robust",
    winsorize_outliers = True,
    apply_feature_selection = True,
    feature_selection_type = "mutual_info",
    apply_bayesian_optimization = True,
    hyperparameters = search_spaces,
    n_iterations = 100,
    train_features = X_train_loss,
    train_labels = y_train_loss,
    verbosity = 1,
)

# Showing the resulting classifier
lr_model_loss

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

Let's call the function to build a classifier pipeline using **Multi Layer Perceptron** algorithm.

In [11]:
# Defining an operating level seed
np.random.seed(seed = 42)

# Creating a dictionary of hyperparameters
# NOTE(review): per the scikit-learn documentation, MLPClassifier's `learning_rate`
# schedule is only used when solver = "sgd". No solver is passed below, so this
# dimension presumably has no effect on the fit - confirm or add the solver
search_spaces = {}
search_spaces["feature_transformer__numeric_pipeline__feature_scaler"] = feature_scalers
search_spaces["feature_selector__percentile"] = skopt.space.Categorical(categories = list(range(10, 91, 10)))
search_spaces["classifier__alpha"] = skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform")
search_spaces["classifier__learning_rate"] = skopt.space.Categorical(categories = ["constant", "invscaling", "adaptive"])
search_spaces["classifier__learning_rate_init"] = skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform")
search_spaces["classifier__tol"] = skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform")

# Building a classifier
# random_state is pinned on the estimator itself, matching the Logistic Regression
# and LightGBM cells: the global NumPy seed above may not carry over into worker
# processes when the model is cloned and refitted in parallel (n_jobs = -1), which
# would make weight initialisation and the early-stopping split non-reproducible
mlp_model_loss = build_pipeline(classifier = MLPClassifier(learning_rate = "adaptive", 
                                                           max_iter = 1000, 
                                                           shuffle = False, 
                                                           early_stopping = True,
                                                           n_iter_no_change = 3,
                                                           random_state = 42),  
                                apply_feature_scaling = True,
                                feature_scaler_type = "standard",
                                apply_feature_selection = True,
                                feature_selection_type = "mutual_info",
                                apply_bayesian_optimization = True,
                                hyperparameters = search_spaces,
                                n_iterations = 100,
                                train_features = X_train_loss, 
                                train_labels = y_train_loss, 
                                verbosity = 1)

# Displaying the architecture of the classifier
mlp_model_loss

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

Let's call the function to build a classifier pipeline using **Light Gradient Boosted Machine (LightGBM)** algorithm.

In [12]:
# Resetting the global NumPy seed before the search starts
np.random.seed(42)

# Search space keyed by pipeline parameter name: selector threshold plus the
# booster's depth, learning rate, number of trees and L1/L2 regularisation
search_spaces = {
    "feature_selector__threshold": skopt.space.Categorical(categories = [None, "median"]),
    "classifier__max_depth": skopt.space.Integer(low = 1, high = 6),
    "classifier__learning_rate": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
    "classifier__n_estimators": skopt.space.Integer(low = 100, high = 300),
    "classifier__reg_alpha": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
    "classifier__reg_lambda": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
}

# Building the LightGBM pipeline for the Loss model
lgbm_model_loss = build_pipeline(
    classifier = LGBMClassifier(objective = "binary", random_state = 42, n_jobs = -1),
    apply_feature_selection = True,
    feature_selection_type = "meta",
    apply_bayesian_optimization = True,
    hyperparameters = search_spaces,
    n_iterations = 100,
    train_features = X_train_loss,
    train_labels = y_train_loss,
    verbosity = 1,
)

# Showing the resulting classifier
lgbm_model_loss

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

Let's build a **Voting** model that averages the predicted class probabilities (soft voting) of the previously trained models.

In [13]:
# Pairing each display name with its previously built Loss pipeline
loss_estimator_names = ("Logistic Regression",
                        "Light Gradient Boosted Machine",
                        "Multi Layer Perceptron")
loss_estimator_pipelines = (lr_model_loss, lgbm_model_loss, mlp_model_loss)
loss_models = list(zip(loss_estimator_names, loss_estimator_pipelines))

# Creating the soft-voting ensemble: the predicted class is the one with the
# highest summed predicted probability across the base estimators
loss_model = VotingClassifier(estimators = loss_models, voting = "soft", n_jobs = -1)

# Fitting the ensemble on the Loss training data; per the scikit-learn docs,
# this fits clones of the estimators listed above
loss_model.fit(X = X_train_loss, y = y_train_loss)

***
### Draw Modeling

Let's call the function to build a classifier pipeline using the **Multi Layer Perceptron** algorithm.

In [14]:
# Resetting the global NumPy seed before building the Draw model
np.random.seed(seed = 42)

# Creating the hyperparameter search spaces in a single dictionary; keys use the
# "<pipeline step>__<parameter>" naming, continuous parameters are sampled
# log-uniformly between 1e-8 and 0.1
# NOTE(review): per the scikit-learn docs, MLPClassifier only uses
# `learning_rate` when solver = "sgd"; no solver is set below, so this search
# dimension may have no effect - confirm
search_spaces = {
    "feature_transformer__numeric_pipeline__feature_scaler": feature_scalers,
    # Candidate percentiles: 10, 20, ..., 90
    "feature_selector__percentile": skopt.space.Categorical(categories = list(range(10, 91, 10))),
    "classifier__alpha": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
    "classifier__learning_rate": skopt.space.Categorical(categories = ["constant", "invscaling", "adaptive"]),
    "classifier__learning_rate_init": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
    "classifier__tol": skopt.space.Real(low = 1e-8, high = 0.1, prior = "log-uniform"),
}

# Defining the base neural network: up to 1000 iterations, no shuffling, and
# early stopping after 3 iterations without improvement
draw_base_classifier = MLPClassifier(
    learning_rate = "adaptive",
    max_iter = 1000,
    shuffle = False,
    early_stopping = True,
    n_iter_no_change = 3,
)

# Building the Draw classifier pipeline with feature scaling, mutual-information
# feature selection and a Bayesian optimization budget of 100 iterations
# NOTE(review): build_pipeline is project code (epl_pipeline) - presumably it
# returns the tuned, fitted pipeline; confirm against its implementation
draw_model = build_pipeline(
    classifier = draw_base_classifier,
    apply_feature_scaling = True,
    feature_scaler_type = "standard",
    apply_feature_selection = True,
    feature_selection_type = "mutual_info",
    apply_bayesian_optimization = True,
    hyperparameters = search_spaces,
    n_iterations = 100,
    train_features = X_train_draw,
    train_labels = y_train_draw,
    verbosity = 1,
)

# Displaying the architecture of the classifier
draw_model

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fi

Let's save the classifier pipelines as pickle objects.

In [15]:
# Mapping each destination file to the classifier pipeline it should hold
# (Win, Loss and Draw models, written in that order)
model_artifacts = {
    "../Models/win_model.pickle": win_model,
    "../Models/loss_model.pickle": loss_model,
    "../Models/draw_model.pickle": draw_model,
}

# Saving every classifier pipeline as a pickle object; each file is opened in
# binary write mode and closed as soon as its model has been dumped
for artifact_path, fitted_model in model_artifacts.items():
    with open(file = artifact_path, mode = "wb") as pickled_model:
        pickle.dump(obj = fitted_model, file = pickled_model)