We analyze all matches in 2015/16 season and train models using Logistic Regression and Random Forest. We choose these 2 algorithms because in the previous notebook with English Premier League data those algorithms were promising. 

In [1]:
## Importing required libraries
import sqlite3
import pandas as pd
import numpy as np
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier 
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import linear_model
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report, accuracy_score
from sklearn.calibration import CalibratedClassifierCV
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer
from time import time
from sklearn.decomposition import PCA, FastICA
from sklearn.pipeline import Pipeline
import warnings

warnings.simplefilter("ignore")

In [2]:
database = 'database.sqlite'
conn = sqlite3.connect(database)

In [3]:
#Fetching required data tables
country_data = pd.read_sql("SELECT * FROM Country;", conn)
league_data = pd.read_sql("SELECT * FROM League;", conn)
match_data = pd.read_sql("SELECT * FROM Match;", conn)
player_data = pd.read_sql("SELECT * FROM Player;", conn)
player_attr_data = pd.read_sql("SELECT * FROM Player_Attributes;", conn)
team_data = pd.read_sql("SELECT * FROM Team;", conn)
team_attr_data = pd.read_sql("SELECT * FROM Team_Attributes;", conn)

In [4]:
match_2015_season = match_data[match_data['season'] == '2015/2016']


In [5]:
season_sliced_columns = ['season', 'match_api_id', 'home_team_api_id', 'away_team_api_id', 'home_team_goal', 'away_team_goal',\
                 'home_player_1', 'home_player_2','home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',\
                 'home_player_7', 'home_player_8', 'home_player_9','home_player_10', 'home_player_11', 'away_player_1',\
                 'away_player_2', 'away_player_3', 'away_player_4', 'away_player_5','away_player_6', 'away_player_7',\
                 'away_player_8', 'away_player_9','away_player_10', 'away_player_11']

In [6]:
match_2015_season_sliced = match_2015_season[season_sliced_columns]

In [7]:
match_2015_season_sliced

Unnamed: 0,season,match_api_id,home_team_api_id,away_team_api_id,home_team_goal,away_team_goal,home_player_1,home_player_2,home_player_3,home_player_4,...,away_player_2,away_player_3,away_player_4,away_player_5,away_player_6,away_player_7,away_player_8,away_player_9,away_player_10,away_player_11
1488,2015/2016,1979832,9997,8342,2,1,242243.0,130738.0,164323.0,210392.0,...,543969.0,445873.0,305132.0,178293.0,109061.0,113868.0,26440.0,465730.0,37069.0,201915.0
1489,2015/2016,1979833,8571,9985,2,1,206592.0,94284.0,156693.0,157375.0,...,174054.0,277848.0,203755.0,33620.0,34183.0,512033.0,242719.0,38378.0,167057.0,202562.0
1490,2015/2016,1979834,9987,1773,3,1,91929.0,465750.0,195448.0,149219.0,...,131404.0,26078.0,179783.0,419238.0,280968.0,303202.0,38777.0,105680.0,409000.0,197848.0
1491,2015/2016,1979835,8573,8203,3,1,107806.0,95615.0,298941.0,30918.0,...,140932.0,186184.0,300977.0,181140.0,178283.0,173957.0,67896.0,157490.0,240044.0,77813.0
1492,2015/2016,1979836,10000,9994,3,1,37900.0,289883.0,563215.0,46232.0,...,149150.0,25791.0,213873.0,460004.0,38290.0,181066.0,277821.0,287341.0,523363.0,526230.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25974,2015/2016,1992091,10190,10191,1,0,42231.0,678384.0,95220.0,638592.0,...,563066.0,8800.0,67304.0,158253.0,133126.0,186524.0,93223.0,121115.0,232110.0,289732.0
25975,2015/2016,1992092,9824,10199,1,2,33272.0,41621.0,25813.0,257845.0,...,114792.0,150007.0,178119.0,27232.0,570830.0,260708.0,201704.0,36382.0,34082.0,95257.0
25976,2015/2016,1992093,9956,10179,2,0,157856.0,274779.0,177689.0,294256.0,...,67349.0,202663.0,32597.0,114794.0,188114.0,25840.0,482200.0,95230.0,451335.0,275122.0
25977,2015/2016,1992094,7896,10243,0,0,,8881.0,173534.0,39646.0,...,121080.0,197757.0,260964.0,231614.0,113235.0,41116.0,462608.0,42262.0,92252.0,194532.0


### Just 2015/16 Season

In [8]:
starting_date = '2015-09-01'
ending_date = '2016-01-01'

In [9]:
higher_up = player_attr_data[player_attr_data['date'] > starting_date]
player_overall_df = higher_up[higher_up['date'] <  ending_date].drop_duplicates(subset = ["player_api_id"])


## currently we are only concerned with overall_rating

player_overall_df = player_overall_df[['player_api_id', 'overall_rating']]

In [10]:
player_overall_df

Unnamed: 0,player_api_id,overall_rating
1,505942,67.0
8,155782,73.0
39,162549,66.0
65,30572,69.0
87,23780,70.0
...,...,...
183823,107281,73.0
183856,491794,58.0
183878,99031,80.0
183896,192132,64.0


## Join Data with Players

In [11]:
experimenting_df = match_2015_season_sliced

In [12]:
experimenting_df

Unnamed: 0,season,match_api_id,home_team_api_id,away_team_api_id,home_team_goal,away_team_goal,home_player_1,home_player_2,home_player_3,home_player_4,...,away_player_2,away_player_3,away_player_4,away_player_5,away_player_6,away_player_7,away_player_8,away_player_9,away_player_10,away_player_11
1488,2015/2016,1979832,9997,8342,2,1,242243.0,130738.0,164323.0,210392.0,...,543969.0,445873.0,305132.0,178293.0,109061.0,113868.0,26440.0,465730.0,37069.0,201915.0
1489,2015/2016,1979833,8571,9985,2,1,206592.0,94284.0,156693.0,157375.0,...,174054.0,277848.0,203755.0,33620.0,34183.0,512033.0,242719.0,38378.0,167057.0,202562.0
1490,2015/2016,1979834,9987,1773,3,1,91929.0,465750.0,195448.0,149219.0,...,131404.0,26078.0,179783.0,419238.0,280968.0,303202.0,38777.0,105680.0,409000.0,197848.0
1491,2015/2016,1979835,8573,8203,3,1,107806.0,95615.0,298941.0,30918.0,...,140932.0,186184.0,300977.0,181140.0,178283.0,173957.0,67896.0,157490.0,240044.0,77813.0
1492,2015/2016,1979836,10000,9994,3,1,37900.0,289883.0,563215.0,46232.0,...,149150.0,25791.0,213873.0,460004.0,38290.0,181066.0,277821.0,287341.0,523363.0,526230.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25974,2015/2016,1992091,10190,10191,1,0,42231.0,678384.0,95220.0,638592.0,...,563066.0,8800.0,67304.0,158253.0,133126.0,186524.0,93223.0,121115.0,232110.0,289732.0
25975,2015/2016,1992092,9824,10199,1,2,33272.0,41621.0,25813.0,257845.0,...,114792.0,150007.0,178119.0,27232.0,570830.0,260708.0,201704.0,36382.0,34082.0,95257.0
25976,2015/2016,1992093,9956,10179,2,0,157856.0,274779.0,177689.0,294256.0,...,67349.0,202663.0,32597.0,114794.0,188114.0,25840.0,482200.0,95230.0,451335.0,275122.0
25977,2015/2016,1992094,7896,10243,0,0,,8881.0,173534.0,39646.0,...,121080.0,197757.0,260964.0,231614.0,113235.0,41116.0,462608.0,42262.0,92252.0,194532.0


In [13]:
columns_to_loop = ['home_player_1', 'home_player_2','home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',\
                 'home_player_7', 'home_player_8', 'home_player_9','home_player_10', 'home_player_11', 'away_player_1',\
                 'away_player_2', 'away_player_3', 'away_player_4', 'away_player_5','away_player_6', 'away_player_7',\
                 'away_player_8', 'away_player_9','away_player_10', 'away_player_11']

In [14]:
column_names_overall = ['home_player_1_overall', 'home_player_2_overall','home_player_3_overall', 'home_player_4_overall', 'home_player_5_overall', 'home_player_6_overall',\
                 'home_player_7_overall', 'home_player_8_overall', 'home_player_9_overall','home_player_10_overall', 'home_player_11_overall', 'away_player_1_overall',\
                 'away_player_2_overall', 'away_player_3_overall', 'away_player_4_overall', 'away_player_5_overall','away_player_6_overall', 'away_player_7_overall',\
                 'away_player_8_overall', 'away_player_9_overall','away_player_10_overall', 'away_player_11_overall']

In [15]:
experimenting_df.columns

Index(['season', 'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10', 'away_player_11'],
      dtype='object')

In [16]:
for i in range(len(column_names_overall)):
    experimenting_df = experimenting_df.merge(player_overall_df,\
                                left_on= columns_to_loop[i], right_on='player_api_id', how='left', suffixes=('_1', '_2'))\
                                .rename(columns={"overall_rating": column_names_overall[i]})

In [17]:
experimenting_df.columns

Index(['season', 'match_api_id', 'home_team_api_id', 'away_team_api_id',
       'home_team_goal', 'away_team_goal', 'home_player_1', 'home_player_2',
       'home_player_3', 'home_player_4', 'home_player_5', 'home_player_6',
       'home_player_7', 'home_player_8', 'home_player_9', 'home_player_10',
       'home_player_11', 'away_player_1', 'away_player_2', 'away_player_3',
       'away_player_4', 'away_player_5', 'away_player_6', 'away_player_7',
       'away_player_8', 'away_player_9', 'away_player_10', 'away_player_11',
       'player_api_id_1', 'home_player_1_overall', 'player_api_id_2',
       'home_player_2_overall', 'player_api_id_1', 'home_player_3_overall',
       'player_api_id_2', 'home_player_4_overall', 'player_api_id_1',
       'home_player_5_overall', 'player_api_id_2', 'home_player_6_overall',
       'player_api_id_1', 'home_player_7_overall', 'player_api_id_2',
       'home_player_8_overall', 'player_api_id_1', 'home_player_9_overall',
       'player_api_id_2', 'home_p

### Create win/lose/draw labels (3 is win, 1 is draw, 0 is lose [based on point system])

In [18]:
conditions = [
    (experimenting_df['home_team_goal'] > experimenting_df['away_team_goal']),
    (experimenting_df['home_team_goal'] == experimenting_df['away_team_goal']),
    (experimenting_df['home_team_goal'] < experimenting_df['away_team_goal']),
    ]

conditions_values = [3.0, 1.0, 0.0]

experimenting_df['results'] = np.select(conditions, conditions_values)


In [19]:
np.unique(experimenting_df['results'])

array([0., 1., 3.])

## Final Dataset

In [20]:
final_df_columns = ['home_player_1_overall', 'home_player_2_overall','home_player_3_overall', 'home_player_4_overall', 'home_player_5_overall', 'home_player_6_overall',\
                 'home_player_7_overall', 'home_player_8_overall', 'home_player_9_overall','home_player_10_overall', 'home_player_11_overall', 'away_player_1_overall',\
                 'away_player_2_overall', 'away_player_3_overall', 'away_player_4_overall', 'away_player_5_overall','away_player_6_overall', 'away_player_7_overall',\
                 'away_player_8_overall', 'away_player_9_overall','away_player_10_overall', 'away_player_11_overall', 'results']

In [21]:
prepared_df = experimenting_df[final_df_columns]

In [22]:
prepared_df

Unnamed: 0,home_player_1_overall,home_player_2_overall,home_player_3_overall,home_player_4_overall,home_player_5_overall,home_player_6_overall,home_player_7_overall,home_player_8_overall,home_player_9_overall,home_player_10_overall,...,away_player_3_overall,away_player_4_overall,away_player_5_overall,away_player_6_overall,away_player_7_overall,away_player_8_overall,away_player_9_overall,away_player_10_overall,away_player_11_overall,results
0,69.0,65.0,75.0,66.0,64.0,60.0,69.0,65.0,69.0,67.0,...,73.0,72.0,72.0,76.0,76.0,74.0,67.0,73.0,72.0,3.0
1,69.0,66.0,73.0,70.0,68.0,71.0,67.0,69.0,69.0,68.0,...,69.0,73.0,72.0,71.0,66.0,73.0,67.0,71.0,73.0,3.0
2,72.0,70.0,74.0,73.0,72.0,64.0,72.0,71.0,70.0,68.0,...,67.0,68.0,62.0,67.0,65.0,70.0,66.0,63.0,65.0,3.0
3,70.0,69.0,68.0,74.0,70.0,72.0,66.0,68.0,70.0,72.0,...,66.0,68.0,67.0,66.0,72.0,68.0,67.0,70.0,69.0,3.0
4,72.0,63.0,64.0,69.0,69.0,68.0,70.0,70.0,69.0,64.0,...,72.0,70.0,64.0,71.0,66.0,72.0,69.0,64.0,65.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3321,63.0,53.0,63.0,60.0,65.0,59.0,65.0,62.0,66.0,67.0,...,61.0,57.0,65.0,63.0,65.0,65.0,65.0,60.0,67.0,3.0
3322,62.0,57.0,62.0,58.0,59.0,62.0,62.0,60.0,66.0,55.0,...,64.0,64.0,68.0,53.0,62.0,67.0,65.0,68.0,70.0,0.0
3323,63.0,60.0,63.0,69.0,62.0,75.0,67.0,72.0,68.0,74.0,...,66.0,73.0,65.0,63.0,68.0,66.0,68.0,66.0,72.0,3.0
3324,,64.0,66.0,53.0,61.0,60.0,60.0,64.0,61.0,59.0,...,63.0,62.0,58.0,61.0,74.0,60.0,73.0,67.0,68.0,1.0


In [23]:
prepared_df = prepared_df.dropna()

In [24]:
prepared_df

Unnamed: 0,home_player_1_overall,home_player_2_overall,home_player_3_overall,home_player_4_overall,home_player_5_overall,home_player_6_overall,home_player_7_overall,home_player_8_overall,home_player_9_overall,home_player_10_overall,...,away_player_3_overall,away_player_4_overall,away_player_5_overall,away_player_6_overall,away_player_7_overall,away_player_8_overall,away_player_9_overall,away_player_10_overall,away_player_11_overall,results
0,69.0,65.0,75.0,66.0,64.0,60.0,69.0,65.0,69.0,67.0,...,73.0,72.0,72.0,76.0,76.0,74.0,67.0,73.0,72.0,3.0
1,69.0,66.0,73.0,70.0,68.0,71.0,67.0,69.0,69.0,68.0,...,69.0,73.0,72.0,71.0,66.0,73.0,67.0,71.0,73.0,3.0
2,72.0,70.0,74.0,73.0,72.0,64.0,72.0,71.0,70.0,68.0,...,67.0,68.0,62.0,67.0,65.0,70.0,66.0,63.0,65.0,3.0
3,70.0,69.0,68.0,74.0,70.0,72.0,66.0,68.0,70.0,72.0,...,66.0,68.0,67.0,66.0,72.0,68.0,67.0,70.0,69.0,3.0
4,72.0,63.0,64.0,69.0,69.0,68.0,70.0,70.0,69.0,64.0,...,72.0,70.0,64.0,71.0,66.0,72.0,69.0,64.0,65.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3319,61.0,60.0,64.0,63.0,66.0,60.0,65.0,55.0,65.0,68.0,...,56.0,57.0,60.0,63.0,65.0,65.0,65.0,65.0,67.0,1.0
3321,63.0,53.0,63.0,60.0,65.0,59.0,65.0,62.0,66.0,67.0,...,61.0,57.0,65.0,63.0,65.0,65.0,65.0,60.0,67.0,3.0
3322,62.0,57.0,62.0,58.0,59.0,62.0,62.0,60.0,66.0,55.0,...,64.0,64.0,68.0,53.0,62.0,67.0,65.0,68.0,70.0,0.0
3323,63.0,60.0,63.0,69.0,62.0,75.0,67.0,72.0,68.0,74.0,...,66.0,73.0,65.0,63.0,68.0,66.0,68.0,66.0,72.0,3.0


### Logistic Regression

In [25]:
features = prepared_df.drop('results', axis=1)

In [26]:
features

Unnamed: 0,home_player_1_overall,home_player_2_overall,home_player_3_overall,home_player_4_overall,home_player_5_overall,home_player_6_overall,home_player_7_overall,home_player_8_overall,home_player_9_overall,home_player_10_overall,...,away_player_2_overall,away_player_3_overall,away_player_4_overall,away_player_5_overall,away_player_6_overall,away_player_7_overall,away_player_8_overall,away_player_9_overall,away_player_10_overall,away_player_11_overall
0,69.0,65.0,75.0,66.0,64.0,60.0,69.0,65.0,69.0,67.0,...,67.0,73.0,72.0,72.0,76.0,76.0,74.0,67.0,73.0,72.0
1,69.0,66.0,73.0,70.0,68.0,71.0,67.0,69.0,69.0,68.0,...,68.0,69.0,73.0,72.0,71.0,66.0,73.0,67.0,71.0,73.0
2,72.0,70.0,74.0,73.0,72.0,64.0,72.0,71.0,70.0,68.0,...,66.0,67.0,68.0,62.0,67.0,65.0,70.0,66.0,63.0,65.0
3,70.0,69.0,68.0,74.0,70.0,72.0,66.0,68.0,70.0,72.0,...,70.0,66.0,68.0,67.0,66.0,72.0,68.0,67.0,70.0,69.0
4,72.0,63.0,64.0,69.0,69.0,68.0,70.0,70.0,69.0,64.0,...,72.0,72.0,70.0,64.0,71.0,66.0,72.0,69.0,64.0,65.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3319,61.0,60.0,64.0,63.0,66.0,60.0,65.0,55.0,65.0,68.0,...,61.0,56.0,57.0,60.0,63.0,65.0,65.0,65.0,65.0,67.0
3321,63.0,53.0,63.0,60.0,65.0,59.0,65.0,62.0,66.0,67.0,...,54.0,61.0,57.0,65.0,63.0,65.0,65.0,65.0,60.0,67.0
3322,62.0,57.0,62.0,58.0,59.0,62.0,62.0,60.0,66.0,55.0,...,60.0,64.0,64.0,68.0,53.0,62.0,67.0,65.0,68.0,70.0
3323,63.0,60.0,63.0,69.0,62.0,75.0,67.0,72.0,68.0,74.0,...,67.0,66.0,73.0,65.0,63.0,68.0,66.0,68.0,66.0,72.0


In [27]:
target = prepared_df['results']

In [28]:
x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.25, random_state=0)

In [29]:
logisticRegr = LogisticRegression()


In [30]:
logisticRegr.fit(x_train, y_train)

LogisticRegression()

In [31]:
logisticRegr.score(x_train, y_train)

0.49630801687763715

In [32]:
y_pred = logisticRegr.predict(x_test)

In [33]:
accuracy_log_reg = accuracy_score(y_test, y_pred)
accuracy_log_reg

0.5339652448657188

In [34]:
logisticRegr_ovr = LogisticRegression(multi_class = "ovr", solver="newton-cg")

In [35]:
logisticRegr_ovr.fit(x_train, y_train)

LogisticRegression(multi_class='ovr', solver='newton-cg')

In [36]:
logisticRegr_ovr.score(x_train, y_train)

0.4947257383966245

In [37]:
y_pred_ovr = logisticRegr_ovr.predict(x_test)

In [38]:
accuracy_log_reg_ovr = accuracy_score(y_test, y_pred_ovr)
accuracy_log_reg_ovr

0.5387045813586098

### Random Forest with hyper parameter tuning

In [39]:
from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
print(random_grid)
{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt'], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4], 'bootstrap': [True, False]}


{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}

In [40]:
rand_forest = RandomForestClassifier()

In [41]:
rf_random = RandomizedSearchCV(estimator = rand_forest, param_distributions = random_grid,\
                               n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)

In [42]:
rf_random.fit(x_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


RandomizedSearchCV(cv=3, estimator=RandomForestClassifier(), n_iter=100,
                   n_jobs=-1,
                   param_distributions={'bootstrap': [True, False],
                                        'max_depth': [10, 20, 30, 40, 50, 60,
                                                      70, 80, 90, 100, 110,
                                                      None],
                                        'max_features': ['auto', 'sqrt'],
                                        'min_samples_leaf': [1, 2, 4],
                                        'min_samples_split': [2, 5, 10],
                                        'n_estimators': [200, 400, 600, 800,
                                                         1000, 1200, 1400, 1600,
                                                         1800, 2000]},
                   random_state=42, verbose=2)

In [43]:
best_random = rf_random.best_estimator_

In [44]:
best_random.fit(x_train, y_train)

RandomForestClassifier(max_depth=60, max_features='sqrt', min_samples_leaf=2,
                       min_samples_split=10, n_estimators=600)

In [45]:
y_pred_best_random_forest = best_random.predict(x_test)

In [46]:
accuracy_score_best_random = accuracy_score(y_test, y_pred_best_random_forest)
accuracy_score_best_random

0.5197472353870458