# Notebook Imports

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import requests
from pydantic import BaseModel, ValidationError
from typing import List
from datetime import datetime
import pydantic_core
import concurrent.futures
import json

from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OrdinalEncoder
from tqdm import tqdm
from sklearn.model_selection import RandomizedSearchCV

import statistics
from statistics import mode

import joblib

import time

### Shortcuts

#### 1. [Tunning Hyperparameters for the First Pick - Phase 2](#fp2p_link)
##### 1.1. [Function for the First Pick - Phase 2](#fp23_func_link)
#### 2. [Tunning Hyperparameters for the First Pick - Phase 3](#fp3p_link)
##### 2.1. [Function for the First Pick - Phase 3](#fp45_func_link)
#### 3. [Tunning Hyperparameters for the Second Pick - Phase 1](#sp1p_link)
#### 4. [Tunning Hyperparameters for the Second Pick - Phase 2](#sp2p_link)
#### 5. [Tunning Hyperparameters for the Second Pick - Phase 3](#sp3p_link)

# Scraping & Parsing Data

## Creating Classes for Parsing

In [92]:
# Create classes for parsing
class Hero(BaseModel):
    code: str
    grade: str
    name: str
    job_cd: str
    attribute_cd: str

class Heroes(BaseModel):
    en: List[Hero]

class Player(BaseModel):
    nick_no: int
    world_code: str

class MyTeam(BaseModel):
    pick_order: int
    hero_code: str
    attack_damage: int
    receive_damage: float
    recovery: int
    mvp_point: int
    artifact: str
    equip: list
    respawn: int
    mvp: int
    kill_count: int
    attribute_cd: str
    job_cd: str

class TeamBettleInfoenemy(BaseModel):
    my_team: List[MyTeam]
    
class TeamBettleInfo(BaseModel):
    my_team: List[MyTeam]
      
class Battle(BaseModel):
    battleCompletedate: datetime
    teamBettleInfoenemy: str
    prebanListEnemy: str
    teamBettleInfo: str
    prebanList: str
    iswin: int

class ResultBody(BaseModel):
    nick_no: int
    battle_list: List[Battle]

class BattleListResponse(BaseModel):
    result_body: ResultBody

# Players list
class PlayerListResponse(BaseModel):
    result_body: List[Player]

# Hero list 
class HeroListResponse(BaseModel):
    result_body: List[Heroes]

## Collecting Players ID Numbers

In [12]:
# Create a function that collects query parameters to follow up with a request on battles of top 100 players 
nick_no_list = []
world_code_list = []

def get_query_data():
    try:
        player_resp = requests.post(
            url = 'https://epic7.gg.onstove.com/gameApi/getWorldUserRankingDetail?lang=en&season_code=&world_code=all',
            headers={
                'Accept-Language': 'en-US',
                'Accept': 'application/json, text/plain, */*',
            },
        )
        if player_resp.ok:
            player_response = PlayerListResponse.model_validate(player_resp.json())
            for i in range(len(player_response.result_body)):
                nick_no_list.append(player_response.result_body[i].nick_no)
                world_code_list.append(player_response.result_body[i].world_code)
            query_df = pd.DataFrame({'nick_no': nick_no_list, 'world_code': world_code_list})
            return query_df
        
        else:
            player_resp.raise_for_status()

    except Exception as e:
        print(e)
    

In [13]:
get_query_data()

Unnamed: 0,nick_no,world_code
0,119456895,world_kor
1,192119856,world_eu
2,85251058,world_global
3,72428245,world_asia
4,71212252,world_asia
...,...,...
95,182147370,world_eu
96,200148716,world_global
97,60502181,world_global
98,63047239,world_global


## Collecting Battle Statistics

In [14]:
# Create a function that requests and collects battle info of each player

#right side lists
right_pick_order = []
right_hero_code = []
right_attack_damage = []
right_receive_damage = []
right_recovery = []
right_mvp_point = []
right_artifact = []
right_equip = []
right_respawn= []
right_mvp= []
right_kill_count= []
right_attribute_cd= []
right_job_cd= []
right_postban = []
    
#left side lists
left_pick_order = []
left_hero_code = []
left_attack_damage = []
left_receive_damage = []
left_recovery = []
left_mvp_point = []
left_artifact = []
left_equip = []
left_respawn= []
left_mvp= []
left_kill_count= []
left_attribute_cd= []
left_job_cd= []
left_postban = []
    
right_preban = []
is_win = []
left_preban = []
first_pick = []

def get_player_battles():
#    try:
    #print(len(nick_no_list))
    for i in range(len(nick_no_list)):
    #for i in range(len(nick_no_list[:4])):
        #print(nick_no_list[i])
        resp = requests.post(
            url=f'https://epic7.gg.onstove.com/gameApi/getBattleList?nick_no={nick_no_list[i]}&world_code={world_code_list[i]}&lang=en&season_code=',
            headers={'Accept-Language': 'en-US', 'Accept': 'application/json, text/plain, */*'})
        #print(resp)
        if resp.ok:
            time.sleep(1)
            response = BattleListResponse.model_validate(resp.json())
            #print(response.result_body.battle_list)
            row_count = 0 # looking for rows with error in first pick for loop
            for j in range(len(response.result_body.battle_list)):
                response.result_body.battle_list[j].teamBettleInfoenemy = (  #preprocessing  data on right side characters and battle stats
                    TeamBettleInfoenemy.model_validate_json("{" + response.result_body.battle_list[j].teamBettleInfoenemy + "}"))
                for f in range(len(response.result_body.battle_list[j].teamBettleInfoenemy.my_team)): #gathering right side characters and battle stats
                    #print(len(response.result_body.battle_list[j].teamBettleInfoenemy.my_team))
                    #print(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f])
                    right_pick_order.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].pick_order)
                    right_hero_code.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].hero_code)
                    right_attack_damage.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].attack_damage)
                    right_receive_damage.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].receive_damage)
                    right_recovery.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].recovery)
                    right_mvp_point.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].mvp_point)
                    right_artifact.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].artifact)
                    right_equip.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].equip)
                    right_respawn.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].respawn)
                    right_mvp.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].mvp)
                    right_kill_count.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].kill_count)
                    right_attribute_cd.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].attribute_cd)
                    right_job_cd.append(response.result_body.battle_list[j].teamBettleInfoenemy.my_team[f].job_cd)
                    
                # preprocessing  data on right side bans and adding to the list
                right_side_preban = resp.json()['result_body']['battle_list'][j]['prebanListEnemy']
                json_string = f'{{ {right_side_preban} }}'
                right_side_preban_data = json.loads(json_string)
                right_preban.append(right_side_preban_data['preban_list'])
                
                for postban in range(len(resp.json()['result_body']['battle_list'][j]['enemy_deck']['hero_list'])):
                    if resp.json()['result_body']['battle_list'][j]['enemy_deck']['hero_list'][postban]['ban'] == 1:
                        is_right_postban = resp.json()['result_body']['battle_list'][j]['enemy_deck']['hero_list'][postban]['hero_code']
                        right_postban.append(is_right_postban)
                    # elif resp.json()['result_body']['battle_list'][j]['enemy_deck']['hero_list'][postban]['ban'] == 0:
                    #     pass
                    # else:
                    #     right_postban.append('no_postban_data')
        
                                
                response.result_body.battle_list[j].teamBettleInfo = (  #preprocessing  data on left side characters and battle stats
                    TeamBettleInfo.model_validate_json("{" + response.result_body.battle_list[j].teamBettleInfo + "}"))
                # print(len(response.result_body.battle_list[j].teamBettleInfo.my_team))
                for f in range(len(response.result_body.battle_list[j].teamBettleInfo.my_team)): #gathering left side characters and battle stats
                    left_pick_order.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].pick_order)
                    left_hero_code.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].hero_code)
                    left_attack_damage.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].attack_damage)
                    left_receive_damage.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].receive_damage)
                    left_recovery.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].recovery)
                    left_mvp_point.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].mvp_point)
                    left_artifact.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].artifact)
                    left_equip.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].equip)
                    left_respawn.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].respawn)
                    left_mvp.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].mvp)
                    left_kill_count.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].kill_count)
                    left_attribute_cd.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].attribute_cd)
                    left_job_cd.append(response.result_body.battle_list[j].teamBettleInfo.my_team[f].job_cd)
                                    
                #preprocessing  data on left side bans and adding to the list
                left_side_preban = resp.json()['result_body']['battle_list'][j]['prebanList']
                json_string = f'{{ {left_side_preban} }}'
                left_side_preban_data = json.loads(json_string)
                left_preban.append(left_side_preban_data['preban_list'])
                #print(resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'])
                for postban in range(len(resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'])):
                    print(resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'][postban])
                    if resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'][postban]['ban'] == 1:
                        is_left_postban = resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'][postban]['hero_code']
                        left_postban.append(is_left_postban)
                    # elif resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'][postban]['ban'] == 0:
                    #     print('Das ist chero')
                    # else:
                    #     left_postban.append('no_postban_data')
                        
                        
                #preprocessing  data on batte outcome and adding to the list
                right_side_win = resp.json()['result_body']['battle_list'][j]
                is_win.append(right_side_win['iswin'])
                #first pick
                #print(resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'])
                
                try:
                    left_first_pick = resp.json()['result_body']['battle_list'][j]['my_deck']['hero_list'][0]
                    first_pick.append(left_first_pick['first_pick'])
                    row_count +=1
                except IndexError: 
                    first_pick.append('no_fp_data')
                    left_postban.append('no_postban_data')
                    right_postban.append('no_postban_data')
                    print(f'No data on first pick in row {row_count}')
                
        
            
        else:
            resp.raise_for_status()
# except Exception as e:

#        print(e)

In [15]:
get_player_battles()

{'hero_code': 'c1159', 'first_pick': 1, 'mvp': 0, 'ban': 0}
{'hero_code': 'c2039', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c6037', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c1096', 'first_pick': 0, 'mvp': 0, 'ban': 1}
{'hero_code': 'c1038', 'first_pick': 0, 'mvp': 1, 'ban': 0}
{'hero_code': 'c2112', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c2042', 'first_pick': 0, 'mvp': 0, 'ban': 1}
{'hero_code': 'c1019', 'first_pick': 0, 'mvp': 1, 'ban': 0}
{'hero_code': 'c5016', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c1151', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c2112', 'first_pick': 1, 'mvp': 0, 'ban': 1}
{'hero_code': 'c2039', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c1129', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c2069', 'first_pick': 0, 'mvp': 1, 'ban': 0}
{'hero_code': 'c2102', 'first_pick': 0, 'mvp': 0, 'ban': 0}
{'hero_code': 'c1159', 'first_pick': 1, 'mvp': 0, 'ban': 0}
{'hero_code': 'c2039', 'first_pick': 0, 

In [19]:
# Checking the length of the lists prior to concatinating,
# since the scraped data was inconsistent and had missing values for variables first_pick,
# left_postban and right_postban.
print(len(first_pick), len(left_preban), len(right_preban),len(left_postban), len(right_postban),
      
      len(right_pick_order), len(right_hero_code), len(right_attack_damage), len(right_receive_damage),
      len(right_recovery), len(right_mvp_point), len(right_artifact), len(right_equip), len(right_respawn),
      len(right_mvp), len(right_kill_count), len(right_attribute_cd), len(right_job_cd),
      
      len(right_postban),
      
      len(left_pick_order), len(left_hero_code), len(left_attack_damage),
      len(left_receive_damage), len(left_recovery), len(left_mvp_point), len(left_artifact), len(left_equip),
      len(left_respawn), len(left_mvp), len(left_kill_count), len(left_attribute_cd), len(left_job_cd),
      
      len(left_postban), len(right_preban), len(is_win), len(left_preban), len(first_pick))

10000 10000 10000 10000 10000 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 10000 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 49730 10000 10000 10000 10000 10000


## Concatinating Gathered Data

In [20]:
def make_df():
# Right side df
    right_side_df = pd.DataFrame({'right_pick_order': right_pick_order, 'right_hero_code': right_hero_code, 'right_attack_damage': right_attack_damage, 
    'right_receive_damage': right_receive_damage, 'right_recovery': right_recovery, 'right_mvp_point': right_mvp_point,
    'right_artifact': right_artifact, 'right_equip': right_equip, 'right_respawn': right_respawn, 'right_mvp': right_mvp, 
    'right_kill_count': right_kill_count, 'right_attribute_cd': right_attribute_cd, 'right_job_cd': right_job_cd})
            
# Left side df
    left_side_df = pd.DataFrame({'left_pick_order': left_pick_order, 'left_hero_code': left_hero_code, 'left_attack_damage': left_attack_damage, 
    'left_receive_damage': left_receive_damage, 'left_recovery': left_recovery, 'left_mvp_point': left_mvp_point,
    'left_artifact': left_artifact, 'left_equip': left_equip, 'left_respawn': left_respawn, 'left_mvp': left_mvp, 
    'left_kill_count': left_kill_count, 'left_attribute_cd': left_attribute_cd, 'left_job_cd': left_job_cd})
    
    df_1 = pd.concat([left_side_df, right_side_df], axis= 1)
    
    return df_1
    
# Post bans and first pick have different lenght and will be collected in a separate df
df_2 = pd.DataFrame({'left_preban': left_preban, 'left_postban': left_postban,
                     'right_preban': right_preban, 'right_postban': right_postban,
                     'first_pick': first_pick, 'is_win': is_win})
    


In [21]:
df_1 = make_df()
make_df()

Unnamed: 0,left_pick_order,left_hero_code,left_attack_damage,left_receive_damage,left_recovery,left_mvp_point,left_artifact,left_equip,left_respawn,left_mvp,...,right_receive_damage,right_recovery,right_mvp_point,right_artifact,right_equip,right_respawn,right_mvp,right_kill_count,right_attribute_cd,right_job_cd
0,1,c1159,0,0.00,0,8546,efw35,"[set_speed, set_max_hp]",0,0,...,27562.74,0,27562,efh14,"[set_acc, set_speed]",0,0,0,dark,manauser
1,2,c2039,1267,29297.90,4766,35331,efh06,"[set_counter, set_max_hp]",0,0,...,0.00,0,0,,[],0,0,0,light,knight
2,3,c6037,0,0.00,0,64096,efh20,"[set_immune, set_speed]",0,0,...,21514.00,128,36505,efa02,"[set_immune, set_speed]",0,0,0,fire,assassin
3,4,c1096,0,0.00,0,0,,[],0,0,...,70482.26,0,70482,ef507,"[set_res, set_speed]",0,0,0,dark,warrior
4,5,c1038,50131,0.00,0,67223,efa07,"[set_cri, set_att]",0,1,...,19075.44,2478,118116,ef504,"[set_speed, set_cri]",0,1,0,light,assassin
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49725,1,c2090,0,0.00,0,40181,efh04,"[set_speed, set_max_hp]",0,0,...,0.00,0,22495,efm03,"[set_speed, set_immune]",0,0,0,light,mage
49726,2,c2042,0,9581.75,0,29672,efk03,"[set_max_hp, set_shield]",0,0,...,0.00,0,31004,efa14,"[set_cri_dmg, set_penetrate]",0,1,0,wind,assassin
49727,3,c1156,0,37581.07,0,37581,efk21,"[set_max_hp, set_speed]",0,0,...,0.00,0,24744,efh12,"[set_speed, set_res]",0,0,0,light,manauser
49728,4,c1151,0,0.00,0,0,,[],0,0,...,0.00,0,0,,[],0,0,0,light,mage


### Saving to Pickle

In [22]:
# Save df in .pickle
df_1.to_pickle('./data_1.pickle')
df_2.to_pickle('./data_2.pickle')

In [93]:
df_read_1 =pd.read_pickle('./data_1.pickle').copy() #.copy to keep the original df
df_read_2 =pd.read_pickle('./data_2.pickle').copy()

In [94]:
df_read_1.tail()

Unnamed: 0,left_pick_order,left_hero_code,left_attack_damage,left_receive_damage,left_recovery,left_mvp_point,left_artifact,left_equip,left_respawn,left_mvp,...,right_receive_damage,right_recovery,right_mvp_point,right_artifact,right_equip,right_respawn,right_mvp,right_kill_count,right_attribute_cd,right_job_cd
49725,1,c2090,0,0.0,0,40181,efh04,"[set_speed, set_max_hp]",0,0,...,0.0,0,22495,efm03,"[set_speed, set_immune]",0,0,0,light,mage
49726,2,c2042,0,9581.75,0,29672,efk03,"[set_max_hp, set_shield]",0,0,...,0.0,0,31004,efa14,"[set_cri_dmg, set_penetrate]",0,1,0,wind,assassin
49727,3,c1156,0,37581.07,0,37581,efk21,"[set_max_hp, set_speed]",0,0,...,0.0,0,24744,efh12,"[set_speed, set_res]",0,0,0,light,manauser
49728,4,c1151,0,0.0,0,0,,[],0,0,...,0.0,0,0,,[],0,0,0,light,mage
49729,5,c2016,0,78403.23,0,83425,efk02,"[set_vampire, set_immune]",0,1,...,0.0,0,4988,efw01,"[set_scar, set_cri]",0,0,0,fire,warrior


In [95]:
df_read_1['left_pick_order'].value_counts()

left_pick_order
1    9946
2    9946
3    9946
4    9946
5    9946
Name: count, dtype: int64

In [96]:
df_read_1['right_pick_order'].value_counts()

right_pick_order
1    9946
2    9946
3    9946
4    9946
5    9946
Name: count, dtype: int64

## Reshaping the Dataframe 

In [97]:
# Create empty df with labels to be used in ML
df_reshaped = pd.DataFrame(columns= ['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 'left_pick_order_5',                           #left
                                     'left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4', 'left_hero_code_5',
                                     'left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3', 'left_attack_damage_4', 'left_attack_damage_5',
                                    'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3', 'left_receive_damage_4', 'left_receive_damage_5',
                                    'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3', 'left_kill_count_4', 'left_kill_count_5',
                                    'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3', 'left_attribute_cd_4', 'left_attribute_cd_5',
                                    'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4', 'left_job_cd_5',
                                    'left_preban', 'left_postban',
                                    'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5',                          #right 
                                    'right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 'right_hero_code_4', 'right_hero_code_5',
                                    'right_attack_damage_1', 'right_attack_damage_2', 'right_attack_damage_3', 'right_attack_damage_4', 'right_attack_damage_5',
                                    'right_receive_damage_1', 'right_receive_damage_2', 'right_receive_damage_3', 'right_receive_damage_4', 'right_receive_damage_5',
                                    'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3', 'right_kill_count_4', 'right_kill_count_5',
                                    'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3', 'right_attribute_cd_4', 'right_attribute_cd_5',
                                    'right_job_cd_1', 'right_job_cd_2', 'right_job_cd_3', 'right_job_cd_4', 'right_job_cd_5', 
                                    'right_preban', 'right_postban', 'first_pick', 'is_win'], 
                           index= np.arange(0, 10000))
    
df_reshaped.head()


Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_hero_code_1,left_hero_code_2,left_hero_code_3,left_hero_code_4,left_hero_code_5,...,right_attribute_cd_5,right_job_cd_1,right_job_cd_2,right_job_cd_3,right_job_cd_4,right_job_cd_5,right_preban,right_postban,first_pick,is_win
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


In [99]:
# Fill in the empty df with gathered data
counter = 0
pick_order_counter = 1
for index, row in df_read_1.iterrows():
    if pick_order_counter == 6:
        pick_order_counter = 1
        counter +=1
    df_reshaped.iloc[counter][f'left_pick_order_{pick_order_counter}'] = row['left_pick_order']
    df_reshaped.iloc[counter][f'left_hero_code_{pick_order_counter}'] = row['left_hero_code']
    df_reshaped.iloc[counter][f'left_attack_damage_{pick_order_counter}'] = row['left_attack_damage']
    df_reshaped.iloc[counter][f'left_receive_damage_{pick_order_counter}'] = row['left_receive_damage']
    df_reshaped.iloc[counter][f'left_kill_count_{pick_order_counter}'] = row['left_kill_count']
    df_reshaped.iloc[counter][f'left_attribute_cd_{pick_order_counter}'] = row['left_attribute_cd']
    df_reshaped.iloc[counter][f'left_job_cd_{pick_order_counter}'] = row['left_job_cd']
        
    df_reshaped.iloc[counter][f'right_pick_order_{pick_order_counter}'] = row['right_pick_order']
    df_reshaped.iloc[counter][f'right_hero_code_{pick_order_counter}'] = row['right_hero_code']
    df_reshaped.iloc[counter][f'right_attack_damage_{pick_order_counter}'] = row['right_attack_damage']
    df_reshaped.iloc[counter][f'right_receive_damage_{pick_order_counter}'] = row['right_receive_damage']
    df_reshaped.iloc[counter][f'right_kill_count_{pick_order_counter}'] = row['right_kill_count']
    df_reshaped.iloc[counter][f'right_attribute_cd_{pick_order_counter}'] = row['right_attribute_cd']
    df_reshaped.iloc[counter][f'right_job_cd_{pick_order_counter}'] = row['right_job_cd']
    
    pick_order_counter +=1    
    
    
df_reshaped['left_preban'] = df_read_2['left_preban']
df_reshaped['left_postban'] = df_read_2['left_postban']

    
df_reshaped['right_preban'] = df_read_2['right_preban']
df_reshaped['right_postban'] = df_read_2['right_postban']

df_reshaped['first_pick'] = df_read_2['first_pick']
df_reshaped['is_win'] = df_read_2['is_win']  

    
    

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_reshaped.iloc[counter][f'left_pick_order_{pick_order_counter}'] = row['left_pick_order']
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this

In [100]:
df_reshaped

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_hero_code_1,left_hero_code_2,left_hero_code_3,left_hero_code_4,left_hero_code_5,...,right_attribute_cd_5,right_job_cd_1,right_job_cd_2,right_job_cd_3,right_job_cd_4,right_job_cd_5,right_preban,right_postban,first_pick,is_win
0,1,2,3,4,5,c1159,c2039,c6037,c1096,c1038,...,light,manauser,knight,assassin,warrior,assassin,"[c2112, c2066]",c2042,1,1
1,1,2,3,4,5,c2112,c2042,c1019,c5016,c1151,...,light,knight,warrior,assassin,manauser,knight,"[c2039, c1055]",c2090,0,1
2,1,2,3,4,5,c2112,c2039,c1129,c2069,c2102,...,wind,knight,assassin,assassin,mage,assassin,"[c1118, c2066]",c1014,1,1
3,1,2,3,4,5,c1159,c2039,c6037,c1096,c1135,...,ice,manauser,knight,warrior,assassin,mage,"[c2112, c1125]",c1103,1,1
4,1,2,3,4,5,c1159,c2042,c6037,c1135,c1096,...,ice,manauser,manauser,knight,warrior,mage,"[c1151, c2112]",c2022,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,,,,,,,,,,,...,,,,,,,"[c2112, c2090]",c1134,1,1
9996,,,,,,,,,,,...,,,,,,,"[c2039, c1133]",c1055,0,2
9997,,,,,,,,,,,...,,,,,,,"[c1118, c1151]",c1103,0,2
9998,,,,,,,,,,,...,,,,,,,"[c1118, c1133]",c1055,1,1


# Preprocessing Data

In [101]:
# Drop rows with nan values and with missing values
df_reshaped.isna().sum()

left_pick_order_1    54
left_pick_order_2    54
left_pick_order_3    54
left_pick_order_4    54
left_pick_order_5    54
                     ..
right_job_cd_5       54
right_preban          0
right_postban         0
first_pick            0
is_win                0
Length: 76, dtype: int64

In [102]:
df_reshaped.dropna(inplace = True)
df_reshaped.reset_index(drop=True, inplace= True)
df_reshaped.isna().sum()

left_pick_order_1    0
left_pick_order_2    0
left_pick_order_3    0
left_pick_order_4    0
left_pick_order_5    0
                    ..
right_job_cd_5       0
right_preban         0
right_postban        0
first_pick           0
is_win               0
Length: 76, dtype: int64

In [103]:
# Filter out rows with missing values in columns: first_pick, left_postban and right_postban
df_reshaped = df_reshaped[(df_reshaped['first_pick'] != 'no_fp_data') & (df_reshaped['left_postban'] != 'no_preban_data') & (df_reshaped['right_postban'] != 'no_preban_data')]
df_reshaped

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_hero_code_1,left_hero_code_2,left_hero_code_3,left_hero_code_4,left_hero_code_5,...,right_attribute_cd_5,right_job_cd_1,right_job_cd_2,right_job_cd_3,right_job_cd_4,right_job_cd_5,right_preban,right_postban,first_pick,is_win
0,1,2,3,4,5,c1159,c2039,c6037,c1096,c1038,...,light,manauser,knight,assassin,warrior,assassin,"[c2112, c2066]",c2042,1,1
1,1,2,3,4,5,c2112,c2042,c1019,c5016,c1151,...,light,knight,warrior,assassin,manauser,knight,"[c2039, c1055]",c2090,0,1
2,1,2,3,4,5,c2112,c2039,c1129,c2069,c2102,...,wind,knight,assassin,assassin,mage,assassin,"[c1118, c2066]",c1014,1,1
3,1,2,3,4,5,c1159,c2039,c6037,c1096,c1135,...,ice,manauser,knight,warrior,assassin,mage,"[c2112, c1125]",c1103,1,1
4,1,2,3,4,5,c1159,c2042,c6037,c1135,c1096,...,ice,manauser,manauser,knight,warrior,mage,"[c1151, c2112]",c2022,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9941,1,2,3,4,5,c1159,c2016,c2008,c2066,c2106,...,fire,warrior,knight,manauser,mage,warrior,"[c1118, c2066]",c2088,0,2
9942,1,2,3,4,5,c2090,c1159,c2008,c5082,c1151,...,fire,knight,assassin,warrior,assassin,assassin,"[c2112, c1055]",c2090,0,2
9943,1,2,3,4,5,c2090,c1159,c2106,c2101,c2089,...,fire,manauser,assassin,manauser,warrior,assassin,"[c1055, c2066]",c5089,1,1
9944,1,2,3,4,5,c1159,c2016,c2008,c1117,c2106,...,dark,manauser,mage,assassin,assassin,warrior,"[c2109, c1117]",c2066,0,2


In [33]:
# Change hero codes in df_reshaped to hero names after
# gathering heroes codes and names

hero_names = []
hero_codes = []    

url = "https://static.smilegatemegaport.com/gameRecord/epic7/epic7_hero.json?_=1721065508349"

# Send a GET request to fetch the JSON data
response = requests.get(url)


# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the JSON data
    data = response.json()
    
# Iterate through each hero entry in the JSON data
    for hero in data['en']:
        hero_names.append(hero['name'])     
        hero_codes.append(hero['code'])     

heroes_df = pd.DataFrame(index = hero_codes, data = hero_names)

heroes_df.to_pickle('./heroes_names.pickle')

In [104]:
heroes_df = pd.read_pickle('./heroes_names.pickle')
heroes_df

Unnamed: 0,0
c0001,Mercedes
c0002,Mercedes
c1001,Ras
c1002,Cecilia
c1003,Rose
...,...
c6011,Last Piece Karin
c6014,Wandering Prince Cidd
c6017,Infinite Horizon Achates
c6037,Moon Bunny Dominiel


In [35]:
# Convert gathered data into a dictionary to convert hero codes into names
heroes_dic = heroes_df.to_dict()
print(heroes_dic[0])
print(type(heroes_dic))


{'c0001': 'Mercedes', 'c0002': 'Mercedes', 'c1001': 'Ras', 'c1002': 'Cecilia', 'c1003': 'Rose', 'c1004': 'Silk', 'c1005': 'Mercedes', 'c1006': 'Kise', 'c1007': 'Vildred', 'c1008': 'Armin', 'c1009': 'Charlotte', 'c1010': 'Zerato', 'c1011': 'Karin', 'c1012': 'Corvus', 'c1013': 'Cartuja', 'c1014': 'Cidd', 'c1015': 'Baal & Sezan', 'c1016': 'Yufine', 'c1017': 'Achates', 'c1018': 'Aither', 'c1019': 'Ravi', 'c1020': 'Schuri', 'c1021': 'Dingo', 'c1022': 'Ruele of Light', 'c1023': 'Kayron', 'c1024': 'Iseria', 'c1027': 'Charles', 'c1028': 'Clarissa', 'c1029': 'Leo', 'c1030': 'Yuna', 'c1031': 'Lots', 'c1032': 'Maya', 'c1033': 'Coli', 'c1034': 'Straze', 'c1035': 'Purrgis', 'c1036': 'Crozet', 'c1037': 'Dominiel', 'c1038': 'Sez', 'c1039': 'Haste', 'c1040': 'Serila', 'c1042': 'Tywin', 'c1043': 'Romann', 'c1044': 'Mui', 'c1046': 'Lidica', 'c1047': 'Ken', 'c1048': 'Aramintha', 'c1049': 'Chloe', 'c1050': 'Tenebria', 'c1053': 'Basar', 'c1054': 'Rin', 'c1055': 'Jenua', 'c1062': 'Angelica', 'c1065': 'Surin

In [105]:
# Add information on prebans to the df
df_reshaped[['left_preban_1', 'left_preban_2']] = pd.DataFrame(df_reshaped.left_preban.to_list(), index= df_reshaped.index)
#df_reshaped[['left_preban_1', 'left_preban_2', 'left_preban']]
df_reshaped[['right_preban_1', 'right_preban_2']] = pd.DataFrame(df_reshaped.right_preban.to_list(), index= df_reshaped.index)
df_reshaped.drop(['left_preban', 'right_preban'], axis = 1, inplace= True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reshaped[['left_preban_1', 'left_preban_2']] = pd.DataFrame(df_reshaped.left_preban.to_list(), index= df_reshaped.index)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reshaped[['left_preban_1', 'left_preban_2']] = pd.DataFrame(df_reshaped.left_preban.to_list(), index= df_reshaped.index)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

In [106]:
df_reshaped[['left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2']]

Unnamed: 0,left_preban_1,left_preban_2,right_preban_1,right_preban_2
0,c1133,c2066,c2112,c2066
1,c1133,c2066,c2039,c1055
2,c1133,c2066,c1118,c2066
3,c1133,c2066,c2112,c1125
4,c2066,c1133,c1151,c2112
...,...,...,...,...
9941,c2112,c1055,c1118,c2066
9942,c2112,c1055,c2112,c1055
9943,c2112,c1055,c1055,c2066
9944,c2112,c1055,c2109,c1117


In [107]:
# Replace hero codes with names
df_reshaped.replace(to_replace= heroes_dic[0], inplace= True)
df_reshaped

NameError: name 'heroes_dic' is not defined

In [108]:
df_reshaped.columns

Index(['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'left_pick_order_4', 'left_pick_order_5', 'left_hero_code_1',
       'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
       'left_hero_code_5', 'left_attack_damage_1', 'left_attack_damage_2',
       'left_attack_damage_3', 'left_attack_damage_4', 'left_attack_damage_5',
       'left_receive_damage_1', 'left_receive_damage_2',
       'left_receive_damage_3', 'left_receive_damage_4',
       'left_receive_damage_5', 'left_kill_count_1', 'left_kill_count_2',
       'left_kill_count_3', 'left_kill_count_4', 'left_kill_count_5',
       'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3',
       'left_attribute_cd_4', 'left_attribute_cd_5', 'left_job_cd_1',
       'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4', 'left_job_cd_5',
       'left_postban', 'right_pick_order_1', 'right_pick_order_2',
       'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5',
       'right_hero_co

In [109]:
# Change df values to relevant dtypes
# df_reshaped.convert_dtypes().dtypes

# integers
df_reshaped[['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'left_pick_order_4', 'left_pick_order_5', 'left_attack_damage_1', 'left_attack_damage_2',
       'left_attack_damage_3', 'left_attack_damage_4', 'left_attack_damage_5',
       'left_receive_damage_1', 'left_receive_damage_2',
       'left_receive_damage_3', 'left_receive_damage_4',
       'left_receive_damage_5', 'left_kill_count_1', 'left_kill_count_2',
       'left_kill_count_3', 'left_kill_count_4', 'left_kill_count_5',
       'right_pick_order_1', 'right_pick_order_2',
       'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5', 'right_attack_damage_1',
       'right_attack_damage_2', 'right_attack_damage_3',
       'right_attack_damage_4', 'right_attack_damage_5',
       'right_receive_damage_1', 'right_receive_damage_2',
       'right_receive_damage_3', 'right_receive_damage_4',
       'right_receive_damage_5', 'right_kill_count_1', 'right_kill_count_2',
       'right_kill_count_3', 'right_kill_count_4', 'right_kill_count_5',
       'first_pick', 'is_win']] = df_reshaped[['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'left_pick_order_4', 'left_pick_order_5', 'left_attack_damage_1', 'left_attack_damage_2',
       'left_attack_damage_3', 'left_attack_damage_4', 'left_attack_damage_5',
       'left_receive_damage_1', 'left_receive_damage_2',
       'left_receive_damage_3', 'left_receive_damage_4',
       'left_receive_damage_5', 'left_kill_count_1', 'left_kill_count_2',
       'left_kill_count_3', 'left_kill_count_4', 'left_kill_count_5',
       'right_pick_order_1', 'right_pick_order_2',
       'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5', 'right_attack_damage_1',
       'right_attack_damage_2', 'right_attack_damage_3',
       'right_attack_damage_4', 'right_attack_damage_5',
       'right_receive_damage_1', 'right_receive_damage_2',
       'right_receive_damage_3', 'right_receive_damage_4',
       'right_receive_damage_5', 'right_kill_count_1', 'right_kill_count_2',
       'right_kill_count_3', 'right_kill_count_4', 'right_kill_count_5',
       'first_pick', 'is_win']].astype(int)


#strings
df_reshaped[['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
       'left_hero_code_5', 'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3',
       'left_attribute_cd_4', 'left_attribute_cd_5', 'left_job_cd_1',
       'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4', 'left_job_cd_5',
       'left_postban','right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3',
       'right_hero_code_4', 'right_hero_code_5', 'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3',
       'right_attribute_cd_4', 'right_attribute_cd_5', 'right_job_cd_1',
       'right_job_cd_2', 'right_job_cd_3', 'right_job_cd_4', 'right_job_cd_5',
       'right_postban', 'left_preban_1',
       'left_preban_2', 'right_preban_1', 'right_preban_2']] = df_reshaped[['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
       'left_hero_code_5', 'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3',
       'left_attribute_cd_4', 'left_attribute_cd_5', 'left_job_cd_1',
       'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4', 'left_job_cd_5',
       'left_postban','right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3',
       'right_hero_code_4', 'right_hero_code_5', 'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3',
       'right_attribute_cd_4', 'right_attribute_cd_5', 'right_job_cd_1',
       'right_job_cd_2', 'right_job_cd_3', 'right_job_cd_4', 'right_job_cd_5',
       'right_postban', 'left_preban_1',
       'left_preban_2', 'right_preban_1', 'right_preban_2']].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reshaped[['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reshaped[['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',


In [110]:
df_reshaped.describe()

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_attack_damage_1,left_attack_damage_2,left_attack_damage_3,left_attack_damage_4,left_attack_damage_5,...,right_receive_damage_3,right_receive_damage_4,right_receive_damage_5,right_kill_count_1,right_kill_count_2,right_kill_count_3,right_kill_count_4,right_kill_count_5,first_pick,is_win
count,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,...,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0,9892.0
mean,1.0,2.0,3.0,4.0,5.0,8358.296401,11651.846846,11073.439143,13027.751415,10190.808532,...,39982.515063,25037.167711,17982.807521,0.16023,0.230894,0.235342,0.290841,0.236454,0.496563,1.391832
std,0.0,0.0,0.0,0.0,0.0,12387.267146,15707.541664,16893.497811,18449.573722,16816.526785,...,37798.12589,32752.530231,28656.323167,0.461985,0.593552,0.609974,0.706874,0.635289,0.500013,0.488184
min,1.0,2.0,3.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
25%,1.0,2.0,3.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,...,13374.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
50%,1.0,2.0,3.0,4.0,5.0,2463.5,5411.5,3934.5,3230.5,0.0,...,27567.0,15180.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,1.0,2.0,3.0,4.0,5.0,12761.75,17441.25,14322.0,21650.75,15992.0,...,57568.25,36902.0,25727.5,0.0,0.0,0.0,0.0,0.0,1.0,2.0
max,1.0,2.0,3.0,4.0,5.0,111032.0,132986.0,148284.0,125165.0,125145.0,...,310896.0,288966.0,293721.0,6.0,6.0,5.0,7.0,5.0,1.0,2.0


In [42]:
# Save preprocessed df in a pickle format
df_reshaped.to_pickle('./data_final.pickle')

# Explolatory Data Analysis

In [3]:
df_reshaped = pd.read_pickle('./data_final.pickle')

In [44]:
# Quick EDA by utilising ProfileReport
from ydata_profiling import ProfileReport

profile = ProfileReport(df_reshaped)
profile.to_file('./EDA.json')
profile.to_file('./EDA.html')

# Numeric data requires sclaling

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

(using `df.profile_report(correlations={"auto": {"calculate": False}})`
If this is problematic for your use case, please report this as an issue:
https://github.com/ydataai/ydata-profiling/issues
(include the error message: 'could not convert string to float: 'wind'')


Render JSON:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

# Feature & Target Split

In [5]:
# defining features and targets

# selected features for the 1st iter with Ordinal Encoding
features = df_reshaped.drop(['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
       'left_hero_code_5'], axis = 1)

#selected features for the 2nd iter with Ordinal Encoding
# features = df_reshaped[['right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 'right_hero_code_4', 'right_hero_code_5',
#                        'first_pick', 'right_postban', 'left_postban', 'left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2', 'is_win']]

target = df_reshaped[['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
       'left_hero_code_5']]

In [113]:
features.head()

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_attack_damage_1,left_attack_damage_2,left_attack_damage_3,left_attack_damage_4,left_attack_damage_5,...,right_job_cd_3,right_job_cd_4,right_job_cd_5,right_postban,first_pick,is_win,left_preban_1,left_preban_2,right_preban_1,right_preban_2
0,1,2,3,4,5,0,1267,0,0,50131,...,assassin,warrior,assassin,Ambitious Tywin,1,1,Zio,New Moon Luna,Sea Phantom Politis,New Moon Luna
1,1,2,3,4,5,3112,0,53383,26621,2975,...,assassin,manauser,knight,Death Dealer Ray,0,1,Zio,New Moon Luna,Blood Moon Haste,Jenua
2,1,2,3,4,5,0,4084,6434,47260,3828,...,assassin,mage,assassin,Cidd,1,1,Zio,New Moon Luna,Ran,New Moon Luna
3,1,2,3,4,5,2043,16469,1228,32659,0,...,warrior,assassin,mage,Celine,1,1,Zio,New Moon Luna,Sea Phantom Politis,Peira
4,1,2,3,4,5,0,0,0,42649,32357,...,knight,warrior,mage,Destina,1,1,New Moon Luna,Zio,Nahkwol,Sea Phantom Politis


In [114]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size= 0.2, shuffle = True, random_state= 890)

# Encoding Categorical Data

### Ordinal Encoder

In [115]:
# Initializing an ordinal encoder for categorical variables
enc = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)

In [116]:
# Encoding the categorical features

# categorical labels for the 1st iter
categorical = ['left_attribute_cd_1', 'left_attribute_cd_2',
       'left_attribute_cd_3', 'left_attribute_cd_4', 'left_attribute_cd_5',
       'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4',
       'left_job_cd_5', 'left_postban', 'right_hero_code_1', 'right_hero_code_2',
       'right_hero_code_3', 'right_hero_code_4', 'right_hero_code_5', 'right_attribute_cd_1',
       'right_attribute_cd_2', 'right_attribute_cd_3', 'right_attribute_cd_4',
       'right_attribute_cd_5', 'right_job_cd_1', 'right_job_cd_2',
       'right_job_cd_3', 'right_job_cd_4', 'right_job_cd_5', 'right_postban', 'left_preban_1', 'left_preban_2',
       'right_preban_1', 'right_preban_2']

X_train[categorical] = enc.fit_transform(X_train[categorical].to_numpy())
X_test[categorical] = enc.transform(X_test[categorical].to_numpy())

# categorical labels for the 2nd iter
# categorical = ['right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 'right_hero_code_4', 'right_hero_code_5',
#                         'first_pick', 'right_postban', 'left_postban', 'left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2']
# X_train[categorical] = enc.fit_transform(X_train[categorical].to_numpy())
# X_test[categorical] = enc.transform(X_test[categorical].to_numpy())

In [117]:
X_train

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_attack_damage_1,left_attack_damage_2,left_attack_damage_3,left_attack_damage_4,left_attack_damage_5,...,right_job_cd_3,right_job_cd_4,right_job_cd_5,right_postban,first_pick,is_win,left_preban_1,left_preban_2,right_preban_1,right_preban_2
5234,1,2,3,4,5,2528,13612,5251,0,41299,...,2.0,2.0,2.0,106.0,1,1,19.0,17.0,32.0,37.0
7320,1,2,3,4,5,0,0,101229,0,883,...,1.0,1.0,1.0,45.0,0,2,4.0,18.0,32.0,33.0
2133,1,2,3,4,5,0,37189,533,9538,10111,...,5.0,2.0,2.0,1.0,0,1,23.0,19.0,39.0,30.0
6434,1,2,3,4,5,0,1043,1161,10008,55067,...,1.0,0.0,5.0,3.0,0,2,19.0,23.0,22.0,28.0
1313,1,2,3,4,5,0,0,0,0,0,...,2.0,2.0,2.0,30.0,0,2,5.0,23.0,39.0,30.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5713,1,2,3,4,5,14778,17639,32984,0,26004,...,2.0,1.0,1.0,62.0,0,2,27.0,19.0,13.0,19.0
3477,1,2,3,4,5,4385,23122,43164,22209,0,...,1.0,1.0,5.0,107.0,0,2,13.0,19.0,30.0,37.0
9204,1,2,3,4,5,13295,0,18039,68455,0,...,1.0,1.0,0.0,5.0,1,1,27.0,3.0,32.0,37.0
1142,1,2,3,4,5,0,2160,2153,18865,52701,...,1.0,0.0,5.0,39.0,0,1,23.0,19.0,39.0,30.0


In [118]:
y_test

Unnamed: 0,left_hero_code_1,left_hero_code_2,left_hero_code_3,left_hero_code_4,left_hero_code_5
3767,Laia,Abyssal Yufine,Ambitious Tywin,Albedo,Infinite Horizon Achates
3281,Blood Moon Haste,Savior Adin,Ran,Sea Phantom Politis,Death Dealer Ray
5924,Laia,Ambitious Tywin,Belian,Peira,Jenua
8028,Blood Moon Haste,Death Dealer Ray,Urban Shadow Choux,Crimson Armin,Celine
7041,New Moon Luna,Dragon Bride Senya,Navy Captain Landy,Laia,Jenua
...,...,...,...,...,...
6749,Laia,Abyssal Yufine,Crimson Armin,Moon Bunny Dominiel,Albedo
1464,Blood Moon Haste,Conqueror Lilias,Top Model Luluca,Politis,Nahkwol
5260,Jenua,Urban Shadow Choux,Silvertide Christy,Midnight Gala Lilias,Celine
8784,Dragon Bride Senya,Jenua,Moon Bunny Dominiel,Midnight Gala Lilias,Rimuru


# Scaling Numeric Data

In [119]:
from sklearn.preprocessing import StandardScaler

In [120]:
# Define numeric labels
numeric = ['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'left_pick_order_4', 'left_pick_order_5', 'left_attack_damage_1',
       'left_attack_damage_2', 'left_attack_damage_3', 'left_attack_damage_4',
       'left_attack_damage_5', 'left_receive_damage_1',
       'left_receive_damage_2', 'left_receive_damage_3',
       'left_receive_damage_4', 'left_receive_damage_5', 'left_kill_count_1',
       'left_kill_count_2', 'left_kill_count_3', 'left_kill_count_4',
       'left_kill_count_5', 'right_pick_order_1',
       'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4',
       'right_pick_order_5', 'right_attack_damage_1', 'right_attack_damage_2',
       'right_attack_damage_3', 'right_attack_damage_4',
       'right_attack_damage_5', 'right_receive_damage_1',
       'right_receive_damage_2', 'right_receive_damage_3',
       'right_receive_damage_4', 'right_receive_damage_5',
       'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3',
       'right_kill_count_4', 'right_kill_count_5']

In [121]:
# Scale
scaler = StandardScaler()
scaler.fit(X_train[numeric])

In [122]:
X_train[numeric] = scaler.transform(X_train[numeric])
X_test[numeric] = scaler.transform(X_test[numeric])
X_train.describe()

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_attack_damage_1,left_attack_damage_2,left_attack_damage_3,left_attack_damage_4,left_attack_damage_5,...,right_job_cd_3,right_job_cd_4,right_job_cd_5,right_postban,first_pick,is_win,left_preban_1,left_preban_2,right_preban_1,right_preban_2
count,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,...,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0,7913.0
mean,0.0,0.0,0.0,0.0,0.0,-5.567250000000001e-17,-3.771363e-17,6.914165000000001e-17,7.183548e-17,-1.5265040000000002e-17,...,1.980412,2.206748,2.298496,68.938582,0.495387,1.392519,17.923796,17.703399,28.98711,27.223051
std,0.0,0.0,0.0,0.0,0.0,1.000063,1.000063,1.000063,1.000063,1.000063,...,1.608007,1.760989,1.835326,40.354396,0.50001,0.488342,6.841602,6.920235,12.057437,11.530049
min,0.0,0.0,0.0,0.0,0.0,-0.6705173,-0.743798,-0.6499785,-0.7020787,-0.6060766,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,-0.6705173,-0.743798,-0.6499785,-0.7020787,-0.6060766,...,1.0,1.0,1.0,33.0,0.0,1.0,13.0,13.0,22.0,19.0
50%,0.0,0.0,0.0,0.0,0.0,-0.4740564,-0.3959401,-0.4223488,-0.5356759,-0.6060766,...,2.0,2.0,2.0,71.0,0.0,1.0,19.0,19.0,32.0,30.0
75%,0.0,0.0,0.0,0.0,0.0,0.337314,0.3750707,0.1846055,0.4670172,0.3501339,...,3.0,4.0,4.0,99.0,1.0,2.0,23.0,23.0,39.0,37.0
max,0.0,0.0,0.0,0.0,0.0,8.298826,7.713283,8.004855,6.073395,6.892198,...,5.0,5.0,5.0,146.0,1.0,2.0,27.0,27.0,44.0,44.0


# Training and Evaluating ML Models

## MultiOutputClassifier

Depending on what player gets pick a hero first, they pick heroes in accordance with a specific pattern.
If Player 1 gets to pick first: 
1. Player 1 picks 1 hero;
2. Player 2 picks 2 heroes;
3. Player 1 picks 2 heroes;
4. Player 2 picks 2 heroes;
5. Player 1 picks 2 heroes;
6. Player 2 picks 1 hero.

Thus, each player selects 5 heroes and postbans 1. <br>
If Player 1 gets to pick second - pick order remains, but Player 1 and Player 2 switch positions. <br>

Prior to limiting algorithm targets and features according to the pick order,
I am going to evaluate and select a model that have the best performance based on prediction of all 5 targets.


In [2]:
# Import a model for working with multiple outputs
from sklearn.multioutput import MultiOutputClassifier


### Logistic Regression

In [3]:
from sklearn.linear_model import LogisticRegression

In [125]:
# Using Logistic Regression for multioutput and evaluaing the result after Ordinal Encoding
classifier = MultiOutputClassifier(LogisticRegression(solver='saga'))
classifier.fit(X_train, y_train)
predictions= classifier.predict(X_test)
classifier.score(X_test, y_test)

#1st  iter result: 0.009688934217236104
##2nd  iter result: 0.0



0.013137948458817585

In [55]:
# Find a number of iterations that yeilds the highest score
solvers = ['lbfgs', 'newton-cg', 'sag', 'saga']
best_solver = None
best_model_lr = None
best_iter_lr = 0
best_result_lr = 0
for iter in tqdm(range(500)):
    for solver in range(len(solvers)):
        multi_lr = MultiOutputClassifier(LogisticRegression(random_state=890, max_iter = iter, solver = solvers[solver]))  # solvers with multiclass support: lbfgs, newton-cg, sag, saga
        multi_lr.fit(X_train, y_train)
        prediction_lr = multi_lr.predict(X_test)
        result_lr = multi_lr.score(X_test, y_test)
        if best_result_lr < result_lr:
            best_model_lr = multi_lr
            best_iter_lr = iter
            best_solver = solver
            best_result_lr = result_lr
    print('Mean accuracy of predicted target vs true target:', best_result_lr)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Mean accuracy of predicted target vs true target: 0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

KeyboardInterrupt: 

In [20]:
print('best iteration number =', best_iter_lr, '; ',
      'best solver =', best_solver, '; ',
      'best Accuracy = ', best_result_lr, '; ',
      'best model = ', best_model_lr, '; ',
      end ='')

# Increasing number of iterations from 100 to 500 and supplying different solvers did not result in a good model.
# Need to try using different encoding and discarding numeric features
# Discarding numeric features did not change the result

#best iteration number = 400 ;  best solver = 1 ;  best Accuracy =  0.09994900560938297 ;  best model =  MultiOutputClassifier(estimator=LogisticRegression(max_iter=400,
                                                #    random_state=890,
                                                #    solver='newton-cg'))

best iteration number = 400 ;  best solver = 1 ;  best Accuracy =  0.09994900560938297 ;  best model =  MultiOutputClassifier(estimator=LogisticRegression(max_iter=400,
                                                   random_state=890,
                                                   solver='newton-cg')) ; 

### Decision Tree Classifier

In [126]:
from sklearn.tree import DecisionTreeClassifier

In [127]:
# Using Decision Tree Classifier for multioutput and evaluaing the result after Ordinal Encoding
classifier = MultiOutputClassifier(DecisionTreeClassifier())
classifier.fit(X_train, y_train)
predictions= classifier.predict(X_test)
classifier.score(X_test, y_test)

# 1st iter result: 0.11830698623151453
# 2nd iter result: 0.0

0.15159171298635674

In [50]:
# Find a best model that yeilds the highest score
best_model = None
criterions = ['gini', 'entropy', 'log_loss']
best_result = 0
best_depth = 0
for depth in range(1,100,1):
    for criterion in range(len(criterions)):
        multi_tree= MultiOutputClassifier(DecisionTreeClassifier(max_depth=depth,random_state=890, criterion = criterions[criterion]))
        multi_tree.fit(X_train, y_train)
        prediction = multi_tree.predict(X_test)
        result_tree = multi_tree.score(X_test, y_test)
        if result_tree > best_result:
            best_model = multi_tree
            best_result = result_tree
            best_depth = depth

In [51]:
print('best depth =', best_depth, '; ',
      'best Accuracy = ', best_result_lr, '; ',
      'best model =', best_model, '; ',
      end ='')

# Same result as with the Logit
# Discarding numeric features did not change the result

# 1st iter result:best depth = 7 ;  best Accuracy =  0.09994900560938297 ;  best model = MultiOutputClassifier(estimator=DecisionTreeClassifier(criterion='entropy',
                                                      #  max_depth=7,
                                                      #  random_state=890)) ; 
                                                      
# 2nd iter best depth = 7 ;  best Accuracy =  0.0005099439061703213 ;  best model = MultiOutputClassifier(estimator=DecisionTreeClassifier(criterion='entropy',
#                                                        max_depth=7,
#                                                        random_state=890)) 

best depth = 7 ;  best Accuracy =  0.0005099439061703213 ;  best model = MultiOutputClassifier(estimator=DecisionTreeClassifier(criterion='entropy',
                                                       max_depth=7,
                                                       random_state=890)) ; 

### Random Forest Classifier

In [128]:
from sklearn.ensemble import RandomForestClassifier

In [129]:
# Using RandomForestClassifier for multioutput and evaluaing the result after Ordinal Encoding
classifier = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = 1, n_estimators=1))
classifier.fit(X_train, y_train)
predictions= classifier.predict(X_test)
classifier.score(X_test, y_test)

# 1st iter result: 0.0
# 2nd iter result: 0.0

0.0

In [8]:
# Find a best model that yeilds the highest score
best_model_rfc = None
best_est_rfc = 0
best_depth_rfc = 0
best_result_rfc = 0
for est in tqdm(range(1, 51)):
    for depth in range(1, 51):
            multi_rfc = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = depth, n_estimators=est))
            multi_rfc.fit(X_train, y_train)
            prediction_rfc = multi_rfc.predict(X_test)
            result_rfc = multi_rfc.score(X_test, y_test)
            if best_result_rfc < result_rfc:
                best_model_rfc = multi_rfc
                best_depth_rfc = depth
                best_est_rfc = est
                best_result_rfc = result_rfc
    print('Mean accuracy of predicted target vs true target:', best_result_rfc)

  0%|          | 0/50 [00:00<?, ?it/s]


NameError: name 'X_train' is not defined

In [67]:
print('best n_estimators =',
      best_est_rfc,
      '; ',
      'best max_depth =',
      best_depth_rfc,
      '; ',
      'best Accuracy = ',
      best_result_rfc,
      end =''
     )

# Need to try using different encoding and discarding numeric features
# Discarding numeric features did not change the result

# 1st iter result: best n_estimators = 44 ;  best max_depth = 32 ;  best Accuracy =  0.1432942376338603
# 2nd iter result: best n_estimators = 14 ;  best max_depth = 24 ;  best Accuracy =  0.0015298317185109638

best n_estimators = 1 ;  best max_depth = 18 ;  best Accuracy =  0.006568974229408792

In [68]:
# Testing if excluding '*_pick_order_*' features improves the model

from sklearn.ensemble import RandomForestClassifier
df_reshaped = pd.read_pickle('./data_final.pickle')
# defining features and targets
# selected features for the 1st iter with Ordinal Encoding
features = df_reshaped.drop(['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
       'left_hero_code_5', 'left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 'left_pick_order_5',
       'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5'], axis = 1)

target = df_reshaped[['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
       'left_hero_code_5']]

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size= 0.2, shuffle = True, random_state= 890)

# Encoding the categorical features
# Initializing an ordinal encoder for categorical variables
enc = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
# categorical labels for the 1st iter
categorical = ['left_attribute_cd_1', 'left_attribute_cd_2',
       'left_attribute_cd_3', 'left_attribute_cd_4', 'left_attribute_cd_5',
       'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4',
       'left_job_cd_5', 'left_postban', 'right_hero_code_1', 'right_hero_code_2',
       'right_hero_code_3', 'right_hero_code_4', 'right_hero_code_5', 'right_attribute_cd_1',
       'right_attribute_cd_2', 'right_attribute_cd_3', 'right_attribute_cd_4',
       'right_attribute_cd_5', 'right_job_cd_1', 'right_job_cd_2',
       'right_job_cd_3', 'right_job_cd_4', 'right_job_cd_5', 'right_postban', 'left_preban_1', 'left_preban_2',
       'right_preban_1', 'right_preban_2']

X_train[categorical] = enc.fit_transform(X_train[categorical].to_numpy())
X_test[categorical] = enc.transform(X_test[categorical].to_numpy())

# Scaling 
# Define numeric labels
numeric = ['left_attack_damage_1',
       'left_attack_damage_2', 'left_attack_damage_3', 'left_attack_damage_4',
       'left_attack_damage_5', 'left_receive_damage_1',
       'left_receive_damage_2', 'left_receive_damage_3',
       'left_receive_damage_4', 'left_receive_damage_5', 'left_kill_count_1',
       'left_kill_count_2', 'left_kill_count_3', 'left_kill_count_4',
       'left_kill_count_5', 'right_attack_damage_1', 'right_attack_damage_2',
       'right_attack_damage_3', 'right_attack_damage_4',
       'right_attack_damage_5', 'right_receive_damage_1',
       'right_receive_damage_2', 'right_receive_damage_3',
       'right_receive_damage_4', 'right_receive_damage_5',
       'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3',
       'right_kill_count_4', 'right_kill_count_5']
scaler = StandardScaler()
scaler.fit(X_train[numeric])
X_train[numeric] = scaler.transform(X_train[numeric])
X_test[numeric] = scaler.transform(X_test[numeric])

# Find a best model that yeilds the highest score
best_model_rfc = None
best_est_rfc = 0
best_depth_rfc = 0
best_result_rfc = 0
for est in tqdm(range(1, 51)):
    for depth in range(1, 51):
            multi_rfc = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = depth, n_estimators=est))
            multi_rfc.fit(X_train, y_train)
            prediction_rfc = multi_rfc.predict(X_test)
            result_rfc = multi_rfc.score(X_test, y_test)
            if best_result_rfc < result_rfc:
                best_model_rfc = multi_rfc
                best_depth_rfc = depth
                best_est_rfc = est
                best_result_rfc = result_rfc
    print('Mean accuracy of predicted target vs true target:', best_result_rfc)

  0%|          | 0/50 [00:04<?, ?it/s]


KeyboardInterrupt: 

In [194]:
# The model's accuracy decreased after excluding '*_pick_order_*' features
# Random Forest seems to provide the highest accuracy, hence, this model is selected 
# for predictions within the players pick order.

## Training Models for Each Phase of Character Selection 

### First Pick - Phase 1

In [None]:
# Most frequently used hero with the highest number of wins will be provided as the suggestion for the 1st pick 

### First Pick - Phase 2

In [2]:
df_reshaped = pd.read_pickle('./data_final.pickle')
# defining features and targets

# select features
fp_2p_features = df_reshaped[['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
                              'right_pick_order_1', 'right_pick_order_2', 
                              'left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3',
                              'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3',
                              'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3',
                              'right_attack_damage_1', 'right_attack_damage_2',
                              'right_receive_damage_1', 'right_receive_damage_2',
                              'right_kill_count_1', 'right_kill_count_2', 
                              'left_attribute_cd_1', 
#                              'left_attribute_cd_2', 'left_attribute_cd_3', 'left_job_cd_2', 'left_job_cd_3',
                              'left_hero_code_1', 
                              'left_job_cd_1', 
                              'right_hero_code_1', 'right_hero_code_2',
                              'right_attribute_cd_1', 'right_attribute_cd_2' ,
                              'right_job_cd_1', 'right_job_cd_2',
#                              'left_postban', 'right_postban', 
                              'left_preban_1', 'left_preban_2', 
                              'right_preban_1', 'right_preban_2', 'first_pick', 'is_win']]

fp_2p_target = df_reshaped[['left_hero_code_2', 'left_hero_code_3']]

X_train_fp_2p, X_test_fp_2p, y_train_fp_2p, y_test_fp_2p = train_test_split(fp_2p_features, fp_2p_target, test_size= 0.2, shuffle = True, random_state= 890)

# Scaling 
# Define numeric features
fp_2p_numeric = ['left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3', 
                 'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3',
                 'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3',
                 'right_attack_damage_1', 'right_attack_damage_2', 
                 'right_receive_damage_1', 'right_receive_damage_2',
                 'right_kill_count_1', 'right_kill_count_2']



In [3]:
# Scale numeric features
fp_2p_scaler = StandardScaler()
fp_2p_scaler.fit(X_train_fp_2p[fp_2p_numeric])

In [4]:
X_train_fp_2p_trans = pd.DataFrame()
X_test_fp_2p_trans = pd.DataFrame()

X_train_fp_2p_trans[fp_2p_numeric] = fp_2p_scaler.transform(X_train_fp_2p[fp_2p_numeric])
X_test_fp_2p_trans[fp_2p_numeric] = fp_2p_scaler.transform(X_test_fp_2p[fp_2p_numeric])

In [5]:
# Encoding the categorical features
# Initializing an ordinal encoder for categorical variables
fp_2p_enc = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
# categorical labels for the 1st iter
fp_2p_categorical = ['left_attribute_cd_1', 
#                     'left_attribute_cd_2', 'left_attribute_cd_3', 'left_job_cd_2', 'left_job_cd_3',
                     'left_hero_code_1',
                     'left_job_cd_1',
                     'right_hero_code_1', 'right_hero_code_2',
                     'right_attribute_cd_1', 'right_attribute_cd_2' ,
                     'right_job_cd_1', 'right_job_cd_2', 
#                     'left_postban', 'right_postban', 
                     'left_preban_1', 'left_preban_2', 
                     'right_preban_1', 'right_preban_2']

X_train_fp_2p_trans[fp_2p_categorical] = fp_2p_enc.fit_transform(X_train_fp_2p[fp_2p_categorical].to_numpy())
X_test_fp_2p_trans[fp_2p_categorical] = fp_2p_enc.transform(X_test_fp_2p[fp_2p_categorical].to_numpy())

In [6]:
X_train_fp_2p_trans.index
X_test_fp_2p_trans.index

RangeIndex(start=0, stop=1979, step=1)

In [7]:
X_train_fp_2p = X_train_fp_2p.reset_index(drop=True)
X_test_fp_2p = X_test_fp_2p.reset_index(drop=True)

In [8]:
X_train_fp_2p.index
X_test_fp_2p.index

RangeIndex(start=0, stop=1979, step=1)

In [9]:
X_test_fp_2p.isna().sum()
X_test_fp_2p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
right_attack_damage_1     0
right_attack_damage_2     0
right_receive_damage_1    0
right_receive_damage_2    0
right_kill_count_1        0
right_kill_count_2        0
left_attribute_cd_1       0
left_hero_code_1          0
left_job_cd_1             0
right_hero_code_1         0
right_hero_code_2         0
right_attribute_cd_1      0
right_attribute_cd_2      0
right_job_cd_1            0
right_job_cd_2            0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
dtype: int64

In [10]:
# add features left out of scaling
l0f_fp_2p = ['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
                    'right_pick_order_1', 'right_pick_order_2', 
                   'first_pick', 'is_win']

X_train_fp_2p_trans[l0f_fp_2p] = X_train_fp_2p[l0f_fp_2p]
X_test_fp_2p_trans[l0f_fp_2p] = X_test_fp_2p[l0f_fp_2p]

In [57]:
X_test_fp_2p_trans[l0f_fp_2p]

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,right_pick_order_1,right_pick_order_2,first_pick,is_win
0,1,2,3,1,2,0,2
1,1,2,3,1,2,0,2
2,1,2,3,1,2,1,2
3,1,2,3,1,2,0,1
4,1,2,3,1,2,0,2
...,...,...,...,...,...,...,...
1974,1,2,3,1,2,0,2
1975,1,2,3,1,2,0,1
1976,1,2,3,1,2,0,2
1977,1,2,3,1,2,1,1


In [14]:
X_test_fp_2p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
right_attack_damage_1     0
right_attack_damage_2     0
right_receive_damage_1    0
right_receive_damage_2    0
right_kill_count_1        0
right_kill_count_2        0
left_attribute_cd_1       0
left_hero_code_1          0
left_job_cd_1             0
right_hero_code_1         0
right_hero_code_2         0
right_attribute_cd_1      0
right_attribute_cd_2      0
right_job_cd_1            0
right_job_cd_2            0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
left_pick_order_1         0
left_pick_order_2         0
left_pick_order_3         0
right_pick_order_1        0
right_pick_order_2        0
first_pick                0
is_win                    0
dtype: int64

In [15]:
# Create a copy of training data after encoding
new_X_train_fp_2p = X_train_fp_2p.copy()

In [16]:
X_train_fp_2p.columns

Index(['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'right_pick_order_1', 'right_pick_order_2', 'left_attack_damage_1',
       'left_attack_damage_2', 'left_attack_damage_3', 'left_receive_damage_1',
       'left_receive_damage_2', 'left_receive_damage_3', 'left_kill_count_1',
       'left_kill_count_2', 'left_kill_count_3', 'right_attack_damage_1',
       'right_attack_damage_2', 'right_receive_damage_1',
       'right_receive_damage_2', 'right_kill_count_1', 'right_kill_count_2',
       'left_attribute_cd_1', 'left_hero_code_1', 'left_job_cd_1',
       'right_hero_code_1', 'right_hero_code_2', 'right_attribute_cd_1',
       'right_attribute_cd_2', 'right_job_cd_1', 'right_job_cd_2',
       'left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2',
       'first_pick', 'is_win'],
      dtype='object')

In [17]:
# Change columns names in the copied data to concatinate with the original data
new_X_train_fp_2p.columns = ['left_pick_order_1_orig', 'left_pick_order_2_orig', 'left_pick_order_3_orig', 
                             'right_pick_order_1_orig', 'right_pick_order_2_orig', 
                             'left_attack_damage_1_orig', 'left_attack_damage_2_orig', 'left_attack_damage_3_orig', 
                             'left_receive_damage_1_orig', 'left_receive_damage_2_orig', 'left_receive_damage_3_orig',
                             'left_kill_count_1_orig', 'left_kill_count_2_orig', 'left_kill_count_3_orig',
                             'right_attack_damage_1_orig', 'right_attack_damage_2_orig', 
                             'right_receive_damage_1_orig', 'right_receive_damage_2_orig',
                             'right_kill_count_1_orig', 'right_kill_count_2_orig',
                             'left_attribute_cd_1_orig',
#                             'left_attribute_cd_2_orig', 'left_attribute_cd_3_orig',  'left_job_cd_2_orig', 'left_job_cd_3_orig',
                             'left_hero_code_1_orig',
                             'left_job_cd_1_orig', 
                             'right_hero_code_1_orig', 'right_hero_code_2_orig', 
                             'right_attribute_cd_1_orig', 'right_attribute_cd_2_orig', 
                             'right_job_cd_1_orig', 'right_job_cd_2_orig', 
#                             'left_postban_orig', 'right_postban_orig', 
                             'left_preban_1_orig', 'left_preban_2_orig', 
                             'right_preban_1_orig', 'right_preban_2_orig', 
                             'first_pick_orig', 'is_win_orig']

In [18]:
len(new_X_train_fp_2p.columns)

35

In [19]:
# Cocatenate original data with encoded data for encoding names later in the function
dict_fp_2p = pd.concat((X_train_fp_2p_trans,new_X_train_fp_2p.reset_index(drop=True)), axis= 1).copy()
len(dict_fp_2p.columns)

70

In [20]:
dict_fp_2p[['left_hero_code_1', 'left_hero_code_1_orig']]

Unnamed: 0,left_hero_code_1,left_hero_code_1_orig
0,33.0,Sea Phantom Politis
1,25.0,New Moon Luna
2,19.0,Laia
3,13.0,Death Dealer Ray
4,19.0,Laia
...,...,...
7908,18.0,Jenua
7909,39.0,Zio
7910,15.0,Dragon Bride Senya
7911,13.0,Death Dealer Ray


<a id='fp2p_link'></a>
### Tunning Hyperparameters for the First Pick - Phase 2

In [56]:
# RandomizedSearchCV parameters
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 1000, num = 10)]
# Number of features to consider at every split
max_features = ['sqrt', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 50, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'estimator__n_estimators': n_estimators,
               'estimator__max_features': max_features,
               'estimator__max_depth': max_depth,
               'estimator__min_samples_split': min_samples_split,
               'estimator__min_samples_leaf': min_samples_leaf,
               'estimator__bootstrap': bootstrap}
print(random_grid)
{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 288, 377, 466, 555, 644, 733, 822, 911, 1000]}

{'estimator__n_estimators': [200, 288, 377, 466, 555, 644, 733, 822, 911, 1000], 'estimator__max_features': ['sqrt', 'log2'], 'estimator__max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None], 'estimator__min_samples_split': [2, 5, 10], 'estimator__min_samples_leaf': [1, 2, 4], 'estimator__bootstrap': [True, False]}


{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 288, 377, 466, 555, 644, 733, 822, 911, 1000]}

In [29]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = MultiOutputClassifier(RandomForestClassifier())
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=890)
# Fit the random search model
rf_random.fit(X_train_fp_2p_trans, y_train_fp_2p)

Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=377; total time=  10.9s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=377; total time=   7.9s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=377; total time=   7.8s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimator__min_samples_split=5, estimator__n_estimators=466; total time=   8.0s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimat

In [30]:
# View the best parameters from fitting the random search
rf_random.best_params_

{'estimator__n_estimators': 288,
 'estimator__min_samples_split': 2,
 'estimator__min_samples_leaf': 1,
 'estimator__max_features': 'log2',
 'estimator__max_depth': 50,
 'estimator__bootstrap': False}

In [31]:
# Using RandomForestClassifier for multioutput and evaluaing the result after Ordinal Encoding
classifier_fp_2p = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = 50, n_estimators=288, min_samples_split = 2, min_samples_leaf = 1, max_features = 'log2', bootstrap = False))
classifier_fp_2p.fit(X_train_fp_2p_trans, y_train_fp_2p)
predictions_fp_2p = classifier_fp_2p.predict(X_test_fp_2p_trans)
classifier_fp_2p.score(X_test_fp_2p_trans, y_test_fp_2p)

0.21172309247094492

In [33]:
X_train_fp_2p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
right_attack_damage_1     0
right_attack_damage_2     0
right_receive_damage_1    0
right_receive_damage_2    0
right_kill_count_1        0
right_kill_count_2        0
left_attribute_cd_1       0
left_hero_code_1          0
left_job_cd_1             0
right_hero_code_1         0
right_hero_code_2         0
right_attribute_cd_1      0
right_attribute_cd_2      0
right_job_cd_1            0
right_job_cd_2            0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
left_pick_order_1         0
left_pick_order_2         0
left_pick_order_3         0
right_pick_order_1        0
right_pick_order_2        0
first_pick                0
is_win                    0
dtype: int64

In [32]:
#saving the model
joblib.dump(classifier_fp_2p, 'classifier_fp_2p.joblib')

['classifier_fp_2p.joblib']

In [18]:
#loading the model
joblib.load('classifier_fp_2p.joblib')


: 

In [None]:
classifier_fp_2p.score(X_test_fp_2p_trans, y_test_fp_2p)

### First Pick - Phase 3

In [3]:
df_reshaped = pd.read_pickle('./data_final.pickle')
# defining features and targets

# select features
fp_3p_features = df_reshaped[['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 'left_pick_order_5',
                        'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4',
                        'left_attack_damage_1', 'left_attack_damage_2','left_attack_damage_3',
                        'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3',
                        'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3',
                        'right_attack_damage_1', 'right_attack_damage_2', 'right_attack_damage_3', 'right_attack_damage_4',
                        'right_receive_damage_1', 'right_receive_damage_2', 'right_receive_damage_3', 'right_receive_damage_4',
                        'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3', 'right_kill_count_4',
                        'left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3',
                        'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3',
                        'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 
                        'right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 'right_hero_code_4',
                        'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3', 'right_attribute_cd_4',
                        'right_job_cd_1', 'right_job_cd_2', 'right_job_cd_3', 'right_job_cd_4',
#                        'left_postban', 'right_postban',
                        'left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2', 'first_pick', 'is_win']]

fp_3p_target = df_reshaped[['left_hero_code_4', 'left_hero_code_5']]

X_train_fp_3p, X_test_fp_3p, y_train_fp_3p, y_test_fp_3p = train_test_split(fp_3p_features, fp_3p_target, test_size= 0.2, shuffle = True, random_state= 890)

# Scaling 
# Define numeric features
fp_3p_numeric = ['left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3', 
                 'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3', 
                 'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3', 
                 'right_attack_damage_1', 'right_attack_damage_2', 'right_attack_damage_3', 'right_attack_damage_4',
                 'right_receive_damage_1', 'right_receive_damage_2', 'right_receive_damage_3', 'right_receive_damage_4',
                 'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3', 'right_kill_count_4']



In [4]:
# Scale numeric features
fp_3p_scaler = StandardScaler()
fp_3p_scaler.fit(X_train_fp_3p[fp_3p_numeric])

In [5]:
X_train_fp_3p_trans = pd.DataFrame()
X_test_fp_3p_trans = pd.DataFrame()
X_train_fp_3p_trans[fp_3p_numeric] = fp_3p_scaler.transform(X_train_fp_3p[fp_3p_numeric])
X_test_fp_3p_trans[fp_3p_numeric] = fp_3p_scaler.transform(X_test_fp_3p[fp_3p_numeric])

In [6]:
# Encoding the categorical features
# Initializing an ordinal encoder for categorical variables
fp_3p_enc = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
# categorical labels for the 1st iter
fp_3p_categorical = ['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3',
                     'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3', 
#                     'left_attribute_cd_4', 'left_attribute_cd_5', 'left_job_cd_4', 'left_job_cd_5',
                     'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3',
                     'right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 'right_hero_code_4',
                     'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3', 'right_attribute_cd_4',
                     'right_job_cd_1', 'right_job_cd_2', 'right_job_cd_3', 'right_job_cd_4',
#                     'left_postban', 'right_postban',
                     'left_preban_1', 'left_preban_2',
                     'right_preban_1', 'right_preban_2']

X_train_fp_3p_trans[fp_3p_categorical] = fp_3p_enc.fit_transform(X_train_fp_3p[fp_3p_categorical].to_numpy())
X_test_fp_3p_trans[fp_3p_categorical] = fp_3p_enc.transform(X_test_fp_3p[fp_3p_categorical].to_numpy())

In [12]:
X_train_fp_3p_trans.index
X_test_fp_3p_trans.index

RangeIndex(start=0, stop=1979, step=1)

In [11]:
X_train_fp_3p = X_train_fp_3p.reset_index(drop=True)
X_test_fp_3p = X_test_fp_3p.reset_index(drop=True)

In [10]:
X_train_fp_3p.index
X_test_fp_3p.index

RangeIndex(start=0, stop=1979, step=1)

In [14]:
X_test_fp_3p.isna().sum()
X_test_fp_3p_trans.isna().sum()

left_pick_order_1         0
left_pick_order_2         0
left_pick_order_3         0
left_pick_order_4         0
left_pick_order_5         0
right_pick_order_1        0
right_pick_order_2        0
right_pick_order_3        0
right_pick_order_4        0
left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
right_attack_damage_1     0
right_attack_damage_2     0
right_attack_damage_3     0
right_attack_damage_4     0
right_receive_damage_1    0
right_receive_damage_2    0
right_receive_damage_3    0
right_receive_damage_4    0
right_kill_count_1        0
right_kill_count_2        0
right_kill_count_3        0
right_kill_count_4        0
left_hero_code_1          0
left_hero_code_2          0
left_hero_code_3          0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_attribute_cd_3 

In [15]:
# add features left out of scaling
l0f_fp_3p = ['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 'left_pick_order_5',
             'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4', 
             'first_pick', 'is_win']

X_train_fp_3p_trans[l0f_fp_3p] = X_train_fp_3p[l0f_fp_3p]
X_test_fp_3p_trans[l0f_fp_3p] = X_test_fp_3p[l0f_fp_3p]

In [16]:
X_test_fp_3p_trans[l0f_fp_3p]

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,right_pick_order_1,right_pick_order_2,right_pick_order_3,right_pick_order_4,first_pick,is_win
0,1,2,3,4,5,1,2,3,4,0,2
1,1,2,3,4,5,1,2,3,4,0,2
2,1,2,3,4,5,1,2,3,4,1,2
3,1,2,3,4,5,1,2,3,4,0,1
4,1,2,3,4,5,1,2,3,4,0,2
...,...,...,...,...,...,...,...,...,...,...,...
1974,1,2,3,4,5,1,2,3,4,0,2
1975,1,2,3,4,5,1,2,3,4,0,1
1976,1,2,3,4,5,1,2,3,4,0,2
1977,1,2,3,4,5,1,2,3,4,1,1


In [17]:
X_test_fp_3p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
right_attack_damage_1     0
right_attack_damage_2     0
right_attack_damage_3     0
right_attack_damage_4     0
right_receive_damage_1    0
right_receive_damage_2    0
right_receive_damage_3    0
right_receive_damage_4    0
right_kill_count_1        0
right_kill_count_2        0
right_kill_count_3        0
right_kill_count_4        0
left_hero_code_1          0
left_hero_code_2          0
left_hero_code_3          0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_attribute_cd_3       0
left_job_cd_1             0
left_job_cd_2             0
left_job_cd_3             0
right_hero_code_1         0
right_hero_code_2         0
right_hero_code_3         0
right_hero_code_4         0
right_attribute_cd_1      0
right_attribute_cd_2

In [18]:
# Create a copy of training data after encoding
new_X_train_fp_3p = X_train_fp_3p.copy()

In [19]:
X_train_fp_3p.columns

Index(['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'left_pick_order_4', 'left_pick_order_5', 'right_pick_order_1',
       'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4',
       'left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3',
       'left_receive_damage_1', 'left_receive_damage_2',
       'left_receive_damage_3', 'left_kill_count_1', 'left_kill_count_2',
       'left_kill_count_3', 'right_attack_damage_1', 'right_attack_damage_2',
       'right_attack_damage_3', 'right_attack_damage_4',
       'right_receive_damage_1', 'right_receive_damage_2',
       'right_receive_damage_3', 'right_receive_damage_4',
       'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3',
       'right_kill_count_4', 'left_hero_code_1', 'left_hero_code_2',
       'left_hero_code_3', 'left_attribute_cd_1', 'left_attribute_cd_2',
       'left_attribute_cd_3', 'left_job_cd_1', 'left_job_cd_2',
       'left_job_cd_3', 'right_hero_code_1',

In [20]:
# Change columns names in the copied data to concatinate with the original data
new_X_train_fp_3p.columns = ['left_pick_order_1_orig', 'left_pick_order_2_orig', 'left_pick_order_3_orig', 'left_pick_order_4_orig', 'left_pick_order_5_orig',
                             'right_pick_order_1_orig', 'right_pick_order_2_orig', 'right_pick_order_3_orig', 'right_pick_order_4_orig', 
                             'left_attack_damage_1_orig', 'left_attack_damage_2_orig', 'left_attack_damage_3_orig', 
                             'left_receive_damage_1_orig', 'left_receive_damage_2_orig', 'left_receive_damage_3_orig',
                             'left_kill_count_1_orig', 'left_kill_count_2_orig', 'left_kill_count_3_orig',
                             'right_attack_damage_1_orig', 'right_attack_damage_2_orig', 'right_attack_damage_3_orig', 'right_attack_damage_4_orig',
                             'right_receive_damage_1_orig', 'right_receive_damage_2_orig', 'right_receive_damage_3_orig', 'right_receive_damage_4_orig',
                             'right_kill_count_1_orig', 'right_kill_count_2_orig', 'right_kill_count_3_orig', 'right_kill_count_4_orig', 
                             'left_hero_code_1_orig', 'left_hero_code_2_orig', 'left_hero_code_3_orig',
                             'left_attribute_cd_1_orig', 'left_attribute_cd_2_orig', 'left_attribute_cd_3_orig',
                             'left_job_cd_1_orig', 'left_job_cd_2_orig', 'left_job_cd_3_orig',
                             'right_hero_code_1_orig', 'right_hero_code_2_orig', 'right_hero_code_3_orig', 'right_hero_code_4_orig', 
                             'right_attribute_cd_1_orig', 'right_attribute_cd_2_orig', 'right_attribute_cd_3_orig', 'right_attribute_cd_4_orig',
                             'right_job_cd_1_orig', 'right_job_cd_2_orig', 'right_job_cd_3_orig', 'right_job_cd_4_orig', 
                             'left_preban_1_orig', 'left_preban_2_orig', 
                             'right_preban_1_orig', 'right_preban_2_orig',
                             'first_pick_orig', 'is_win_orig']

In [21]:
len(new_X_train_fp_3p.columns)

57

In [22]:
# Cocatenate original data with encoded data for later use in the function
dict_fp_3p = pd.concat((X_train_fp_3p_trans,new_X_train_fp_3p.reset_index(drop=True)), axis= 1).copy()
len(dict_fp_3p.columns)

114

In [23]:
dict_fp_3p[['left_hero_code_1', 'left_hero_code_1_orig']]

Unnamed: 0,left_hero_code_1,left_hero_code_1_orig
0,33.0,Sea Phantom Politis
1,25.0,New Moon Luna
2,19.0,Laia
3,13.0,Death Dealer Ray
4,19.0,Laia
...,...,...
7908,18.0,Jenua
7909,39.0,Zio
7910,15.0,Dragon Bride Senya
7911,13.0,Death Dealer Ray


<a id='fp3p_link'></a>
### Tunning Hyperparameters for the First Pick - Phase 3

In [57]:
# RandomizedSearchCV parameters
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 500, num = 10)]
# Number of features to consider at every split
max_features = ['sqrt', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 50, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'estimator__n_estimators': n_estimators,
               'estimator__max_features': max_features,
               'estimator__max_depth': max_depth,
               'estimator__min_samples_split': min_samples_split,
               'estimator__min_samples_leaf': min_samples_leaf,
               'estimator__bootstrap': bootstrap}
print(random_grid)
{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500]}

{'estimator__n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500], 'estimator__max_features': ['sqrt', 'log2'], 'estimator__max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None], 'estimator__min_samples_split': [2, 5, 10], 'estimator__min_samples_leaf': [1, 2, 4], 'estimator__bootstrap': [True, False]}


{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500]}

In [25]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = MultiOutputClassifier(RandomForestClassifier())
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=890)
# Fit the random search model
rf_random.fit(X_train_fp_3p_trans, y_train_fp_3p)

Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   8.7s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   8.7s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   8.9s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimator__min_samples_split=5, estimator__n_estimators=300; total time=   9.5s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimat

In [26]:
# View the best parameters from fitting the random search
rf_random.best_params_

{'estimator__n_estimators': 266,
 'estimator__min_samples_split': 2,
 'estimator__min_samples_leaf': 2,
 'estimator__max_features': 'sqrt',
 'estimator__max_depth': 14,
 'estimator__bootstrap': False}

In [28]:
# Using RandomForestClassifier for multioutput and evaluaing the result after Ordinal Encoding
classifier_fp_3p = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = 14, n_estimators=266, min_samples_split = 2, min_samples_leaf = 2, max_features = 'sqrt', bootstrap = False))
classifier_fp_3p.fit(X_train_fp_3p_trans, y_train_fp_3p)
predictions_fp_3p = classifier_fp_3p.predict(X_test_fp_3p_trans)
classifier_fp_3p.score(X_test_fp_3p_trans, y_test_fp_3p)

0.03233956543708944

In [27]:
X_train_fp_3p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
right_attack_damage_1     0
right_attack_damage_2     0
right_attack_damage_3     0
right_attack_damage_4     0
right_receive_damage_1    0
right_receive_damage_2    0
right_receive_damage_3    0
right_receive_damage_4    0
right_kill_count_1        0
right_kill_count_2        0
right_kill_count_3        0
right_kill_count_4        0
left_hero_code_1          0
left_hero_code_2          0
left_hero_code_3          0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_attribute_cd_3       0
left_job_cd_1             0
left_job_cd_2             0
left_job_cd_3             0
right_hero_code_1         0
right_hero_code_2         0
right_hero_code_3         0
right_hero_code_4         0
right_attribute_cd_1      0
right_attribute_cd_2

In [65]:
#saving the model
joblib.dump(classifier_fp_3p, 'classifier_fp_3p.joblib')

['classifier_fp_3p.joblib']

<a id='sp1p_link'></a>
### Second Pick - Phase 1

In [30]:
df_reshaped = pd.read_pickle('./data_final.pickle')
# defining features and targets

# selected features for the 1st iter with Ordinal Encoding
sp_1p_features = df_reshaped[['left_pick_order_1', 'left_pick_order_2', 
                              'right_pick_order_1',
                              'left_attack_damage_1', 'left_attack_damage_2',
                              'left_receive_damage_1', 'left_receive_damage_2',
                              'left_kill_count_1', 'left_kill_count_2',
                              'right_attack_damage_1',
                              'right_receive_damage_1', 
                              'right_kill_count_1',
                              'left_attribute_cd_1', 'left_attribute_cd_2', 
                              'left_job_cd_1', 'left_job_cd_2',
                              'right_hero_code_1', 
                              'right_attribute_cd_1', 
                              'right_job_cd_1', 
#                              'left_postban', 'right_postban',
                              'left_preban_1', 'left_preban_2', 
                              'right_preban_1', 'right_preban_2',
                              'first_pick', 'is_win']]

sp_1p_target = df_reshaped[['left_hero_code_1', 'left_hero_code_2']]

X_train_sp_1p, X_test_sp_1p, y_train_sp_1p, y_test_sp_1p = train_test_split(sp_1p_features, sp_1p_target, test_size= 0.2, shuffle = True, random_state= 890)



# Scaling 
# Define numeric labels
sp_1p_numeric = ['left_attack_damage_1', 'left_attack_damage_2',
                 'left_receive_damage_1', 'left_receive_damage_2', 
                 'left_kill_count_1', 'left_kill_count_2',
                 'right_attack_damage_1',
                 'right_receive_damage_1', 
                 'right_kill_count_1']


In [33]:
# Scale numeric features
sp_1p_scaler = StandardScaler()
sp_1p_scaler.fit(X_train_sp_1p[sp_1p_numeric])

In [34]:
X_train_sp_1p_trans = pd.DataFrame()
X_test_sp_1p_trans = pd.DataFrame()

X_train_sp_1p_trans[sp_1p_numeric] = sp_1p_scaler.transform(X_train_sp_1p[sp_1p_numeric])
X_test_sp_1p_trans[sp_1p_numeric] = sp_1p_scaler.transform(X_test_sp_1p[sp_1p_numeric])

In [35]:
# Encoding the categorical features
# Initializing an ordinal encoder for categorical variables
sp_1p_enc = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
# categorical labels for the 1st iter
sp_1p_categorical = ['left_attribute_cd_1', 'left_attribute_cd_2',
                     'left_job_cd_1', 'left_job_cd_2',
                     'right_hero_code_1',
                     'right_attribute_cd_1',
                     'right_job_cd_1', 
#                     'left_postban', 'right_postban',
                     'left_preban_1', 'left_preban_2',
                     'right_preban_1', 'right_preban_2']

X_train_sp_1p_trans[sp_1p_categorical] = sp_1p_enc.fit_transform(X_train_sp_1p[sp_1p_categorical].to_numpy())
X_test_sp_1p_trans[sp_1p_categorical] = sp_1p_enc.transform(X_test_sp_1p[sp_1p_categorical].to_numpy())

In [36]:
X_train_sp_1p_trans.index
X_test_sp_1p_trans.index

RangeIndex(start=0, stop=1979, step=1)

In [37]:
X_train_sp_1p = X_train_sp_1p.reset_index(drop=True)
X_test_sp_1p = X_test_sp_1p.reset_index(drop=True)

In [38]:
X_train_sp_1p.index
X_test_sp_1p.index

RangeIndex(start=0, stop=1979, step=1)

In [41]:
X_test_sp_1p.isna().sum()
X_test_sp_1p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_receive_damage_1     0
left_receive_damage_2     0
left_kill_count_1         0
left_kill_count_2         0
right_attack_damage_1     0
right_receive_damage_1    0
right_kill_count_1        0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_job_cd_1             0
left_job_cd_2             0
right_hero_code_1         0
right_attribute_cd_1      0
right_job_cd_1            0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
dtype: int64

In [42]:
X_test_sp_1p_trans.shape

(1979, 20)

In [43]:
X_test_sp_1p.shape

(1979, 25)

In [44]:
# add features left out of scaling
l0f_sp_1p = ['left_pick_order_1', 'left_pick_order_2',
             'right_pick_order_1', 
             'first_pick', 'is_win']

X_train_sp_1p_trans[l0f_sp_1p] = X_train_sp_1p[l0f_sp_1p]
X_test_sp_1p_trans[l0f_sp_1p] = X_test_sp_1p[l0f_sp_1p]

In [45]:
X_test_sp_1p_trans[l0f_sp_1p]

Unnamed: 0,left_pick_order_1,left_pick_order_2,right_pick_order_1,first_pick,is_win
0,1,2,1,0,2
1,1,2,1,0,2
2,1,2,1,1,2
3,1,2,1,0,1
4,1,2,1,0,2
...,...,...,...,...,...
1974,1,2,1,0,2
1975,1,2,1,0,1
1976,1,2,1,0,2
1977,1,2,1,1,1


<a id='sp1p_link'></a>
### Tunning Hyperparameters for the Second Pick - Phase 1

In [46]:
X_test_sp_1p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_receive_damage_1     0
left_receive_damage_2     0
left_kill_count_1         0
left_kill_count_2         0
right_attack_damage_1     0
right_receive_damage_1    0
right_kill_count_1        0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_job_cd_1             0
left_job_cd_2             0
right_hero_code_1         0
right_attribute_cd_1      0
right_job_cd_1            0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
left_pick_order_1         0
left_pick_order_2         0
right_pick_order_1        0
first_pick                0
is_win                    0
dtype: int64

In [47]:
# Create a copy of training data after encoding
new_X_train_sp_1p = X_train_sp_1p.copy()

In [48]:
X_train_sp_1p.columns

Index(['left_pick_order_1', 'left_pick_order_2', 'right_pick_order_1',
       'left_attack_damage_1', 'left_attack_damage_2', 'left_receive_damage_1',
       'left_receive_damage_2', 'left_kill_count_1', 'left_kill_count_2',
       'right_attack_damage_1', 'right_receive_damage_1', 'right_kill_count_1',
       'left_attribute_cd_1', 'left_attribute_cd_2', 'left_job_cd_1',
       'left_job_cd_2', 'right_hero_code_1', 'right_attribute_cd_1',
       'right_job_cd_1', 'left_preban_1', 'left_preban_2', 'right_preban_1',
       'right_preban_2', 'first_pick', 'is_win'],
      dtype='object')

In [50]:
# Change columns names in the copied data to concatinate with the original data
new_X_train_sp_1p.columns = ['left_pick_order_1_orig', 'left_pick_order_2_orig', 
                             'right_pick_order_1_orig', 
                             'left_attack_damage_1_orig', 'left_attack_damage_2_orig',
                             'left_receive_damage_1_orig', 'left_receive_damage_2_orig',
                             'left_kill_count_1_orig', 'left_kill_count_2_orig', 
                             'right_attack_damage_1_orig', 
                             'right_receive_damage_1_orig', 
                             'right_kill_count_1_orig', 
                             'left_attribute_cd_1_orig', 'left_attribute_cd_2_orig',
                             'left_job_cd_1_orig', 'left_job_cd_2_orig',
                             'right_hero_code_1_orig',
                             'right_attribute_cd_1_orig', 
                             'right_job_cd_1_orig',
                             'left_preban_1_orig', 'left_preban_2_orig',
                             'right_preban_1_orig', 'right_preban_2_orig',
                             'first_pick_orig', 'is_win_orig']

In [51]:
len(new_X_train_sp_1p.columns)

25

In [52]:
# Cocatenate original data with encoded data for later use in the function
dict_sp_1p = pd.concat((X_train_sp_1p_trans,new_X_train_sp_1p.reset_index(drop=True)), axis= 1).copy()
len(dict_sp_1p.columns)

50

In [54]:
dict_sp_1p[['right_hero_code_1', 'right_hero_code_1_orig']]

Unnamed: 0,right_hero_code_1,right_hero_code_1_orig
0,3.0,Ambitious Tywin
1,30.0,Laia
2,15.0,Death Dealer Ray
3,30.0,Laia
4,41.0,New Moon Luna
...,...,...
7908,17.0,Dragon Bride Senya
7909,41.0,New Moon Luna
7910,41.0,New Moon Luna
7911,30.0,Laia


<a id='sp1p_link'></a>
### Tunning Hyperparameters for the Second Pick - Phase 1

In [58]:
# RandomizedSearchCV parameters
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 1000, num = 10)]
# Number of features to consider at every split
max_features = ['sqrt', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 50, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'estimator__n_estimators': n_estimators,
               'estimator__max_features': max_features,
               'estimator__max_depth': max_depth,
               'estimator__min_samples_split': min_samples_split,
               'estimator__min_samples_leaf': min_samples_leaf,
               'estimator__bootstrap': bootstrap}
print(random_grid)
{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 288, 377, 466, 555, 644, 733, 822, 911, 1000]}


{'estimator__n_estimators': [200, 288, 377, 466, 555, 644, 733, 822, 911, 1000], 'estimator__max_features': ['sqrt', 'log2'], 'estimator__max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None], 'estimator__min_samples_split': [2, 5, 10], 'estimator__min_samples_leaf': [1, 2, 4], 'estimator__bootstrap': [True, False]}


{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 288, 377, 466, 555, 644, 733, 822, 911, 1000]}

In [59]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = MultiOutputClassifier(RandomForestClassifier())
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=890)
# Fit the random search model
rf_random.fit(X_train_sp_1p_trans, y_train_sp_1p)

Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=377; total time=   4.9s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=377; total time=   4.8s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=377; total time=   5.0s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimator__min_samples_split=5, estimator__n_estimators=466; total time=   5.9s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimat

In [60]:
# View the best parameters from fitting the random search
rf_random.best_params_

{'estimator__n_estimators': 555,
 'estimator__min_samples_split': 2,
 'estimator__min_samples_leaf': 1,
 'estimator__max_features': 'sqrt',
 'estimator__max_depth': 42,
 'estimator__bootstrap': False}

In [62]:
# Using RandomForestClassifier for multioutput and evaluaing the result after Ordinal Encoding
classifier_sp_1p = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = 42, n_estimators=555, min_samples_split = 2, min_samples_leaf = 1, max_features = 'sqrt', bootstrap = False))
classifier_sp_1p.fit(X_train_sp_1p_trans, y_train_sp_1p)
sp_1p_predictions= classifier_sp_1p.predict(X_test_sp_1p_trans)
classifier_sp_1p.score(X_test_sp_1p_trans, y_test_sp_1p)

0.786255684689237

In [64]:
#saving the model
joblib.dump(classifier_sp_1p, 'classifier_sp_1p.joblib')

['classifier_sp_1p.joblib']

<a id='sp2p_link'></a>
### Second Pick - Phase 2

In [3]:
df_reshaped = pd.read_pickle('./data_final.pickle')
# defining features and targets

# selected features for the 1st iter with Ordinal Encoding
sp_2p_features = df_reshaped[['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 
                              'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 
                              'left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3', 'left_attack_damage_4', 
                              'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3', 'left_receive_damage_4', 
                              'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3', 'left_kill_count_4',
                              'right_attack_damage_1', 'right_attack_damage_2', 'right_attack_damage_3',
                              'right_receive_damage_1', 'right_receive_damage_2', 'right_receive_damage_3',
                              'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3',
                              'left_hero_code_1', 'left_hero_code_2',
                              'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3', 'left_attribute_cd_4',
                              'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4',
                              'right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3',
                              'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3',
                              'right_job_cd_1', 'right_job_cd_2', 'right_job_cd_3',
#                              'left_postban', 'right_postban',
                              'left_preban_1', 'left_preban_2', 'right_preban_1', 'right_preban_2', 
                              'first_pick', 'is_win']]

sp_2p_target = df_reshaped[['left_hero_code_3', 'left_hero_code_4']]

X_train_sp_2p, X_test_sp_2p, y_train_sp_2p, y_test_sp_2p = train_test_split(sp_2p_features, sp_2p_target, test_size= 0.2, shuffle = True, random_state= 890)

# Scaling 
# Define numeric labels
sp_2p_numeric = ['left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3', 'left_attack_damage_4',
                 'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3', 'left_receive_damage_4',
                 'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3', 'left_kill_count_4', 
                 'right_attack_damage_1', 'right_attack_damage_2', 'right_attack_damage_3',
                 'right_receive_damage_1', 'right_receive_damage_2', 'right_receive_damage_3',
                 'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3']

In [4]:
sp_2p_scaler = StandardScaler()
sp_2p_scaler.fit(X_train_sp_2p[sp_2p_numeric])

In [5]:
X_train_sp_2p_trans = pd.DataFrame()
X_test_sp_2p_trans = pd.DataFrame()
X_train_sp_2p_trans[sp_2p_numeric] = sp_2p_scaler.transform(X_train_sp_2p[sp_2p_numeric])
X_test_sp_2p_trans[sp_2p_numeric] = sp_2p_scaler.transform(X_test_sp_2p[sp_2p_numeric])

In [6]:
# Encoding the categorical features
# Initializing an ordinal encoder for categorical variables
sp_2p_enc = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
# categorical labels for the 1st iter
sp_2p_categorical = ['left_hero_code_1', 'left_hero_code_2',
#                     'left_postban', 'right_postban',
                     'left_preban_1', 'left_preban_2',
                     'right_preban_1', 'right_preban_2',
                     'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3', 'left_attribute_cd_4',
                     'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4',
                     'right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 
                     'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3',
                     'right_job_cd_1', 'right_job_cd_2', 'right_job_cd_3']

X_train_sp_2p_trans[sp_2p_categorical] = sp_2p_enc.fit_transform(X_train_sp_2p[sp_2p_categorical].to_numpy())
X_test_sp_2p_trans[sp_2p_categorical] = sp_2p_enc.transform(X_test_sp_2p[sp_2p_categorical].to_numpy())

In [7]:
X_train_sp_2p_trans.index
X_test_sp_2p_trans.index

RangeIndex(start=0, stop=1979, step=1)

In [8]:
X_train_sp_2p = X_train_sp_2p.reset_index(drop=True)
X_test_sp_2p = X_test_sp_2p.reset_index(drop=True)

In [9]:
X_train_sp_2p.index
X_test_sp_2p.index

RangeIndex(start=0, stop=1979, step=1)

In [10]:
X_test_sp_2p.isna().sum()
X_test_sp_2p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_attack_damage_4      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_receive_damage_4     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
left_kill_count_4         0
right_attack_damage_1     0
right_attack_damage_2     0
right_attack_damage_3     0
right_receive_damage_1    0
right_receive_damage_2    0
right_receive_damage_3    0
right_kill_count_1        0
right_kill_count_2        0
right_kill_count_3        0
left_hero_code_1          0
left_hero_code_2          0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_attribute_cd_3       0
left_attribute_cd_4       0
left_job_cd_1             0
left_job_cd_2             0
left_job_cd_3             0
left_job_cd_4             0
right_hero_code_1   

In [11]:
# add features left out of scaling
l0f_sp_2p = ['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4',
             'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 
             'first_pick', 'is_win']

X_train_sp_2p_trans[l0f_sp_2p] = X_train_sp_2p[l0f_sp_2p]
X_test_sp_2p_trans[l0f_sp_2p] = X_test_sp_2p[l0f_sp_2p]

In [12]:
X_test_sp_2p_trans[l0f_sp_2p]

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,right_pick_order_1,right_pick_order_2,right_pick_order_3,first_pick,is_win
0,1,2,3,4,1,2,3,0,2
1,1,2,3,4,1,2,3,0,2
2,1,2,3,4,1,2,3,1,2
3,1,2,3,4,1,2,3,0,1
4,1,2,3,4,1,2,3,0,2
...,...,...,...,...,...,...,...,...,...
1974,1,2,3,4,1,2,3,0,2
1975,1,2,3,4,1,2,3,0,1
1976,1,2,3,4,1,2,3,0,2
1977,1,2,3,4,1,2,3,1,1


In [13]:
X_test_sp_2p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_attack_damage_4      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_receive_damage_4     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
left_kill_count_4         0
right_attack_damage_1     0
right_attack_damage_2     0
right_attack_damage_3     0
right_receive_damage_1    0
right_receive_damage_2    0
right_receive_damage_3    0
right_kill_count_1        0
right_kill_count_2        0
right_kill_count_3        0
left_hero_code_1          0
left_hero_code_2          0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_attribute_cd_3       0
left_attribute_cd_4       0
left_job_cd_1             0
left_job_cd_2             0
left_job_cd_3             0
left_job_cd_4             0
right_hero_code_1   

In [14]:
# Create a copy of training data after encoding
new_X_train_sp_2p = X_train_sp_2p.copy()

In [15]:
X_train_sp_2p.columns

Index(['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3',
       'left_pick_order_4', 'right_pick_order_1', 'right_pick_order_2',
       'right_pick_order_3', 'left_attack_damage_1', 'left_attack_damage_2',
       'left_attack_damage_3', 'left_attack_damage_4', 'left_receive_damage_1',
       'left_receive_damage_2', 'left_receive_damage_3',
       'left_receive_damage_4', 'left_kill_count_1', 'left_kill_count_2',
       'left_kill_count_3', 'left_kill_count_4', 'right_attack_damage_1',
       'right_attack_damage_2', 'right_attack_damage_3',
       'right_receive_damage_1', 'right_receive_damage_2',
       'right_receive_damage_3', 'right_kill_count_1', 'right_kill_count_2',
       'right_kill_count_3', 'left_hero_code_1', 'left_hero_code_2',
       'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3',
       'left_attribute_cd_4', 'left_job_cd_1', 'left_job_cd_2',
       'left_job_cd_3', 'left_job_cd_4', 'right_hero_code_1',
       'right_hero_code_2', 'rig

In [16]:
# Change columns names in the copied data to concatinate with the original data
new_X_train_sp_2p.columns = ['left_pick_order_1_orig', 'left_pick_order_2_orig', 'left_pick_order_3_orig', 'left_pick_order_4_orig',
                             'right_pick_order_1_orig', 'right_pick_order_2_orig', 'right_pick_order_3_orig',
                             'left_attack_damage_1_orig', 'left_attack_damage_2_orig', 'left_attack_damage_3_orig', 'left_attack_damage_4_orig',
                             'left_receive_damage_1_orig', 'left_receive_damage_2_orig', 'left_receive_damage_3_orig', 'left_receive_damage_4_orig',
                             'left_kill_count_1_orig', 'left_kill_count_2_orig', 'left_kill_count_3_orig', 'left_kill_count_4_orig',
                             'right_attack_damage_1_orig', 'right_attack_damage_2_orig', 'right_attack_damage_3_orig',
                             'right_receive_damage_1_orig', 'right_receive_damage_2_orig', 'right_receive_damage_3_orig',
                             'right_kill_count_1_orig', 'right_kill_count_2_orig', 'right_kill_count_3_orig',
                             'left_hero_code_1_orig', 'left_hero_code_2_orig',
                             'left_attribute_cd_1_orig', 'left_attribute_cd_2_orig', 'left_attribute_cd_3_orig', 'left_attribute_cd_4_orig',
                             'left_job_cd_1_orig', 'left_job_cd_2_orig', 'left_job_cd_3_orig', 'left_job_cd_4_orig',
                             'right_hero_code_1_orig', 'right_hero_code_2_orig', 'right_hero_code_3_orig', 
                             'right_attribute_cd_1_orig', 'right_attribute_cd_2_orig', 'right_attribute_cd_3_orig', 
                             'right_job_cd_1_orig', 'right_job_cd_2_orig', 'right_job_cd_3_orig',
                             'left_preban_1_orig', 'left_preban_2_orig', 
                             'right_preban_1_orig', 'right_preban_2_orig', 
                             'first_pick_orig', 'is_win_orig']

In [17]:
len(new_X_train_sp_2p.columns)

53

In [18]:
# Cocatenate original data with encoded data for later use in the function
dict_sp_2p = pd.concat((X_train_sp_2p_trans,new_X_train_sp_2p.reset_index(drop=True)), axis= 1).copy()
len(dict_sp_2p.columns)

106

In [19]:
dict_sp_2p[['left_hero_code_1', 'left_hero_code_1_orig']]

Unnamed: 0,left_hero_code_1,left_hero_code_1_orig
0,33.0,Sea Phantom Politis
1,25.0,New Moon Luna
2,19.0,Laia
3,13.0,Death Dealer Ray
4,19.0,Laia
...,...,...
7908,18.0,Jenua
7909,39.0,Zio
7910,15.0,Dragon Bride Senya
7911,13.0,Death Dealer Ray


<a id='sp2p_link'></a>
### Tunning Hyperparameters for the Second Pick - Phase 2

In [87]:
# RandomizedSearchCV parameters
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 500, num = 10)]
# Number of features to consider at every split
max_features = ['sqrt', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 50, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'estimator__n_estimators': n_estimators,
               'estimator__max_features': max_features,
               'estimator__max_depth': max_depth,
               'estimator__min_samples_split': min_samples_split,
               'estimator__min_samples_leaf': min_samples_leaf,
               'estimator__bootstrap': bootstrap}
print(random_grid)
{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500]}

{'estimator__n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500], 'estimator__max_features': ['sqrt', 'log2'], 'estimator__max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None], 'estimator__min_samples_split': [2, 5, 10], 'estimator__min_samples_leaf': [1, 2, 4], 'estimator__bootstrap': [True, False]}


{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500]}

In [88]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = MultiOutputClassifier(RandomForestClassifier())
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=890)
# Fit the random search model
rf_random.fit(X_train_sp_2p_trans, y_train_sp_2p)

Fitting 3 folds for each of 100 candidates, totalling 300 fits
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   7.1s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   7.0s
[CV] END estimator__bootstrap=True, estimator__max_depth=38, estimator__max_features=log2, estimator__min_samples_leaf=1, estimator__min_samples_split=5, estimator__n_estimators=266; total time=   6.9s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimator__min_samples_split=5, estimator__n_estimators=300; total time=   8.1s
[CV] END estimator__bootstrap=True, estimator__max_depth=46, estimator__max_features=sqrt, estimator__min_samples_leaf=4, estimat

In [89]:
# View the best parameters from fitting the random search
rf_random.best_params_

{'estimator__n_estimators': 333,
 'estimator__min_samples_split': 2,
 'estimator__min_samples_leaf': 1,
 'estimator__max_features': 'sqrt',
 'estimator__max_depth': 42,
 'estimator__bootstrap': False}

: 

In [20]:
# Using RandomForestClassifier for multioutput and evaluaing the result after Ordinal Encoding
classifier_sp_2p = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = 42, n_estimators=333, min_samples_split = 2, min_samples_leaf = 1, max_features = 'sqrt', bootstrap = False))
classifier_sp_2p.fit(X_train_sp_2p_trans, y_train_sp_2p)
predictions_sp_2p = classifier_sp_2p.predict(X_test_sp_2p_trans)
classifier_sp_2p.score(X_test_sp_2p_trans, y_test_sp_2p)

0.5462354724608388

In [21]:
X_train_sp_2p_trans.isna().sum()

left_attack_damage_1      0
left_attack_damage_2      0
left_attack_damage_3      0
left_attack_damage_4      0
left_receive_damage_1     0
left_receive_damage_2     0
left_receive_damage_3     0
left_receive_damage_4     0
left_kill_count_1         0
left_kill_count_2         0
left_kill_count_3         0
left_kill_count_4         0
right_attack_damage_1     0
right_attack_damage_2     0
right_attack_damage_3     0
right_receive_damage_1    0
right_receive_damage_2    0
right_receive_damage_3    0
right_kill_count_1        0
right_kill_count_2        0
right_kill_count_3        0
left_hero_code_1          0
left_hero_code_2          0
left_preban_1             0
left_preban_2             0
right_preban_1            0
right_preban_2            0
left_attribute_cd_1       0
left_attribute_cd_2       0
left_attribute_cd_3       0
left_attribute_cd_4       0
left_job_cd_1             0
left_job_cd_2             0
left_job_cd_3             0
left_job_cd_4             0
right_hero_code_1   

In [22]:
#saving the model
joblib.dump(classifier_sp_2p, 'classifier_sp_2p.joblib')

['classifier_sp_2p.joblib']

<a id='sp3p_link'></a>
### Second Pick - Phase 3

In [None]:
df_reshaped = pd.read_pickle('./data_final.pickle')
# defining features and targets

# selected features for the 1st iter with Ordinal Encoding
sp_3p_features = df_reshaped[['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 'left_pick_order_5',
                              'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5',
                              'left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3', 'left_attack_damage_4', 'left_attack_damage_5', 
                              'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3', 'left_receive_damage_4', 'left_receive_damage_5',
                              'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3', 'left_kill_count_4', 'left_kill_count_5',
                              'right_attack_damage_1', 'right_attack_damage_2', 'right_attack_damage_3','right_attack_damage_4', 'right_attack_damage_5',
                              'right_receive_damage_1', 'right_receive_damage_2', 'right_receive_damage_3', 'right_receive_damage_4', 'right_receive_damage_5',
                              'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3', 'right_kill_count_4', 'right_kill_count_5',
                              'left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4', 
#                              'left_postban', 'right_postban',
                              'left_preban_1', 'left_preban_2',
                              'right_preban_1', 'right_preban_2',
                              'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3', 'left_attribute_cd_4', 'left_attribute_cd_5',
                              'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4', 'left_job_cd_5',
                              'right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 'right_hero_code_4', 'right_hero_code_5',
                              'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3','right_attribute_cd_4', 'right_attribute_cd_5',
                              'right_job_cd_1', 'right_job_cd_2', 'right_job_cd_3', 'right_job_cd_4', 'right_job_cd_5',
                              'first_pick', 'is_win']]

sp_3p_target = df_reshaped[['left_hero_code_5']]

X_train_sp_3p, X_test_sp_3p, y_train_sp_3p, y_test_sp_3p = train_test_split(sp_3p_features, sp_3p_target, test_size= 0.2, shuffle = True, random_state= 890)



# Scaling 
# Define numeric labels
sp_3p_numeric = ['left_attack_damage_1', 'left_attack_damage_2', 'left_attack_damage_3', 'left_attack_damage_4', 'left_attack_damage_5', 
                 'left_receive_damage_1', 'left_receive_damage_2', 'left_receive_damage_3', 'left_receive_damage_4', 'left_receive_damage_5',
                 'left_kill_count_1', 'left_kill_count_2', 'left_kill_count_3', 'left_kill_count_4', 'left_kill_count_5', 
                 'right_attack_damage_1', 'right_attack_damage_2', 'right_attack_damage_3','right_attack_damage_4', 'right_attack_damage_5',
                 'right_receive_damage_1', 'right_receive_damage_2', 'right_receive_damage_3', 'right_receive_damage_4', 'right_receive_damage_5',
                 'right_kill_count_1', 'right_kill_count_2', 'right_kill_count_3', 'right_kill_count_4', 'right_kill_count_5']


In [None]:
sp_3p_scaler = StandardScaler()
sp_3p_scaler.fit(X_train_sp_3p[sp_3p_numeric])

In [None]:
X_train_sp_3p_trans = pd.DataFrame()
X_test_sp_3p_trans = pd.DataFrame()
X_train_sp_3p_trans[sp_3p_numeric] = sp_3p_scaler.transform(X_train_sp_3p[sp_3p_numeric])
X_test_sp_3p_trans[sp_3p_numeric] = sp_3p_scaler.transform(X_test_sp_3p[sp_3p_numeric])

In [None]:
# Encoding the categorical features
# Initializing an ordinal encoder for categorical variables
sp_3p_enc = OrdinalEncoder(handle_unknown = 'use_encoded_value', unknown_value = -1)
# categorical labels for the 1st iter
sp_3p_categorical = ['left_hero_code_1', 'left_hero_code_2', 'left_hero_code_3', 'left_hero_code_4',
#                     'left_postban', 'right_postban',
                     'left_preban_1', 'left_preban_2',
                     'right_preban_1', 'right_preban_2',
                     'left_attribute_cd_1', 'left_attribute_cd_2', 'left_attribute_cd_3', 'left_attribute_cd_4', 'left_attribute_cd_5',
                     'left_job_cd_1', 'left_job_cd_2', 'left_job_cd_3', 'left_job_cd_4', 'left_job_cd_5',
                     'right_hero_code_1', 'right_hero_code_2', 'right_hero_code_3', 'right_hero_code_4', 'right_hero_code_5',
                     'right_attribute_cd_1', 'right_attribute_cd_2', 'right_attribute_cd_3','right_attribute_cd_4', 'right_attribute_cd_5', 
                     'right_job_cd_1', 'right_job_cd_2', 'right_job_cd_3', 'right_job_cd_4', 'right_job_cd_5']

X_train_sp_3p_trans[sp_3p_categorical] = sp_3p_enc.fit_transform(X_train_sp_3p[sp_3p_categorical].to_numpy())
X_test_sp_3p_trans[sp_3p_categorical] = sp_3p_enc.transform(X_test_sp_3p[sp_3p_categorical].to_numpy())

In [None]:
X_train_sp_3p_trans.index
X_test_sp_3p_trans.index

In [None]:
X_train_sp_3p = X_train_sp_3p.reset_index(drop=True)
X_test_sp_3p = X_test_sp_3p.reset_index(drop=True)

In [None]:
X_train_sp_3p.index
X_test_sp_3p.index

In [None]:
X_test_sp_3p.isna().sum()
X_test_sp_3p_trans.isna().sum()

In [None]:
# add features left out of scaling
l0f_sp_3p = ['left_pick_order_1', 'left_pick_order_2', 'left_pick_order_3', 'left_pick_order_4', 'left_pick_order_5',
             'right_pick_order_1', 'right_pick_order_2', 'right_pick_order_3', 'right_pick_order_4', 'right_pick_order_5',
             'first_pick', 'is_win']

X_train_sp_3p_trans[l0f_sp_3p] = X_train_sp_3p[l0f_sp_3p]
X_test_sp_3p_trans[l0f_sp_3p] = X_test_sp_3p[l0f_sp_3p]

In [None]:
X_test_sp_3p_trans[l0f_sp_3p]

In [None]:
X_test_sp_3p_trans.isna().sum()

In [None]:
# Create a copy of training data after encoding
new_X_train_sp_3p = X_train_sp_3p.copy()

In [None]:
X_train_sp_3p.columns

In [None]:
# Change columns names in the copied data to concatinate with the original data
new_X_train_sp_3p.columns = [sdfgsdf]

In [None]:
len(new_X_train_sp_3p.columns)

In [None]:
# Cocatenate original data with encoded data for later use in the function
dict_sp_3p = pd.concat((X_train_sp_3p_trans,new_X_train_sp_3p.reset_index(drop=True)), axis= 1).copy()
len(dict_sp_3p.columns)

In [None]:
dict_sp_3p[['left_hero_code_1', 'left_hero_code_1_orig']]

<a id='sp3p_link'></a>
### Tunning Hyperparameters for the Second Pick - Phase 3

In [None]:
# RandomizedSearchCV parameters
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 500, num = 10)]
# Number of features to consider at every split
max_features = ['sqrt', 'log2']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 50, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'estimator__n_estimators': n_estimators,
               'estimator__max_features': max_features,
               'estimator__max_depth': max_depth,
               'estimator__min_samples_split': min_samples_split,
               'estimator__min_samples_leaf': min_samples_leaf,
               'estimator__bootstrap': bootstrap}
print(random_grid)
{'bootstrap': [True, False],
 'max_depth': [10, 14, 18, 22, 26, 30, 34, 38, 42, 46, 50, None],
 'max_features': ['sqrt', 'log2'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 233, 266, 300, 333, 366, 400, 433, 466, 500]}

In [None]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = MultiOutputClassifier(RandomForestClassifier())
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=890)
# Fit the random search model
rf_random.fit(X_train_sp_3p_trans, y_train_sp_3p)

In [None]:
# View the best parameters from fitting the random search
rf_random.best_params_

In [None]:
# Using RandomForestClassifier for multioutput and evaluaing the result after Ordinal Encoding
classifier_sp_3p = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = 14, n_estimators=266, min_samples_split = 2, min_samples_leaf = 2, max_features = 'sqrt', bootstrap = False))
classifier_sp_3p.fit(X_train_sp_3p_trans, y_train_sp_3p)
predictions_sp_3p = classifier_sp_3p.predict(X_test_sp_3p_trans)
classifier_sp_3p.score(X_test_sp_3p_trans, y_test_sp_3p)

In [None]:
X_train_sp_3p_trans.isna().sum()

In [None]:
#saving the model
joblib.dump(classifier_sp_3p, 'classifier_sp_3p.joblib')

## Tunning Hyperparameters

In [6]:
from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap}
print(random_grid)
{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}

{'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000], 'max_features': ['auto', 'sqrt'], 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None], 'min_samples_split': [2, 5, 10], 'min_samples_leaf': [1, 2, 4], 'bootstrap': [True, False]}


{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}

In [10]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestClassifier()
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = None)
# Fit the random search model
rf_random.fit(X_train, y_train.values.ravel())

Fitting 3 folds for each of 100 candidates, totalling 300 fits




[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=400; total time=   9.3s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=400; total time=   8.4s
[CV] END bootstrap=True, max_depth=30, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=400; total time=   8.5s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=2000; total time=  28.8s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=2000; total time=  28.2s
[CV] END bootstrap=True, max_depth=10, max_features=sqrt, min_samples_leaf=1, min_samples_split=5, n_estimators=2000; total time=  28.6s
[CV] END bootstrap=False, max_depth=10, max_features=sqrt, min_samples_leaf=2, min_samples_split=5, n_estimators=1200; total time=  24.5s
[CV] END bootstrap=False, max_depth=10, max

123 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
123 fits failed with the following error:
Traceback (most recent call last):
  File "/home/ostap/projects/mike_env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/ostap/projects/mike_env/lib/python3.10/site-packages/sklearn/base.py", line 1467, in wrapper
    estimator._validate_params()
  File "/home/ostap/projects/mike_env/lib/python3.10/site-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/home/ostap/projects/mike_env/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", li

In [12]:
# View the best parameters from fitting the random search
rf_random.best_params_

{'n_estimators': 400,
 'min_samples_split': 2,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'max_depth': None,
 'bootstrap': False}

In [16]:
# Using RandomForestClassifier for multioutput and evaluaing the result after Ordinal Encoding
classifier = MultiOutputClassifier(RandomForestClassifier(random_state=890, max_depth = None, n_estimators=400, min_samples_split = 2, min_samples_leaf = 1, max_features = 'sqrt', bootstrap = False))
classifier.fit(X_train, y_train)
predictions= classifier.predict(X_test)
classifier.score(X_test, y_test)

0.6654767975522693

# Writing a Pick-Suggesting Function

## First Pick - Phase 1 Function

In [97]:
# filter out battles where player on the left lost and picked second
first_pick_list =  df_reshaped[(df_reshaped['first_pick'] == 1) & (df_reshaped['is_win'] == 1)]
first_pick_list




Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,left_pick_order_4,left_pick_order_5,left_hero_code_1,left_hero_code_2,left_hero_code_3,left_hero_code_4,left_hero_code_5,...,right_job_cd_3,right_job_cd_4,right_job_cd_5,right_postban,first_pick,is_win,left_preban_1,left_preban_2,right_preban_1,right_preban_2
0,1,2,3,4,5,Laia,Blood Moon Haste,Moon Bunny Dominiel,Melissa,Sez,...,assassin,warrior,assassin,Ambitious Tywin,1,1,Zio,New Moon Luna,Sea Phantom Politis,New Moon Luna
2,1,2,3,4,5,Sea Phantom Politis,Blood Moon Haste,Aria,Eternal Wanderer Ludwig,Requiem Roana,...,assassin,mage,assassin,Cidd,1,1,Zio,New Moon Luna,Ran,New Moon Luna
3,1,2,3,4,5,Laia,Blood Moon Haste,Moon Bunny Dominiel,Melissa,Roy Mustang,...,warrior,assassin,mage,Celine,1,1,Zio,New Moon Luna,Sea Phantom Politis,Peira
4,1,2,3,4,5,Laia,Ambitious Tywin,Moon Bunny Dominiel,Roy Mustang,Melissa,...,knight,warrior,mage,Destina,1,1,New Moon Luna,Zio,Nahkwol,Sea Phantom Politis
7,1,2,3,4,5,Laia,Blood Moon Haste,Moon Bunny Dominiel,Roy Mustang,Melissa,...,assassin,knight,assassin,Ran,1,1,New Moon Luna,Zio,Sea Phantom Politis,New Moon Luna
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9930,1,2,3,4,5,Laia,Abyssal Yufine,Crimson Armin,Urban Shadow Choux,Moon Bunny Dominiel,...,ranger,manauser,assassin,Desert Jewel Basar,1,1,Jenua,Sea Phantom Politis,New Moon Luna,Death Dealer Ray
9932,1,2,3,4,5,Laia,Nahkwol,Crimson Armin,Celine,ae-KARINA,...,ranger,assassin,mage,Urban Shadow Choux,1,1,Jenua,Sea Phantom Politis,Sea Phantom Politis,New Moon Luna
9933,1,2,3,4,5,Laia,Dragon Bride Senya,Crimson Armin,ae-KARINA,Solitaria of the Snow,...,warrior,knight,manauser,Midnight Gala Lilias,1,1,Jenua,Sea Phantom Politis,Laia,Abyssal Yufine
9938,1,2,3,4,5,Laia,Abyssal Yufine,Crimson Armin,Nahkwol,Elvira,...,warrior,manauser,manauser,New Moon Luna,1,1,Jenua,Sea Phantom Politis,Helen,Melany


In [98]:
first_pick_list['left_hero_code_1'].value_counts()

left_hero_code_1
Laia                       715
New Moon Luna              473
Dragon Bride Senya         358
Blood Moon Haste           344
Death Dealer Ray           330
Sea Phantom Politis        293
Jenua                      113
Ambitious Tywin             99
Zio                         80
Conqueror Lilias            69
Ran                         28
Celine                      24
Cidd                        15
Politis                     13
Lilias                      13
Abyssal Yufine              10
Peira                        8
Briar Witch Iseria           8
Urban Shadow Choux           6
Navy Captain Landy           5
Angel of Light Angelica      5
Unbound Knight Arowell       4
Crimson Armin                4
Sylvan Sage Vivian           3
Watcher Schuri               3
Belian                       2
Albedo                       2
Rimuru                       1
ae-KARINA                    1
Destina                      1
Abigail                      1
Lua                   

In [99]:
df_reshaped['left_hero_code_1'].value_counts()

left_hero_code_1
Laia                       2283
New Moon Luna              1446
Blood Moon Haste           1214
Dragon Bride Senya         1169
Death Dealer Ray           1130
Sea Phantom Politis         943
Jenua                       327
Ambitious Tywin             309
Zio                         251
Conqueror Lilias            217
Ran                         115
Celine                       94
Cidd                         70
Lilias                       54
Abyssal Yufine               45
Politis                      44
Nahkwol                      22
Briar Witch Iseria           20
Peira                        17
Urban Shadow Choux           17
Albedo                       16
Watcher Schuri               13
Crimson Armin                12
Unbound Knight Arowell       11
Angel of Light Angelica      10
Navy Captain Landy            9
Sylvan Sage Vivian            7
Belian                        3
Schuri                        3
Last Rider Krau               2
Wanderer Silk          

In [100]:
# Calculate winrate per hero picked first
winrate = pd.DataFrame((first_pick_list['left_hero_code_1'].value_counts(), df_reshaped['left_hero_code_1'].value_counts()), index = ['win', 'ttl']).T
winrate.dropna(inplace = True)
winrate['win_rate'] = winrate['win']/winrate['ttl']
winrate.index

Index(['Laia', 'New Moon Luna', 'Dragon Bride Senya', 'Blood Moon Haste',
       'Death Dealer Ray', 'Sea Phantom Politis', 'Jenua', 'Ambitious Tywin',
       'Zio', 'Conqueror Lilias', 'Ran', 'Celine', 'Cidd', 'Politis', 'Lilias',
       'Abyssal Yufine', 'Peira', 'Briar Witch Iseria', 'Urban Shadow Choux',
       'Navy Captain Landy', 'Angel of Light Angelica',
       'Unbound Knight Arowell', 'Crimson Armin', 'Sylvan Sage Vivian',
       'Watcher Schuri', 'Belian', 'Albedo', 'Rimuru', 'ae-KARINA', 'Destina',
       'Abigail', 'Lua'],
      dtype='object', name='left_hero_code_1')

In [101]:
# Write a function suggesting a pick based on the most frequent picks and highest win rates
def fp_1(prebans = []):
    """ 
    First pick suggestion
    
    Parameters:
    - List of heroes that were prebanned at the begining of a battle
    
    Returns:
    - str: a hero name based on the most frequently
    picked heroes with the highest win rate
    - str: list of prebans
    """
    mask = winrate.index.isin(prebans)
    hero_list_fp1 = winrate[~mask]
    hero_name = hero_list_fp1.sort_values(['win', 'win_rate'], ascending = False).index[0]
    print('First Pick:', hero_name)
    return hero_name, prebans

In [102]:
# Test the function
fp_1_var = fp_1(['Laia', 'Death Dealer Ray', 'Death Dealer Ray', 'New Moon Luna'])

First Pick: Dragon Bride Senya


<a id='fp23_func_link'></a>
## First Pick - Phase 2 Function

In [103]:
fp_2p_features

Unnamed: 0,left_pick_order_1,left_pick_order_2,left_pick_order_3,right_pick_order_1,right_pick_order_2,left_attack_damage_1,left_attack_damage_2,left_attack_damage_3,left_receive_damage_1,left_receive_damage_2,...,right_attribute_cd_1,right_attribute_cd_2,right_job_cd_1,right_job_cd_2,left_preban_1,left_preban_2,right_preban_1,right_preban_2,first_pick,is_win
0,1,2,3,1,2,0,1267,0,0,29297,...,dark,light,manauser,knight,Zio,New Moon Luna,Sea Phantom Politis,New Moon Luna,1,1
1,1,2,3,1,2,3112,0,53383,0,0,...,light,light,knight,warrior,Zio,New Moon Luna,Blood Moon Haste,Jenua,0,1
2,1,2,3,1,2,0,4084,6434,0,93232,...,light,fire,knight,assassin,Zio,New Moon Luna,Ran,New Moon Luna,1,1
3,1,2,3,1,2,2043,16469,1228,51842,19373,...,dark,light,manauser,knight,Zio,New Moon Luna,Sea Phantom Politis,Peira,1,1
4,1,2,3,1,2,0,0,0,0,0,...,dark,wind,manauser,manauser,New Moon Luna,Zio,Nahkwol,Sea Phantom Politis,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9941,1,2,3,1,2,23619,0,4764,74779,0,...,dark,light,warrior,knight,Sea Phantom Politis,Jenua,Ran,New Moon Luna,0,2
9942,1,2,3,1,2,3235,0,0,17022,75373,...,light,wind,knight,assassin,Sea Phantom Politis,Jenua,Sea Phantom Politis,Jenua,0,2
9943,1,2,3,1,2,497,3219,12001,34720,46092,...,dark,wind,manauser,assassin,Sea Phantom Politis,Jenua,Jenua,New Moon Luna,1,1
9944,1,2,3,1,2,11488,38100,0,85987,126078,...,dark,fire,manauser,mage,Sea Phantom Politis,Jenua,Navy Captain Landy,Belian,0,2


In [104]:
# Get mean value of encoded numeric features 
battle_stats = fp_2p_features[fp_2p_numeric].astype(float).mean().values.reshape(1,15)
battle_stats.shape

(1, 15)

In [105]:
battle_stats[0][0]

8358.296401132227

In [109]:
def fp_2_3(enemy_pick_1,
           enemy_pick_2, 
#           left_postban, 
#           right_postban,
           left_preban_1 = fp_1_var[1][0],
           left_preban_2 = fp_1_var[1][1],
           right_preban_1 = fp_1_var[1][2],
           right_preban_2 = fp_1_var[1][3],
           my_fp1 = fp_1_var[0]):
    
    """
    Second and third picks suggestion

    - Parameters: 1st and 2nd heroes picked by the opponent
    
    Returns:
    - array: hero names
    """
        
    test_features_2_3 = pd.DataFrame(columns = X_train_fp_2p_trans.columns)
    # configure stats
    #print(test_features_2_3.loc[0, 'left_preban_1'])
    #print(dict_fp_2p[dict_fp_2p['left_preban_1_orig'] == left_preban_1]['left_preban_1'].values[0])
    test_features_2_3.loc[0, 'left_preban_1'] = dict_fp_2p[dict_fp_2p['left_preban_1_orig'] == left_preban_1]['left_preban_1'].values[0] # left_preban_1 
    test_features_2_3.loc[0, 'left_preban_2'] = dict_fp_2p[dict_fp_2p['left_preban_2_orig'] == left_preban_2]['left_preban_2'].values[0] # left_preban_2 
    test_features_2_3.loc[0, 'right_preban_1'] = dict_fp_2p[dict_fp_2p['right_preban_1_orig'] == right_preban_1]['right_preban_1'].values[0] # right_preban_1 
    test_features_2_3.loc[0, 'right_preban_2'] = dict_fp_2p[dict_fp_2p['right_preban_2_orig'] == right_preban_2]['right_preban_2'].values[0] # right_preban_2 
    test_features_2_3.loc[0, 'right_hero_code_1'] = dict_fp_2p[dict_fp_2p['right_hero_code_1_orig'] == enemy_pick_1]['right_hero_code_1'].values[0] # right_hero_code_1 
    test_features_2_3.loc[0, 'right_hero_code_2'] = dict_fp_2p[dict_fp_2p['right_hero_code_2_orig'] == enemy_pick_2]['right_hero_code_2'].values[0] # right_hero_code_2
    #test_features_2_3.loc[0, 'left_postban'] = dict_fp_2p[dict_fp_2p['left_postban_orig'] == left_postban]['left_postban'].values[0] # predict after all picks are made
    #test_features_2_3.loc[0, 'right_postban'] = dict_fp_2p[dict_fp_2p['right_postban_orig'] == right_postban]['right_postban'].values[0] # predict after all picks are made
    
    #print(dict_fp_2p[dict_fp_2p['left_hero_code_1_orig'] == my_fp1]['left_attribute_cd_1'].values)
    test_features_2_3.loc[0, 'left_attribute_cd_1'] = dict_fp_2p[dict_fp_2p['left_hero_code_1_orig'] == my_fp1]['left_attribute_cd_1'].values[0] # left_attribute_cd_1
    # test_features_2_3.loc[0, 'left_attribute_cd_2'] = dict_fp_2p[dict_fp_2p['left_hero_code_1_orig'] == hero_names_2_3[0]]['left_attribute_cd_2'].values[0] # left_attribute_cd_2
    test_features_2_3.loc[0, 'left_hero_code_1'] = dict_fp_2p[dict_fp_2p['left_hero_code_1_orig'] == my_fp1]['left_hero_code_1'].values[0] # left_hero_code_1
    # test_features_2_3.loc[0, 'left_attribute_cd_3'] = dict_fp_2p[dict_fp_2p['left_hero_code_1_orig'] == hero_names_2_3[1]]['left_attribute_cd_3'].values[0] # left_attribute_cd_3
    test_features_2_3.loc[0, 'left_job_cd_1'] = dict_fp_2p[dict_fp_2p['left_hero_code_1_orig'] == my_fp1]['left_job_cd_1'].values[0] # left_job_cd_1
    # test_features_2_3.loc[0, 'left_job_cd_2'] = dict_fp_2p[dict_fp_2p['left_hero_code_1_orig'] == hero_names_2_3[0]]['left_job_cd_2'].values[0].values[0] # left_job_cd_2
    # test_features_2_3.loc[0, 'left_job_cd_3'] = dict_fp_2p[dict_fp_2p['left_hero_code_3_orig'] == hero_names_2_3[1]]['left_job_cd_3'].values[0] # left_job_cd_3
    test_features_2_3.loc[0, 'right_attribute_cd_1'] = dict_fp_2p[dict_fp_2p['right_hero_code_1_orig'] == enemy_pick_1]['right_attribute_cd_1'].values[0] # right_attribute_cd_1
    test_features_2_3.loc[0, 'right_attribute_cd_2'] = dict_fp_2p[dict_fp_2p['right_hero_code_2_orig'] == enemy_pick_2]['right_attribute_cd_2'].values[0] # right_attribute_cd_2
    test_features_2_3.loc[0, 'right_job_cd_1'] = dict_fp_2p[dict_fp_2p['right_hero_code_1_orig'] == enemy_pick_1]['right_job_cd_1'].values[0] # right_job_cd_1
    test_features_2_3.loc[0, 'right_job_cd_2'] = dict_fp_2p[dict_fp_2p['right_hero_code_2_orig'] == enemy_pick_2]['right_job_cd_2'].values[0] # right_job_cd_2

    
    # supply mean numeric values as default encoded numeric features
    battle_stats = fp_2p_features[fp_2p_numeric].astype(float).mean().values.reshape(1,15)
    for i in range(len(test_features_2_3[fp_2p_numeric].columns)):
        # print(battle_stats[i])
        # print(test_features_2_3[fp_2p_numeric[i]])
        test_features_2_3[fp_2p_numeric[i]] = battle_stats[0][i]
    
    # supply unencoded numeric values as default numeric features
    test_features_2_3['left_pick_order_1'] = 1 
    test_features_2_3['left_pick_order_2'] = 2
    test_features_2_3['left_pick_order_3'] = 3
    test_features_2_3['right_pick_order_1'] = 1
    test_features_2_3['right_pick_order_2'] = 2
    test_features_2_3['first_pick'] = 1
    test_features_2_3['is_win'] = 1
    
              
    # make prediction
    # print(len(test_features_2_3.columns))
    # print(test_features_2_3.head())
    hero_names_2_3 = classifier_fp_2p.predict(test_features_2_3)
    print('2nd & 3rd pick:', hero_names_2_3)
    return hero_names_2_3, enemy_pick_1, enemy_pick_2

In [110]:
# Test the function
fp_2_3(enemy_pick_1 = 'Blood Moon Haste',
       enemy_pick_2 = 'Ocean Breeze Luluca') 
#       left_postban = 'Ambitious Tywin', 
#       right_postban = 'New Moon Luna')


2nd & 3rd pick: [['Albedo' 'Abyssal Yufine']]


(array([['Albedo', 'Abyssal Yufine']], dtype=object),
 'Blood Moon Haste',
 'Ocean Breeze Luluca')

In [58]:
fp_2_3_var = fp_2_3(enemy_pick_1 = 'Blood Moon Haste', enemy_pick_2 = 'Ocean Breeze Luluca')

2nd & 3rd pick: [['Albedo' 'Abyssal Yufine']]


In [65]:
type(fp_2_3_var[0][0])
fp_2_3_var[0][1]

'Abyssal Yufine'

<a id='fp45_func_link'></a>
## First Pick - Phase 3 Function

In [None]:
def fp_4_5(enemy_pick_3,
           enemy_pick_4, 
#           left_postban, 
#           right_postban,
           enemy_pick_1 = nani?
           enemy_pick_2 = nani?
           left_preban_1 = fp_1_var[1][0],
           left_preban_2 = fp_1_var[1][1],
           right_preban_1 = fp_1_var[1][2],
           right_preban_2 = fp_1_var[1][3],
           my_fp1 = fp_1_var[0],
           my_fp2 = fp_2_3_var[0][0],
           my_fp3 = fp_2_3_var[0][1]):
    
    """
    Second and third picks suggestion

    - Parameters: 3rd and 4th heroes picked by the opponent
    
    Returns:
    List of hero names
    """
    
    test_features_4_5 = pd.DataFrame(columns = X_train_fp_3p_trans.columns)
    # configure stats
    # prebans
    test_features_4_5.loc[0, 'left_preban_1'] = dict_fp_3p[dict_fp_3p['left_preban_1_orig'] == left_preban_1]['left_preban_1'].values[0] # left_preban_1
    test_features_4_5.loc[0, 'left_preban_2'] = dict_fp_3p[dict_fp_3p['left_preban_2_orig'] == left_preban_2]['left_preban_2'].values[0] # left_preban_2
    test_features_4_5.loc[0, 'right_preban_1'] = dict_fp_3p[dict_fp_3p['right_preban_1_orig'] == right_preban_1]['right_preban_1'].values[0] # right_preban_1
    test_features_4_5.loc[0, 'right_preban_2'] = dict_fp_3p[dict_fp_3p['right_preban_2_orig'] == right_preban_2]['right_preban_2'].values[0] # right_preban_2
    
    # right side picks
    test_features_4_5.loc[0, 'right_attribute_cd_1'] = dict_fp_3p[dict_fp_3p['right_hero_code_1_orig'] == enemy_pick_1]['right_attribute_cd_1'].values[0] # right_attribute_cd_1
    test_features_4_5.loc[0, 'right_attribute_cd_2'] = dict_fp_3p[dict_fp_3p['right_hero_code_2_orig'] == enemy_pick_2]['right_attribute_cd_2'].values[0] # right_attribute_cd_2
    test_features_4_5.loc[0, 'right_attribute_cd_3'] = dict_fp_3p[dict_fp_3p['right_hero_code_3_orig'] == enemy_pick_3]['right_attribute_cd_3'].values[0] # right_attribute_cd_3
    test_features_4_5.loc[0, 'right_attribute_cd_4'] = dict_fp_3p[dict_fp_3p['right_hero_code_4_orig'] == enemy_pick_4]['right_attribute_cd_4'].values[0] # right_attribute_cd_4
    test_features_4_5.loc[0, 'right_job_cd_1'] = dict_fp_3p[dict_fp_3p['right_hero_code_1_orig'] == enemy_pick_1]['right_job_cd_1'].values[0] # right_job_cd_1
    test_features_4_5.loc[0, 'right_job_cd_2'] = dict_fp_3p[dict_fp_3p['right_hero_code_2_orig'] == enemy_pick_2]['right_job_cd_2'].values[0] # right_job_cd_2
    test_features_4_5.loc[0, 'right_job_cd_3'] = dict_fp_3p[dict_fp_3p['right_hero_code_3_orig'] == enemy_pick_3]['right_job_cd_3'].values[0] # right_job_cd_3
    test_features_4_5.loc[0, 'right_job_cd_4'] = dict_fp_3p[dict_fp_3p['right_hero_code_4_orig'] == enemy_pick_4]['right_job_cd_4'].values[0] # right_job_cd_4    
    test_features_4_5.loc[0, 'right_hero_code_1'] = dict_fp_3p[dict_fp_3p['right_hero_code_1_orig'] == enemy_pick_1]['right_hero_code_1'].values[0] # right_hero_code_1
    test_features_4_5.loc[0, 'right_hero_code_2'] = dict_fp_3p[dict_fp_3p['right_hero_code_2_orig'] == enemy_pick_2]['right_hero_code_2'].values[0] # right_hero_code_2
    test_features_4_5.loc[0, 'right_hero_code_3'] = dict_fp_3p[dict_fp_3p['right_hero_code_3_orig'] == enemy_pick_3]['right_hero_code_3'].values[0] # right_hero_code_3
    test_features_4_5.loc[0, 'right_hero_code_4'] = dict_fp_3p[dict_fp_3p['right_hero_code_4_orig'] == enemy_pick_4]['right_hero_code_4'].values[0] # right_hero_code_4
    
    # left side picks
    test_features_4_5.loc[0, 'left_attribute_cd_1'] = dict_fp_3p[dict_fp_3p['left_hero_code_1_orig'] == my_fp1]['left_attribute_cd_1'].values[0] # left_attribute_cd_1
    test_features_4_5.loc[0, 'left_attribute_cd_2'] = dict_fp_3p[dict_fp_3p['left_hero_code_2_orig'] == my_fp2]['left_attribute_cd_2'].values[0] # left_attribute_cd_2
    test_features_4_5.loc[0, 'left_attribute_cd_3'] = dict_fp_3p[dict_fp_3p['left_hero_code_3_orig'] == my_fp3]['left_attribute_cd_3'].values[0] # left_attribute_cd_3
    test_features_4_5.loc[0, 'left_job_cd_1'] = dict_fp_3p[dict_fp_3p['left_hero_code_1_orig'] == my_fp1]['left_job_cd_1'].values[0] # left_job_cd_1
    test_features_4_5.loc[0, 'left_job_cd_2'] = dict_fp_3p[dict_fp_3p['left_hero_code_2_orig'] == my_fp2]['left_job_cd_2'].values[0] # left_job_cd_2
    test_features_4_5.loc[0, 'left_job_cd_3'] = dict_fp_3p[dict_fp_3p['left_hero_code_3_orig'] == my_fp3]['left_job_cd_3'].values[0] # left_job_cd_3
    test_features_4_5.loc[0, 'left_hero_code_1'] = dict_fp_3p[dict_fp_3p['left_hero_code_1_orig'] == my_fp1]['left_hero_code_1'].values[0] # left_hero_code_1
    test_features_4_5.loc[0, 'left_hero_code_2'] = dict_fp_3p[dict_fp_3p['left_hero_code_2_orig'] == my_fp2]['left_hero_code_2'].values[0] # left_hero_code_2
    test_features_4_5.loc[0, 'left_hero_code_3'] = dict_fp_3p[dict_fp_3p['left_hero_code_3_orig'] == my_fp3]['left_hero_code_3'].values[0] # left_hero_code_3
        
    
    # supply mean numeric values as default encoded numeric features
    battle_stats = fp_2p_features[fp_2p_numeric].astype(float).mean().values.reshape(1,15)
    for i in range(len(test_features_2_3[fp_2p_numeric].columns)):
        test_features_2_3[fp_2p_numeric[i]] = battle_stats[0][i]
    
    # supply unencoded numeric values as default numeric features
    test_features_4_5['left_pick_order_1'] = 1 
    test_features_4_5['left_pick_order_2'] = 2
    test_features_4_5['left_pick_order_3'] = 3
    test_features_4_5['left_pick_order_4'] = 4
    test_features_4_5['left_pick_order_5'] = 5
    test_features_4_5['right_pick_order_1'] = 1
    test_features_4_5['right_pick_order_2'] = 2
    test_features_4_5['right_pick_order_3'] = 3
    test_features_4_5['right_pick_order_4'] = 4
    test_features_4_5['first_pick'] = 1
    test_features_4_5['is_win'] = 1
    
              
    # make prediction
    hero_names_3_4 = classifier_fp_3p.predict(test_features_4_5)
    print('2nd & 3rd pick:', hero_names_3_4)
    return hero_names_3_4

In [None]:
test_features_4_5.columns

In [1]:
#omega git test

In [2]:
#super omega git test