In [1]:
import pandas as pd
import sys
from sklearn.pipeline import Pipeline
import numpy as np
import string
import os
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV



In [2]:
games = pd.read_csv('../data/games.csv', index_col='game_id')
train = pd.read_csv('../data/train.csv', index_col='game_id')
turns = pd.read_csv('../data/turns.csv', index_col='game_id')

In [3]:
os.path.abspath('..')
sys.path.append(os.path.abspath('..'))


In [7]:
import functions
import transformers
import processors
import importlib
import builders

importlib.reload(functions)
importlib.reload(transformers)
importlib.reload(processors)
importlib.reload(builders)

import searcher
import searcher_results_orgenizer

from searcher import Searcher
from searcher_results_orgenizer import SearcherResultsOrgenizer

from transformers.columns_setter_transformer import ColumnsSetterTransformer
from transformers.extract_set_column_transformer import ExtractSetColumnsTransformer
from transformers.name_dropper_transformer import NameDropperTransformer
from transformers.select_transformer import SelectTransformer
from transformers.select_rows_transformer import SelectRowsTransformer
from transformers.series_from_group_transformer import SeriesFromGroupTransformer
from transformers.map_set_transformer import MapSetTransformer
from transformers.one_hot_encoder_transformer import OneHotEncoderTransformer
from transformers.add_to_dict_transformer import AddToDictTransformer
from transformers.get_from_dict_transformer import GetFromDictTransformer
from transformers.select_pos_rows_transformer import SelectPosRowsTransformer


from functions.bot_extractor import BotExtarctor
from functions.is_bot_extractor import IsBotExtarctor

from processors.basic_pre_processor import BasicPreProcessor

from builders.preprocessor_builder import PreprocessorBuilder
from builders.reg_pipe_builder import RegPipeBuilder


importlib.reload(functions.is_bot_extractor)
importlib.reload(functions.bot_extractor)

importlib.reload(transformers.columns_setter_transformer)
importlib.reload(transformers.extract_set_column_transformer)
importlib.reload(transformers.name_dropper_transformer)
importlib.reload(transformers.select_transformer)
importlib.reload(transformers.select_rows_transformer)
importlib.reload(transformers.series_from_group_transformer)
importlib.reload(transformers.map_set_transformer)
importlib.reload(transformers.one_hot_encoder_transformer)
importlib.reload(transformers.add_to_dict_transformer)
importlib.reload(transformers.get_from_dict_transformer)
importlib.reload(transformers.select_pos_rows_transformer)


importlib.reload(processors.basic_pre_processor)

importlib.reload(builders.preprocessor_builder)
importlib.reload(builders.reg_pipe_builder)

importlib.reload(searcher)
importlib.reload(searcher_results_orgenizer)







NameError: name 'searcher_results_orgenizer' is not defined

In [None]:
def get_n_first(n: int, col: str, df: pd.DataFrame):
    df[col].groupby()
    

In [6]:
G_NAME = 'games'
T_NAME = 'turns'
DATA_NAME = 'train'

names = ['BetterBot', 'STEEBot', 'HastyBot']

prePipe = Pipeline([('get_relavent_turns', SelectRowsTransformer(train.index, target=T_NAME)),
                     ('get_relavent_games',  SelectRowsTransformer(train.index.unique(), target=G_NAME)),
                     ('train_set_is_player', ExtractSetColumnsTransformer({'is_player': IsBotExtarctor(names, 'nickname', True)},
                                                                           src=DATA_NAME, dest=DATA_NAME)),
                    ('turns_set_is_player', ExtractSetColumnsTransformer({'is_player': IsBotExtarctor(names, 'nickname', True)},
                                                                           src=T_NAME, dest=T_NAME)),  
                     ('get_bot_rating', ExtractSetColumnsTransformer({'bot_rating': lambda train: train[~train['is_player']]['rating']},
                                                                      src=DATA_NAME, dest=G_NAME)),
                     ('get_bots_names', ExtractSetColumnsTransformer({'bot_name': lambda train: train[~train['is_player']]['nickname']},
                                                                     src=DATA_NAME, dest=G_NAME)),
                    ('data_drop_bot_rating', SelectPosRowsTransformer('is_player', target=DATA_NAME)),
                    ])

featureTransformers = [('extract_first', ColumnsSetterTransformer({'is_bot_first': IsBotExtarctor(names, name_col='first')}, target=G_NAME)),
                       ('turns_word_info_mappers', MapSetTransformer({'move_len': (lambda x: len(x) if type(x) == str else np.NAN, 'move'),
                                                                      'reused_num': (lambda x: x.count('.') if type(x) == str else np.NAN, 'move'),
                                                                    # 'jokers_num': (lambda x: sum(1 for c in x if c.islower()) if (type(x) == str) and (x not in ['(challenge)', '(time)']) else np.NAN, 'move')
                                                                     }, target=T_NAME)),
                       ('points_moments', ExtractSetColumnsTransformer({'p_points_1_moment': 
                                                                      (lambda turns: turns[turns['is_player']]['points'].groupby('game_id').mean()),
                                                                        'p_points_std':
                                                                      (lambda turns: turns[turns['is_player']]['points'].groupby('game_id').std()),
                                                                        'a_poits_1_moment': 
                                                                        lambda turns: turns['points'].groupby('game_id').mean()
                                                                       }, src=T_NAME, dest=G_NAME)),
                       ('from_turns', ExtractSetColumnsTransformer({'moves_avg': 
                                                                    lambda turns: turns[turns['is_player']]['move_len'].groupby('game_id').mean(),
                                                                    'reuded_sum': 
                                                                    lambda turns: turns[turns['is_player']]['move_len'].groupby('game_id').sum()
                                                                   }, src=T_NAME, dest=G_NAME)),
                       ('hot', OneHotEncoderTransformer({'time_control_name': games['time_control_name'].unique(),
                                                          'game_end_reason':  games['game_end_reason'].unique(),
                                                          'lexicon':  games['lexicon'].unique(),
                                                          'rating_mode':  games['rating_mode'].unique(),
                                                          'bot_name': names}
                                                        , target=G_NAME)),
                       ('drops', NameDropperTransformer(['first', 'created_at', 'time_control_name', 'game_end_reason', 'lexicon', 'rating_mode', 'bot_name'], target=G_NAME)),                                              
                      ]


In [7]:
preprocessor = PreprocessorBuilder(games, G_NAME, turns, T_NAME, prePipe).build()
n_games, n_turns, n_ratings = preprocessor.process(train, DATA_NAME)


In [8]:
reg_pipe = RegPipeBuilder(n_games, G_NAME, n_turns, T_NAME, featureTransformers, RandomForestRegressor(random_state=0)).build()

In [20]:
cv_res = cross_validate(reg_pipe, n_games, n_ratings, cv=5,
                         scoring=('neg_root_mean_squared_error'),
                         return_train_score=True,)


In [21]:
cv_res

{'fit_time': array([61.85449243, 57.80915213, 54.07170725, 48.74675083, 49.76511073]),
 'score_time': array([4.77567911, 4.16277671, 3.96476746, 3.92584729, 3.58136368]),
 'test_score': array([-105.12417386, -106.42150451, -107.10097048, -106.60122195,
        -105.098516  ]),
 'train_score': array([-39.74436997, -39.51698433, -39.47580448, -39.33310896,
        -39.65112949])}

In [34]:
param_grid = {   
    'estimator__max_depth': [5, 7],
    'estimator__min_samples_split': [2]
}
search = GridSearchCV(reg_pipe, param_grid, n_jobs=-1, scoring=('neg_root_mean_squared_error'))
search.fit(n_games, n_ratings)

In [36]:
search.cv_results_

{'mean_fit_time': array([40.99107461, 45.1506043 ]),
 'std_fit_time': array([0.6840991 , 6.09360958]),
 'mean_score_time': array([10.46209173,  5.78470645]),
 'std_score_time': array([0.49439422, 1.78353383]),
 'param_estimator__max_depth': masked_array(data=[5, 7],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_estimator__min_samples_split': masked_array(data=[2, 2],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'estimator__max_depth': 5, 'estimator__min_samples_split': 2},
  {'estimator__max_depth': 7, 'estimator__min_samples_split': 2}],
 'split0_test_score': array([-132.04980941, -120.4646758 ]),
 'split1_test_score': array([-131.97117091, -120.91456439]),
 'split2_test_score': array([-132.11619909, -121.62944417]),
 'split3_test_score': array([-131.30004001, -120.76794965]),
 'split4_test_score': array([-130.96438467, -119.36288025]),
 'mean_test_score': array([-131.68032082, -120

In [226]:
param_grid = {   
    'estimator__max_depth': [3, 5],
}
search = GridSearchCV(reg_pipe, param_grid, n_jobs=2, scoring=('r2', 'neg_root_mean_squared_error'), refit=False)
search.fit(n_games, n_ratings)

In [1]:
turns


NameError: name 'turns' is not defined

In [222]:
grid = {
    'random_forest_1':
    {
    'estimator': RandomForestRegressor(random_state=0),
    'param_grid':
        {
        'max_depth': [4, 6],
        'max_features': ['sqrt', 'log2']
        },
    'scores': ['neg_root_mean_squared_error']
    },
    'random_forest_2':
    {
    'estimator': RandomForestRegressor(random_state=0),
    'param_grid':
        {
        'max_depth': [2, 3],
        'max_features': ['sqrt', None]
        },
    'scores': ['neg_root_mean_squared_error']
    },
}

In [223]:
my_searcher = Searcher(reg_pipe[:-1], grid, 2)
my_searcher.search(n_games, n_ratings)
my_searcher.get_results()

{('random_forest_1',
  'neg_root_mean_squared_error'): GridSearchCV(estimator=Pipeline(steps=[('add_turns',
                                         AddToDictTransformer(dfs={'turns':          turn_number   nickname     rack location      move  points  score  \
 game_id                                                                      
 1                  1  BetterBot  DDEGITT       8G       DIG      10     10   
 1                  2      stevy  AEHOPUX       7H       HAP      18     18   
 1                  3  BetterBot  DEELTTU       6I      LUTE      16     26   
 1                  4      stevy  EMORSUX       5K        UM      16     34   
 1                  5  BetterBot  ACDEITU       L5  ..DICATE      28     54   
 ...              ...        ...      ...      ...       ...     ...    ...   
 72773             22      adola  ABINRRU      15N        IN      18    376   
 72773             23   HastyBot   EGHIIP      H12      ....
                                             

In [221]:
SearcherResultsOrgenizer().orgenize(my_searcher.get_results()).sort_values('mean_test_score')

Unnamed: 0_level_0,Unnamed: 1_level_0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,max_depth,max_features
model_name,scorer,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
second,neg_root_mean_squared_error,7.225087,0.924061,4.964619,0.695078,-166.872265,-163.992401,-163.751527,-163.876581,-164.675848,-164.633725,1.164102,2,sqrt
second,neg_root_mean_squared_error,11.779568,0.644232,4.247202,0.166907,-160.675893,-158.119858,-159.207012,-157.547383,-158.437539,-158.797537,1.081446,2,
second,neg_root_mean_squared_error,6.956863,0.42136,4.73473,0.369975,-154.065204,-151.047536,-150.998049,-150.708186,-151.246658,-151.613127,1.238072,3,sqrt
second,neg_root_mean_squared_error,13.706248,1.201252,4.214188,0.487239,-149.613615,-147.978877,-149.054538,-147.429262,-148.000286,-148.415316,0.79744,3,
first,neg_root_mean_squared_error,7.233594,0.360966,4.506705,0.618077,-145.691808,-142.559752,-142.664066,-142.497111,-143.120901,-143.306728,1.212403,4,sqrt
first,neg_root_mean_squared_error,6.481647,0.455683,4.416649,0.571667,-145.691808,-142.559752,-142.664066,-142.497111,-143.120901,-143.306728,1.212403,4,log2
first,neg_root_mean_squared_error,7.929903,0.577155,4.503124,0.490224,-133.154428,-131.142826,-131.518018,-130.356627,-130.801026,-131.394585,0.959722,6,sqrt
first,neg_root_mean_squared_error,8.084514,0.767394,5.077051,0.92675,-133.154428,-131.142826,-131.518018,-130.356627,-130.801026,-131.394585,0.959722,6,log2
