In [22]:
import pandas as pd
import sys
from sklearn.pipeline import Pipeline
import numpy as np
import string
import os
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV



In [2]:
games = pd.read_csv('../data/games.csv', index_col='game_id')
train = pd.read_csv('../data/train.csv', index_col='game_id')
turns = pd.read_csv('../data/turns.csv', index_col='game_id')

In [3]:
os.path.abspath('..')
sys.path.append(os.path.abspath('..'))


In [14]:
import functions
import transformers
import processors
import importlib
import builders

importlib.reload(functions)
importlib.reload(transformers)
importlib.reload(processors)
importlib.reload(builders)


from transformers.columns_setter_transformer import ColumnsSetterTransformer
from transformers.extract_set_column_transformer import ExtractSetColumnsTransformer
from transformers.name_dropper_transformer import NameDropperTransformer
from transformers.select_transformer import SelectTransformer
from transformers.select_rows_transformer import SelectRowsTransformer
from transformers.series_from_group_transformer import SeriesFromGroupTransformer
from transformers.map_set_transformer import MapSetTransformer
from transformers.one_hot_encoder_transformer import OneHotEncoderTransformer
from transformers.add_to_dict_transformer import AddToDictTransformer
from transformers.get_from_dict_transformer import GetFromDictTransformer
from transformers.select_pos_rows_transformer import SelectPosRowsTransformer


from functions.bot_extractor import BotExtarctor
from functions.is_bot_extractor import IsBotExtarctor

from processors.basic_pre_processor import BasicPreProcessor

from builders.preprocessor_builder import PreprocessorBuilder
from builders.reg_pipe_builder import RegPipeBuilder


importlib.reload(functions.is_bot_extractor)
importlib.reload(functions.bot_extractor)

importlib.reload(transformers.columns_setter_transformer)
importlib.reload(transformers.extract_set_column_transformer)
importlib.reload(transformers.name_dropper_transformer)
importlib.reload(transformers.select_transformer)
importlib.reload(transformers.select_rows_transformer)
importlib.reload(transformers.series_from_group_transformer)
importlib.reload(transformers.map_set_transformer)
importlib.reload(transformers.one_hot_encoder_transformer)
importlib.reload(transformers.add_to_dict_transformer)
importlib.reload(transformers.get_from_dict_transformer)
importlib.reload(transformers.select_pos_rows_transformer)


importlib.reload(processors.basic_pre_processor)

importlib.reload(builders.preprocessor_builder)
importlib.reload(builders.reg_pipe_builder)









<module 'builders.reg_pipe_builder' from 'c:\\Users\\Bina4\\Desktop\\Guy_hafifa\\scrabble\\builders\\reg_pipe_builder.py'>

In [17]:
G_NAME = 'games'
T_NAME = 'turns'
DATA_NAME = 'train'

names = ['BetterBot', 'STEEBot', 'HastyBot']

prePipe = Pipeline([('get_relavent_turns', SelectRowsTransformer(train.index, target=T_NAME)),
                     ('get_relavent_games',  SelectRowsTransformer(train.index.unique(), target=G_NAME)),
                     ('train_set_is_player', ExtractSetColumnsTransformer({'is_player': IsBotExtarctor(names, 'nickname', True)},
                                                                           src=DATA_NAME, dest=DATA_NAME)),
                    ('turns_set_is_player', ExtractSetColumnsTransformer({'is_player': IsBotExtarctor(names, 'nickname', True)},
                                                                           src=T_NAME, dest=T_NAME)),  
                     ('get_bot_rating', ExtractSetColumnsTransformer({'bot_rating': lambda train: train[~train['is_player']]['rating']},
                                                                      src=DATA_NAME, dest=G_NAME)),
                     ('get_bots_names', ExtractSetColumnsTransformer({'bot_name': lambda train: train[~train['is_player']]['nickname']},
                                                                     src=DATA_NAME, dest=G_NAME)),
                    ('data_drop_bot_rating', SelectPosRowsTransformer('is_player', target=DATA_NAME)),
                    ])

featureTransformers = [('extract_first', ColumnsSetterTransformer({'is_bot_first': IsBotExtarctor(names, name_col='first')}, target=G_NAME)),
                       ('turns_word_info_mappers', MapSetTransformer({'move_len': (lambda x: len(x) if type(x) == str else np.NAN, 'move'),
                                                                      'reused_num': (lambda x: x.count('.') if type(x) == str else np.NAN, 'move'),
                                                                    # 'jokers_num': (lambda x: sum(1 for c in x if c.islower()) if (type(x) == str) and (x not in ['(challenge)', '(time)']) else np.NAN, 'move')
                                                                     }, target=T_NAME)),
                       ('points_moments', ExtractSetColumnsTransformer({'p_points_1_moment': 
                                                                      (lambda turns: turns[turns['is_player']]['points'].groupby('game_id').mean()),
                                                                        'p_points_std':
                                                                      (lambda turns: turns[turns['is_player']]['points'].groupby('game_id').std()),
                                                                        'a_poits_1_moment': 
                                                                        lambda turns: turns['points'].groupby('game_id').mean()
                                                                       }, src=T_NAME, dest=G_NAME)),
                       ('from_turns', ExtractSetColumnsTransformer({'moves_avg': 
                                                                    lambda turns: turns[turns['is_player']]['move_len'].groupby('game_id').mean(),
                                                                    'reuded_sum': 
                                                                    lambda turns: turns[turns['is_player']]['move_len'].groupby('game_id').sum()
                                                                   }, src=T_NAME, dest=G_NAME)),
                       ('hot', OneHotEncoderTransformer({'time_control_name': games['time_control_name'].unique(),
                                                          'game_end_reason':  games['game_end_reason'].unique(),
                                                          'lexicon':  games['lexicon'].unique(),
                                                          'rating_mode':  games['rating_mode'].unique(),
                                                          'bot_name': names}
                                                        , target=G_NAME)),
                       ('drops', NameDropperTransformer(['first', 'created_at', 'time_control_name', 'game_end_reason', 'lexicon', 'rating_mode', 'bot_name'], target=G_NAME)),                                              
                      ]


In [18]:
preprocessor = PreprocessorBuilder(games, G_NAME, turns, T_NAME, prePipe).build()
n_games, n_turns, n_ratings = preprocessor.process(train, DATA_NAME)


In [19]:
reg_pipe = RegPipeBuilder(n_games, G_NAME, n_turns, T_NAME, featureTransformers, RandomForestRegressor(random_state=0)).build()

In [20]:
cv_res = cross_validate(reg_pipe, n_games, n_ratings, cv=5,
                         scoring=('neg_root_mean_squared_error'),
                         return_train_score=True,)


In [21]:
cv_res

{'fit_time': array([61.85449243, 57.80915213, 54.07170725, 48.74675083, 49.76511073]),
 'score_time': array([4.77567911, 4.16277671, 3.96476746, 3.92584729, 3.58136368]),
 'test_score': array([-105.12417386, -106.42150451, -107.10097048, -106.60122195,
        -105.098516  ]),
 'train_score': array([-39.74436997, -39.51698433, -39.47580448, -39.33310896,
        -39.65112949])}

In [24]:
param_grid = {   
    'estimator__max_depth': [3, 5, 10, 20],
}
search = GridSearchCV(reg_pipe, param_grid, n_jobs=2, scoring=('neg_root_mean_squared_error'))
search.fit(n_games, n_ratings)

KeyboardInterrupt: 