In [1]:
import pandas as pd
import sys
from sklearn.pipeline import Pipeline
import numpy as np
import string
import os
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction import MinimalFCParameters
from tsfresh.feature_extraction import EfficientFCParameters



In [2]:
# Make the notebook's parent directory importable so the project-local
# packages (board_helpers, transformers, functions, ...) can be imported below.
# The membership check keeps sys.path free of duplicate entries when this cell
# is re-run in the same kernel.
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)


In [3]:
# Import the project-local packages and immediately re-execute them with
# importlib.reload (presumably so edits to the .py files are picked up without
# restarting the kernel).
# NOTE(review): names bound with `from <module> import <Name>` *before* the
# reload keep pointing at the pre-reload objects; importlib.reload does not
# rebind them. Confirm whether the from-imports should be repeated after the
# reload calls (or replaced by `%autoreload 2`).
import importlib


# --- board_helpers: board/move helper classes ---
import board_helpers
from board_helpers.move_info import MoveInfo
from board_helpers.tiles_counter import TilesCounter
from board_helpers.board_consts import TileType
importlib.reload(board_helpers)
importlib.reload(board_helpers.move_info)
importlib.reload(board_helpers.tiles_counter)
importlib.reload(board_helpers.board_consts)



# --- transformers: pipeline transformer classes ---
# NOTE(review): this appears to be a project-local package named
# `transformers`; the project directory is appended to the END of sys.path, so
# an installed third-party package with the same name would take precedence.
# TODO confirm.
import transformers
from transformers.columns_setter_transformer import ColumnsSetterTransformer
from transformers.extract_set_column_transformer import ExtractSetColumnsTransformer
from transformers.name_dropper_transformer import NameDropperTransformer
from transformers.select_transformer import SelectTransformer
from transformers.select_rows_transformer import SelectRowsTransformer
from transformers.series_from_group_transformer import SeriesFromGroupTransformer
from transformers.map_set_transformer import MapSetTransformer
from transformers.one_hot_encoder_transformer import OneHotEncoderTransformer
from transformers.add_to_dict_transformer import AddToDictTransformer
from transformers.get_from_dict_transformer import GetFromDictTransformer
from transformers.select_pos_rows_transformer import SelectPosRowsTransformer
importlib.reload(transformers)
importlib.reload(transformers.columns_setter_transformer)
importlib.reload(transformers.extract_set_column_transformer)
importlib.reload(transformers.name_dropper_transformer)
importlib.reload(transformers.select_transformer)
importlib.reload(transformers.select_rows_transformer)
importlib.reload(transformers.series_from_group_transformer)
importlib.reload(transformers.map_set_transformer)
importlib.reload(transformers.one_hot_encoder_transformer)
importlib.reload(transformers.add_to_dict_transformer)
importlib.reload(transformers.get_from_dict_transformer)
importlib.reload(transformers.select_pos_rows_transformer)

# --- functions: column extractor callables ---
import functions
from functions.bot_extractor import BotExtarctor
from functions.is_bot_extractor import IsBotExtarctor
importlib.reload(functions)
importlib.reload(functions.is_bot_extractor)
importlib.reload(functions.bot_extractor)

# --- processors ---
import processors
from processors.basic_pre_processor import BasicPreProcessor
importlib.reload(processors)
importlib.reload(processors.basic_pre_processor)

# --- builders: preprocessor / regression-pipeline builders ---
import builders
from builders.preprocessor_builder import PreprocessorBuilder
from builders.reg_pipe_builder import RegPipeBuilder
importlib.reload(builders)
importlib.reload(builders.preprocessor_builder)
importlib.reload(builders.reg_pipe_builder)

# --- searcher modules (top-level modules, not packages) ---
import searcher
import searcher_results_orgenizer
from searcher import Searcher
from searcher_results_orgenizer import SearcherResultsOrgenizer
importlib.reload(searcher)
# Last expression of the cell: the reloaded module object returned here is
# what the notebook renders as this cell's output.
importlib.reload(searcher_results_orgenizer)







<module 'searcher_results_orgenizer' from 'c:\\Users\\guyv2\\OneDrive\\שולחן העבודה\\hafifa\\scrabble\\searcher_results_orgenizer.py'>

In [4]:
# Load the per-turn table. The first CSV column is an unnamed saved row index;
# without index_col=0 pandas exposes it as a data column called 'Unnamed: 0',
# which the tsfresh extraction below would then treat as one more time-series
# value column alongside the real turn features.
t_turns = pd.read_csv('../data/t_turns.csv', index_col=0)

In [5]:
# Preview the loaded turns table: as the last expression of the cell it is
# rendered as the cell output (pandas truncates the middle rows/columns).
t_turns

Unnamed: 0,game_id,turn_number,points,score,is_player,turn_type_Challenge,turn_type_End,turn_type_Exchange,turn_type_Pass,turn_type_Play,...,W,X,Y,Z,.,TileType.N,TileType.L2,TileType.L3,TileType.W2,TileType.W3
0,1,1,10,10,0,0,0,0,0,1,...,0,0,0,0,0,3,0,0,0,0
1,1,2,18,18,1,0,0,0,0,1,...,0,0,0,0,0,2,1,0,0,0
2,1,3,16,26,0,0,0,0,0,1,...,0,0,0,0,0,3,0,1,0,0
3,1,4,16,34,1,0,0,0,0,1,...,0,0,0,0,0,1,0,0,1,0
4,1,5,28,54,0,0,0,0,0,1,...,0,0,0,0,2,6,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2005493,72773,22,18,376,1,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,1
2005494,72773,23,24,331,0,0,0,0,0,1,...,0,0,0,0,1,2,1,0,0,1
2005495,72773,24,7,383,1,0,0,0,0,1,...,0,0,0,0,1,3,0,0,0,1
2005496,72773,25,11,342,0,0,0,0,0,1,...,0,0,0,0,0,2,1,0,0,0


In [6]:
# Run tsfresh feature extraction on the turns table: one time series per
# game_id, ordered by turn_number, with n_jobs=6 for parallel extraction.
extracted_features = extract_features(
    t_turns,
    column_id="game_id",
    column_sort="turn_number",
    n_jobs=6,
)

Feature Extraction:   0%|          | 0/30 [00:00<?, ?it/s]

In [None]:
# Cache the extracted features on disk; the frame's index is written under
# the 'game_id' header so it can be read back with index_col='game_id'.
extracted_features.to_csv(
    '../data/extracted_features_turns.csv',
    index_label='game_id',
)

In [None]:
def _read_by_game_id(csv_path):
    """Read a CSV file into a DataFrame indexed by its 'game_id' column."""
    return pd.read_csv(csv_path, index_col='game_id')


# Input tables: game metadata, the training rows, and the cached tsfresh
# features computed from the turns table.
games = _read_by_game_id('../data/games.csv')
train = _read_by_game_id('../data/train.csv')
turns = _read_by_game_id('../data/extracted_features_turns.csv')

In [None]:
# Names used as src/dest/target keys by the transformers below and when the
# tables are handed to the preprocessor.
G_NAME = 'games'
T_NAME = 'turns'
DATA_NAME = 'train'

# Nicknames passed to IsBotExtarctor.
names = ['BetterBot', 'STEEBot', 'HastyBot']


def _non_player_column(column):
    """Return a callable that picks `column` from the rows selected by ~frame['is_player']."""
    def extract(frame):
        return frame[~frame['is_player']][column]
    return extract


# Pipeline steps, in execution order. Step names are kept exactly as they are
# because they identify the steps at runtime.
pre_steps = [
    ('get_relavent_turns',
     SelectRowsTransformer(train.index, target=T_NAME)),
    ('get_relavent_games',
     SelectRowsTransformer(train.index.unique(), target=G_NAME)),
    ('train_set_is_player',
     ExtractSetColumnsTransformer(
         {'is_player': IsBotExtarctor(names, 'nickname', True)},
         src=DATA_NAME, dest=DATA_NAME)),
    ('get_bot_rating',
     ExtractSetColumnsTransformer(
         {'bot_rating': _non_player_column('rating')},
         src=DATA_NAME, dest=G_NAME)),
    ('get_bots_names',
     ExtractSetColumnsTransformer(
         {'bot_name': _non_player_column('nickname')},
         src=DATA_NAME, dest=G_NAME)),
    ('data_drop_bot_rating',
     SelectPosRowsTransformer('is_player', target=DATA_NAME)),
]

prePipe = Pipeline(pre_steps)

In [None]:
# Build the preprocessor from the games/turns tables and the pre-pipeline,
# then run it on the training table; it yields the processed games, turns and
# ratings as a 3-tuple.
preprocessor_builder = PreprocessorBuilder(games, G_NAME, turns, T_NAME, prePipe)
preprocessor = preprocessor_builder.build()

processed = preprocessor.process(train, DATA_NAME)
n_games, n_turns, n_ratings = processed

In [None]:
# select_features consumes n_turns, so that is the frame that has to be free
# of NaN/inf values. n_turns is derived from the features CSV written before
# any imputation, and `extracted_features` only exists when the (expensive)
# extraction cell was run in this kernel, so impute n_turns itself.
# impute() replaces NaN/inf column-wise and returns the imputed frame.
n_turns = impute(n_turns)

# Keep only the feature columns tsfresh finds relevant for the rating target.
features_filtered = select_features(n_turns, n_ratings, n_jobs=6)

In [None]:
# Persist the selected features; the index is written under the 'game_id' header.
features_filtered.to_csv(
    '../data/features_filtered.csv',
    index_label='game_id',
)