Exp026_LightGBM

In [1]:
# Mount Google Drive (Colab only) so the paths under /content/drive used below are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
#!nvidia-smi

In [2]:
# Every artefact of this competition lives under one Drive folder;
# the individual directories are derived from that single root.
project_root = "/content/drive/MyDrive/07_Competition/signate-471/"

input_dir = project_root + "data/"            # raw competition csv files
output_dir = project_root + "log/"            # per-experiment logs
submission_dir = project_root + "submission/"
model_dir = project_root + "model_bin/"
pred_dir = project_root + "pred/"             # per-experiment prediction csv files

In [3]:
# Install the extra packages imported below (texthero, transformers, pycld2).
# NOTE(review): versions are not pinned, so a fresh runtime may pull different releases.
! pip install texthero
!pip install transformers pycld2



In [27]:
import logging
import datetime
import warnings

import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

from tqdm import tqdm
from sklearn.metrics import fbeta_score
from sklearn.utils import class_weight
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import TruncatedSVD, PCA
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from scipy.optimize import minimize, minimize_scalar
from contextlib import contextmanager

import torch
import lightgbm as lgb
from time import time

import texthero as hero
import transformers
import pycld2 as cld2

In [28]:
class CFG:
    """Experiment-wide settings shared by the cells below."""
    # Experiment name; appended to the model / pred / log directories.
    exp = "exp26h"
    # Random seed handed to the StratifiedKFold splitters.
    seed = 71
    # Number of cross-validation folds.
    fold = 5


CONFIG = CFG()

In [6]:
# Create the per-experiment sub-folder in each output location (no error if it already exists).
for base_dir in (model_dir, pred_dir, output_dir):
    os.makedirs(base_dir + CONFIG.exp + "/", exist_ok=True)

In [7]:
class Logger:
    """Small logging helper that writes timestamped messages to the console and to a file.

    The underlying ``logging`` logger is looked up by ``path``, so creating
    several ``Logger`` objects for the same path reuses one logger.

    Args:
        path: Directory that receives ``Experiment.log``; also used as the logger name.
    """
    def __init__(self, path):
        self.general_logger = logging.getLogger(path)
        # Handlers are created only when the logger has none yet. Creating the
        # FileHandler unconditionally would open (and never close) a new file
        # handle every time this class is instantiated again for the same path.
        if len(self.general_logger.handlers) == 0:
            stream_handler = logging.StreamHandler()
            file_general_handler = logging.FileHandler(os.path.join(path, 'Experiment.log'))
            self.general_logger.addHandler(stream_handler)
            self.general_logger.addHandler(file_general_handler)
            self.general_logger.setLevel(logging.INFO)

    def info(self, message):
        """Log ``message`` at INFO level, prefixed with the current time."""
        self.general_logger.info('[{}] - {}'.format(self.now_string(), message))

    @staticmethod
    def now_string():
        """Return the current local time formatted as ``YYYY-mm-dd HH:MM:SS``."""
        return str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

class Util:
    """Save and load Python objects through joblib."""

    @classmethod
    def dump(cls, value, path):
        """Serialize ``value`` to ``path`` with compression switched on."""
        joblib.dump(value, filename=path, compress=True)

    @classmethod
    def load(cls, path):
        """Return the object stored at ``path``."""
        # joblib files are pickles: only load files produced by a trusted source.
        restored = joblib.load(path)
        return restored

In [29]:
def get_train_data(train):
    """Add a ``fold`` column holding the stratified cross-validation fold of each row.

    Folds come from ``StratifiedKFold`` (``CONFIG.fold`` splits, shuffled with
    ``CONFIG.seed``), stratified on the ``judgement`` column.

    Args:
        train: Training DataFrame with a ``judgement`` column. It is modified
            in place (the ``fold`` column is added or overwritten).

    Returns:
        The same DataFrame object, with ``fold`` stored as ``uint8``.
    """
    splitter = StratifiedKFold(n_splits=CONFIG.fold, shuffle=True, random_state=CONFIG.seed)
    # split() yields *positional* indices. They are written into a plain array
    # instead of going through label-based ``.loc``, so the result is also
    # correct when ``train`` does not carry a default 0..n-1 index.
    fold_ids = np.zeros(len(train), dtype=np.uint8)
    for n, (_, val_index) in enumerate(splitter.split(train, train["judgement"])):
        fold_ids[val_index] = n
    train["fold"] = fold_ids

    return train

def get_test_data(test):
    """Return the test data unchanged (no test-side preparation is done here)."""
    prepared = test
    return prepared

In [30]:
def cleansing_hero_only_text(input_df, text_col):
    """Run one text column through a fixed texthero cleaning pipeline.

    Args:
        input_df: DataFrame that contains ``text_col``.
        text_col: Name of the column to clean.

    Returns:
        Whatever ``hero.clean`` returns for that column and pipeline.
    """
    # The steps are applied in this order by hero.clean.
    cleaning_steps = [
        hero.preprocessing.fillna,
        hero.preprocessing.lowercase,
        hero.preprocessing.remove_digits,
        hero.preprocessing.remove_punctuation,
        hero.preprocessing.remove_diacritics,
        hero.preprocessing.remove_stopwords,
        hero.preprocessing.remove_whitespace,
        hero.preprocessing.stem,
    ]
    raw_column = input_df[text_col]
    return hero.clean(raw_column, cleaning_steps)

# Punctuation / symbol tokens that clean_puncts() separates from the surrounding text.
_PUNCTS = [',', '.', '"', ':', ')', '(', '-', '!', '?', '|', ';', "'", '$', '&', '/', '[', ']', '>', '%', '=', '#', '*', '+', '\\', '•',  '~', '@', '£',
            '·', '_', '{', '}', '©', '^', '®', '`',  '<', '→', '°', '€', '™', '›',  '♥', '←', '×', '§', '″', '′', 'Â', '█', '½', 'à', '…',
            '“', '★', '”', '–', '●', 'â', '►', '−', '¢', '²', '¬', '░', '¶', '↑', '±', '¿', '▾', '═', '¦', '║', '―', '¥', '▓', '—', '‹', '─',
            '▒', '：', '¼', '⊕', '▼', '▪', '†', '■', '’', '▀', '¨', '▄', '♫', '☆', 'é', '¯', '♦', '¤', '▲', 'è', '¸', '¾', 'Ã', '⋅', '‘', '∞', '«',
            '∙', '）', '↓', '、', '│', '（', '»', '，', '♪', '╩', '╚', '³', '・', '╦', '╣', '╔', '╗', '▬', '❤', 'ï', 'Ø', '¹', '≤', '‡', '√', '（', '）', '～',
            '➡', '％', '⇒', '▶', '「', '➄', '➆',  '➊', '➋', '➌', '➍', '⓪', '①', '②', '③', '④', '⑤', '⑰', '❶', '❷', '❸', '❹', '❺', '❻', '❼', '❽',
            '＝', '※', '㈱', '､', '△', '℮', 'ⅼ', '‐', '｣', '┝', '↳', '◉', '／', '＋', '○',
            '【', '】', '✅', '☑', '➤', 'ﾞ', '↳', '〶', '☛', '｢', '⁺', '『', '≫',
            'Â©', '<sub>','Aﾎｲ', 'ﾎｲ', "ﾃｩ"
          ]


def _build_punct_pattern(tokens):
    """Compile one regex that matches any of ``tokens``, trying longer tokens first."""
    import re  # local import: `re` is not among the notebook's top-level imports

    # dict.fromkeys drops the duplicates in the list while keeping order;
    # longest-first makes multi-character tokens such as '<sub>' win over '<'.
    unique_tokens = sorted(dict.fromkeys(tokens), key=len, reverse=True)
    return re.compile("|".join(re.escape(tok) for tok in unique_tokens))


_PUNCT_PATTERN = _build_punct_pattern(_PUNCTS)


def clean_puncts(x):
    """Surround every listed punctuation token in ``x`` with single spaces.

    The original function only built the token list and fell off the end, so it
    always returned ``None``. It now returns the processed text. Padding (rather
    than deleting) the tokens is the usual form of this helper.
    NOTE(review): confirm padding is the intended behaviour.

    Args:
        x: Input text (``str``).

    Returns:
        ``x`` with each token from the list replaced by ``" token "``. The text
        is scanned once, so spaces inserted for one token are never re-processed.
    """
    return _PUNCT_PATTERN.sub(lambda match: f" {match.group(0)} ", x)


def remove_double(text):
    """Collapse every run of consecutive spaces in ``text`` into one space.

    A single ``str.replace("  ", " ")`` pass only halves each run (three spaces
    become two), so the replacement is repeated until no double space is left.
    """
    while "  " in text:
        text = text.replace("  ", " ")
    return text

def split_copyright(text):
    """Return the part of ``text`` that precedes the first "Copyright".

    Text without the word "Copyright" is returned as is.
    """
    before_notice, _, _ = text.partition('Copyright')
    return before_notice

def preprocess_text(text):
    """Apply the punctuation and copyright clean-up steps to one text.

    Args:
        text: Raw text.

    Returns:
        The text after ``clean_puncts`` and ``split_copyright``. The original
        version had no ``return`` statement and therefore always yielded ``None``.
    """
    text = clean_puncts(text)
    #text = remove_double(text)
    text = split_copyright(text)
    return text

In [31]:
def basic_text_features_transforme(input_df, text_columns, name=""):
    """Build simple count-based features for each text column.

    Args:
        input_df: DataFrame containing the columns in ``text_columns``.
        text_columns: List of text column names.
        name: Optional infix placed between the column name and the feature name.

    Returns:
        DataFrame with ten features per text column, named
        ``<column><name>_<feature>``. Missing texts are treated as the string
        ``'missing'``.
    """
    specific_words = ['pcr', 'samples', 'detection', 'assay', 'accuracy', 'assays', 'tested',  'detect']

    def _occurrences(tokens):
        # Returns a function that sums str.count over all tokens (substring counts).
        return lambda text: sum(text.count(token) for token in tokens)

    def _get_features(dataframe, column):
        texts = dataframe[column]
        prefix = column + name

        n_chars = texts.apply(len)
        n_words = texts.apply(lambda text: len(text.split()))
        n_unique_words = texts.apply(lambda text: len(set(text.split())))

        # Dict order fixes the column order of the resulting frame.
        return pd.DataFrame({
            prefix + '_num_chars': n_chars,
            prefix + '_num_exclamation_marks': texts.apply(lambda text: text.count('!')),
            prefix + '_num_question_marks': texts.apply(lambda text: text.count('?')),
            prefix + '_num_punctuation': texts.apply(_occurrences('.,;:')),
            prefix + '_num_symbols': texts.apply(_occurrences('*&$%')),
            prefix + '_num_words': n_words,
            prefix + '_num_unique_words': n_unique_words,
            prefix + '_words_vs_unique': n_unique_words / n_words,
            prefix + '_words_vs_chars': n_words / n_chars,
            prefix + '_num_specific_words': texts.apply(_occurrences(specific_words)),
        })

    # Main part: normalise the text columns, then stack the per-column features side by side.
    texts_df = input_df[text_columns].fillna('missing').astype(str)
    per_column = [_get_features(texts_df, column) for column in text_columns]
    return pd.concat(per_column, axis=1)


In [32]:
def vectorize_text(input_df,
                   text_columns,
                   vectorizer=None,
                   transformer=None,
                   name='html_count_svd'):
    """Count-based text features: vectorize each text column, then reduce its dimension.

    Args:
        input_df: DataFrame containing the columns in ``text_columns``.
        text_columns: List of text column names.
        vectorizer: Object with ``fit_transform`` that turns texts into a matrix.
            Defaults to a fresh ``CountVectorizer()``.
        transformer: Object with ``fit_transform`` that reduces that matrix.
            Defaults to a fresh ``TruncatedSVD(n_components=128)``.
        name: Prefix for the generated column names.

    Returns:
        DataFrame with the reduced features of all text columns side by side,
        on a fresh 0..n-1 index. Column names are ``<name><n>=<iii>`` and do not
        include the text column, so they repeat when several columns are given.
    """
    # Estimators are created per call: instances used as default arguments would be
    # built once at definition time and shared (and re-fitted) by every call.
    if vectorizer is None:
        vectorizer = CountVectorizer()
    if transformer is None:
        transformer = TruncatedSVD(n_components=128)

    texts_df = input_df[text_columns].fillna('missing').astype(str)
    features = []
    for c in text_columns:
        # Both estimators are re-fitted for every column.
        sentence = vectorizer.fit_transform(texts_df[c])
        feature = transformer.fit_transform(sentence)
        num_p = feature.shape[1]
        feature = pd.DataFrame(feature, columns=[name+str(num_p) + f'={i:03}' for i in range(num_p)])
        features.append(feature)
    output_df = pd.concat(features, axis=1)
    return output_df

In [33]:
def get_basic_text_features(input_df):
    """Basic count features for the ``title`` and ``abstract`` columns."""
    target_columns = ["title", "abstract"]
    return basic_text_features_transforme(input_df, text_columns=target_columns)

def get_tfidf_features__svd64(input_df):
    """TF-IDF features of ``title`` and ``abstract`` reduced to 64 SVD components each.

    The SVD is seeded with ``CONFIG.seed`` so repeated runs give the same
    features (it was unseeded before).
    """
    output_df = vectorize_text(input_df,
                               text_columns=["title", "abstract"],
                               vectorizer=TfidfVectorizer(min_df=0.001, max_df=0.99),
                               transformer=TruncatedSVD(n_components=64, random_state=CONFIG.seed),
                               name="tfidf_svd_")
    return output_df

def get_count_features__svd64(input_df):
    """Word-count features of ``title`` and ``abstract`` reduced to 64 SVD components each.

    The SVD is seeded with ``CONFIG.seed`` so repeated runs give the same
    features (it was unseeded before).
    """
    output_df = vectorize_text(input_df,
                               text_columns=["title", "abstract"],
                               vectorizer=CountVectorizer(min_df=0.001, max_df=0.99),
                               transformer=TruncatedSVD(n_components=64, random_state=CONFIG.seed),
                               name="count_svd_")
    return output_df

In [34]:
def preprocess(train, test):
    """Run every feature builder on train+test together and split the result back.

    Args:
        train: Training DataFrame (must contain ``judgement``).
        test: Test DataFrame.

    Returns:
        ``(train_x, train_y, test_x)``: the feature rows of the training part,
        the ``judgement`` column of ``train``, and the feature rows of the test
        part re-indexed from 0.
    """
    combined = pd.concat([train, test]).reset_index(drop=True)
    feature_builders = (
        get_basic_text_features,
        get_tfidf_features__svd64,
        get_count_features__svd64,
    )

    # Each builder sees the combined frame; the blocks are joined column-wise.
    feature_blocks = [build(combined) for build in feature_builders]
    features = pd.concat(feature_blocks, axis=1)

    n_train = len(train)
    train_x = features.iloc[:n_train]
    test_x = features.iloc[n_train:].reset_index(drop=True)
    train_y = train["judgement"]

    return train_x, train_y, test_x

In [74]:
pd.set_option("display.max_colwidth", 50)
train = pd.read_csv(input_dir + "train.csv")
test = pd.read_csv(input_dir + "test.csv")
sub = pd.read_csv(input_dir + "sample_submit.csv", header=None)
sub.columns = ["id", "judgement"]


# Missing abstracts become the placeholder "NA" so the string helpers below never receive NaN.
train["abstract"]=train["abstract"].fillna("NA")
test["abstract"]=test["abstract"].fillna("NA")

# Drop everything from the first "Copyright" onwards.
train["abstract"] = train["abstract"].apply(split_copyright)
test["abstract"] = test["abstract"].apply(split_copyright)

# Experiment: drop training rows whose title has 3 or fewer space-separated tokens.
train["title_word_len"] = train["title"].str.split(" ").str.len()
# .copy() makes the filtered frame independent, so the column assignment below
# does not write into a slice of the original frame (SettingWithCopyWarning).
train = train[train["title_word_len"]>3].copy()

# Drop training rows whose title is in another language: keep only the cld2 codes "en" and "un".
train["title_lang"] = train["title"].fillna("").map(lambda x: cld2.detect(x)[2][0][1])
train = train[(train["title_lang"]=="en")|(train["title_lang"]=="un")].reset_index(drop=True)

In [75]:
train

Unnamed: 0,id,title,abstract,judgement,title_word_len,title_lang
0,0,One-year age changes in MRI brain volumes in o...,Longitudinal studies indicate that declines in...,0,10,en
1,1,Supportive CSF biomarker evidence to enhance t...,The present study was undertaken to validate t...,0,23,en
2,2,Occurrence of basal ganglia germ cell tumors w...,Objective: To report a case series in which ba...,0,10,en
3,3,New developments in diagnosis and therapy of C...,The etiology and pathogenesis of idiopathic ch...,0,13,en
4,4,Prolonged shedding of SARS-CoV-2 in an elderly...,,0,16,en
...,...,...,...,...,...,...
26916,27140,The amyloidogenic pathway of amyloid precursor...,Amyloid beta-protein (A beta) is the main cons...,0,15,en
26917,27141,Technologic developments in radiotherapy and s...,We present a review of current technological p...,0,7,en
26918,27142,Novel screening cascade identifies MKK4 as key...,Phosphorylation of Tau at serine 422 promotes ...,0,13,en
26919,27143,Visualization of the gall bladder on F-18 FDOP...,The ability to label dihydroxyphenylalanine (D...,0,13,en


In [76]:
# Label correction
# https://signate.jp/competitions/471/discussions/20210816152356-59
# The two ids below get their judgement overwritten with 0 (see the thread linked above).

train.loc[train["id"]==2488, "judgement"] = 0
train.loc[train["id"]==7708, "judgement"] = 0

In [77]:
# Replace title and abstract with their texthero-cleaned versions, in train and in test.
for frame in (train, test):
    for text_col in ("title", "abstract"):
        frame[text_col] = cleansing_hero_only_text(frame, text_col)

In [78]:
# Build the hand-made text features; train and test are processed together inside preprocess().
train_x, train_y, test_x = preprocess(train, test)

In [79]:
train_x.shape

(26921, 276)

In [80]:
train_y.shape

(26921,)

In [81]:
test_x.shape

(40834, 276)

## Bert Predの読み込み

In [100]:
exp_list = ["exp20h","exp22h"]
#exp_list = ["exp20h"]

# One column per experiment: out-of-fold predictions for train, test predictions for test.
# Columns are collected first and joined once at the end.
oof_columns = []
test_columns = []

for dataset in exp_list:
    _df = pd.read_csv(pred_dir+f"{dataset}/oof_df.csv")
    _preds = pd.read_csv(pred_dir+f"{dataset}/pred_df.csv")

    # Each prediction column is named after the experiment it comes from.
    oof_columns.append(_df["preds"].rename(dataset))
    test_columns.append(_preds["judgement"].rename(dataset))

train_bert = pd.concat(oof_columns, axis=1) if oof_columns else pd.DataFrame()
test_bert = pd.concat(test_columns, axis=1) if test_columns else pd.DataFrame()

In [116]:
train_bert

Unnamed: 0,exp20h,exp22h
0,0.005017,0.004337
1,0.000276,0.001180
2,0.001697,0.009072
3,0.000016,0.000102
4,0.002729,0.073801
...,...,...
26916,0.474465,0.689895
26917,0.000541,0.000801
26918,0.000483,0.000329
26919,0.000833,0.003440


In [117]:
test_bert

Unnamed: 0,exp20h,exp22h
0,0.000159,0.000228
1,0.001748,0.004413
2,0.001512,0.001216
3,0.001418,0.001583
4,0.001102,0.000888
...,...,...
40829,0.000968,0.001188
40830,0.000203,0.000302
40831,0.003782,0.003455
40832,0.043563,0.044998


In [103]:
test_bert.shape

(40834, 2)

In [104]:
# Stacking input, variant 1: BERT predictions + hand-made text features, with the label as last column.
# NOTE: the next cell assigns train_x_mod / test_x_mod again, so whichever cell ran last wins.
train_x_mod = pd.concat([train_bert,train_x,train[["judgement"]].reset_index(drop=True)],axis=1)
test_x_mod = pd.concat([test_bert,test_x],axis=1)

In [118]:
# Stacking input, variant 2: BERT predictions only, with the label as last column.
# NOTE: reassigns the names set by the previous cell, so whichever cell ran last wins.
train_x_mod = pd.concat([train_bert,train[["judgement"]].reset_index(drop=True)],axis=1)
test_x_mod = test_bert.copy()

In [119]:
train_x_mod

Unnamed: 0,exp20h,exp22h,judgement
0,0.005017,0.004337,0
1,0.000276,0.001180,0
2,0.001697,0.009072,0
3,0.000016,0.000102,0
4,0.002729,0.073801,0
...,...,...,...
26916,0.474465,0.689895,0
26917,0.000541,0.000801,0
26918,0.000483,0.000329,0
26919,0.000833,0.003440,0


In [120]:
test_x_mod

Unnamed: 0,exp20h,exp22h
0,0.000159,0.000228
1,0.001748,0.004413
2,0.001512,0.001216
3,0.001418,0.001583
4,0.001102,0.000888
...,...,...
40829,0.000968,0.001188
40830,0.000203,0.000302
40831,0.003782,0.003455
40832,0.043563,0.044998


## モデル作成　(Light GBM)

In [121]:
@contextmanager
def timer(logger=None, format_str='{:.3f}[s]', prefix=None, suffix=None):
    """Context manager that reports the wall-clock time spent inside the ``with`` block.

    Args:
        logger: Object with an ``info(str)`` method. When falsy, the message is printed.
        format_str: ``str.format`` template that receives the elapsed seconds.
        prefix: Optional text put in front of ``format_str``.
        suffix: Optional text appended to ``format_str``.

    The elapsed time is reported even when the block raises (or is interrupted);
    the exception then propagates as usual.
    """
    if prefix: format_str = str(prefix) + format_str
    if suffix: format_str = format_str + str(suffix)
    start = time()
    try:
        yield
    finally:
        # Runs on normal exit and on exceptions alike.
        d = time() - start
        out_str = format_str.format(d)
        if logger:
            logger.info(out_str)
        else:
            print(out_str)

In [122]:
def opt_fbeta_threshold(y_true, y_pred):
    """Search the decision threshold in [0, 1] that maximises the F-beta score (beta=7).

    Args:
        y_true: True binary labels.
        y_pred: Predicted scores; compared with ``>=`` against the threshold.

    Returns:
        The threshold reported by ``minimize_scalar`` (bounded method), as a Python scalar.
    """
    def negative_fbeta(threshold):
        # minimize_scalar minimises, hence the sign flip.
        return -fbeta_score(y_true, y_pred >= threshold, beta=7)

    search = minimize_scalar(negative_fbeta, bounds=(0, 1), method='bounded')
    return search['x'].item()

def metrics(y_true, y_pred):
    """F-beta score (beta=7) evaluated at the threshold found by ``opt_fbeta_threshold``.

    The chosen threshold is printed as ``bt:<value>``.
    """
    best_threshold = opt_fbeta_threshold(y_true, y_pred)
    print(f"bt:{best_threshold}")
    return fbeta_score(y_true, y_pred >= best_threshold, beta=7)

In [123]:
def fit_lgbm(X, y, cv, params: dict=None, verbose: int=50):
    """Cross-validated LightGBM (``LGBMModel``) with a per-fold optimised F-beta threshold.

    NOTE: a later cell defines another ``fit_lgbm``; once that cell has run,
    it replaces this definition.

    Args:
        X: Feature array indexed positionally (``X[idx]``).
        y: Binary target, array-like of the same length as ``X``.
        cv: Iterable of ``(train_indices, valid_indices)`` pairs.
        params: Keyword arguments for ``lgb.LGBMModel``; ``None`` means defaults.
        verbose: Passed to ``fit`` both as ``verbose`` and as ``early_stopping_rounds``.

    Returns:
        ``(oof_pred, models)``: a float array with the thresholded 0/1
        out-of-fold prediction of every row, and the list of fitted models.
    """
    metric_func = fbeta_score
    if params is None:
        params = {}

    # Work on a plain array so that y[idx] is always positional, even when a
    # pandas Series with a non-default index is passed in.
    y = np.asarray(y)

    models = []
    # One slot per training target. The builtin float replaces the alias
    # np.float, which was removed from NumPy (AttributeError since 1.24).
    oof_pred = np.zeros_like(y, dtype=float)

    for i, (idx_train, idx_valid) in enumerate(cv):
        # Cross-validation step: split the training data into train / valid
        # according to the indices supplied by cv.
        x_train, y_train = X[idx_train], y[idx_train]
        x_valid, y_valid = X[idx_valid], y[idx_valid]

        clf = lgb.LGBMModel(**params)

        with timer(prefix='fit fold={} '.format(i + 1)):
            # NOTE(review): `verbose` doubles as the early-stopping patience here.
            clf.fit(x_train, y_train,
                    eval_set=[(x_valid, y_valid)],
                    early_stopping_rounds=verbose,
                    verbose=verbose)

        y_true = y_valid
        y_pred = clf.predict(x_valid)

        # Threshold for the F-beta score, optimised on this validation fold.
        best_threshold = opt_fbeta_threshold(y_true, y_pred)
        print(f"Best_BetaScore_Border： {best_threshold}")

        pred = np.where(y_pred < best_threshold, 0, 1)
        print(f'Fold {i} Beta Score: {metric_func(y_valid, pred, beta=7.0) :.4f}')

        oof_pred[idx_valid] = pred
        models.append(clf)

    score = metric_func(y, oof_pred, beta=7.0)
    print('FINISHED | Beta Score: {:.4f}'.format(score))
    return oof_pred, models

In [125]:
def fit_lgbm(X, y, cv, params: dict=None, verbose: int=50):
    """Cross-validated ``LGBMClassifier``, thresholded at each fold's positive rate.

    NOTE: an earlier cell also defines ``fit_lgbm``; this definition replaces it.

    Args:
        X: Feature array indexed positionally (``X[idx]``).
        y: Binary target, array-like of the same length as ``X``.
        cv: Iterable of ``(train_indices, valid_indices)`` pairs.
        params: Keyword arguments for ``lgb.LGBMClassifier``; ``None`` means defaults.
        verbose: Passed to ``fit`` both as ``verbose`` and as ``early_stopping_rounds``.

    Returns:
        ``(oof_pred, models)``: a float array with the thresholded 0/1
        out-of-fold prediction of every row, and the list of fitted models.
    """
    metric_func = fbeta_score
    if params is None:
        params = {}

    # Work on a plain array so that y[idx] is always positional, even when a
    # pandas Series with a non-default index is passed in.
    y = np.asarray(y)

    models = []
    # One slot per training target. The builtin float replaces the alias
    # np.float, which was removed from NumPy (AttributeError since 1.24).
    oof_pred = np.zeros_like(y, dtype=float)

    for i, (idx_train, idx_valid) in enumerate(cv):
        # Cross-validation step: split the training data into train / valid
        # according to the indices supplied by cv.
        x_train, y_train = X[idx_train], y[idx_train]
        x_valid, y_valid = X[idx_valid], y[idx_valid]

        clf = lgb.LGBMClassifier(**params)

        # Decision border = share of positives in the validation fold.
        # (The printed label says "Best", but the value is not optimised.)
        border = y_valid.sum() / len(x_valid)

        print(f"Best_BetaScore_Border： {border}")

        with timer(prefix='fit fold={} '.format(i + 1)):
            # NOTE(review): `verbose` doubles as the early-stopping patience here.
            clf.fit(x_train, y_train,
                    eval_set=[(x_valid, y_valid)],
                    early_stopping_rounds=verbose,
                    verbose=verbose)

        # Labels are split at `border`, so predict probabilities rather than classes.
        pred_i = clf.predict_proba(x_valid)[:, 1]

        pred_i = np.where(pred_i < border, 0, 1)
        print(f'Fold {i} Beta Score: {metric_func(y_valid, pred_i, beta=7.0) :.4f}')

        oof_pred[idx_valid] = pred_i
        models.append(clf)

    score = metric_func(y, oof_pred, beta=7.0)
    print('FINISHED | Beta Score: {:.4f}'.format(score))
    return oof_pred, models

In [126]:
# LightGBM settings for the stacking model.
# "early_stopping_rounds" is set here AND passed to fit() by fit_lgbm; the run log
# shows LightGBM using the value from params ("Will use it instead of argument").
params = {
    'learning_rate': 0.01,
    'n_estimators': 10000,
    'objective':"binary",
    'class_weight':"balanced",
    'max_depth': 5,
    "early_stopping_rounds": 100,
    "random_state":71
}

# Materialise the stratified folds once so they can be passed around as a list.
fold = StratifiedKFold(n_splits=CONFIG.fold, shuffle=True, random_state=CONFIG.seed)
cv = list(fold.split(train_x_mod, train_y))

# Drop the last column of train_x_mod (the judgement label appended when it was built).
train_x_input = train_x_mod.iloc[:,:-1]

oof, models = fit_lgbm(train_x_input.values, train_y, cv, params=params)

Best_BetaScore_Border： 0.02321262766945218
Training until validation scores don't improve for 100 rounds.
[50]	valid_0's binary_logloss: 0.684192
[100]	valid_0's binary_logloss: 0.677373
[150]	valid_0's binary_logloss: 0.670332
[200]	valid_0's binary_logloss: 0.665099



Found `early_stopping_rounds` in params. Will use it instead of argument



[250]	valid_0's binary_logloss: 0.660218
[300]	valid_0's binary_logloss: 0.655632
[350]	valid_0's binary_logloss: 0.649965
[400]	valid_0's binary_logloss: 0.64492
[450]	valid_0's binary_logloss: 0.638642
[500]	valid_0's binary_logloss: 0.632196
[550]	valid_0's binary_logloss: 0.626257
[600]	valid_0's binary_logloss: 0.621832
[650]	valid_0's binary_logloss: 0.616977
[700]	valid_0's binary_logloss: 0.61217
[750]	valid_0's binary_logloss: 0.608029
[800]	valid_0's binary_logloss: 0.604168
[850]	valid_0's binary_logloss: 0.600827
[900]	valid_0's binary_logloss: 0.596035
[950]	valid_0's binary_logloss: 0.591831
[1000]	valid_0's binary_logloss: 0.588331
[1050]	valid_0's binary_logloss: 0.585046
[1100]	valid_0's binary_logloss: 0.580605
[1150]	valid_0's binary_logloss: 0.576404
[1200]	valid_0's binary_logloss: 0.573227
[1250]	valid_0's binary_logloss: 0.569447
[1300]	valid_0's binary_logloss: 0.566395
[1350]	valid_0's binary_logloss: 0.562915
[1400]	valid_0's binary_logloss: 0.55931
[1450]	val


Found `early_stopping_rounds` in params. Will use it instead of argument



[250]	valid_0's binary_logloss: 0.650681
[300]	valid_0's binary_logloss: 0.644118
[350]	valid_0's binary_logloss: 0.638532
[400]	valid_0's binary_logloss: 0.63351
[450]	valid_0's binary_logloss: 0.629285
[500]	valid_0's binary_logloss: 0.624135
[550]	valid_0's binary_logloss: 0.619519
[600]	valid_0's binary_logloss: 0.615279
[650]	valid_0's binary_logloss: 0.611268
[700]	valid_0's binary_logloss: 0.607417
[750]	valid_0's binary_logloss: 0.603543
[800]	valid_0's binary_logloss: 0.600356
[850]	valid_0's binary_logloss: 0.596683
[900]	valid_0's binary_logloss: 0.593279
[950]	valid_0's binary_logloss: 0.58978
[1000]	valid_0's binary_logloss: 0.585024
[1050]	valid_0's binary_logloss: 0.581493
[1100]	valid_0's binary_logloss: 0.577013
[1150]	valid_0's binary_logloss: 0.572216
[1200]	valid_0's binary_logloss: 0.567815
[1250]	valid_0's binary_logloss: 0.56292
[1300]	valid_0's binary_logloss: 0.558075
[1350]	valid_0's binary_logloss: 0.553589
[1400]	valid_0's binary_logloss: 0.549137
[1450]	val


Found `early_stopping_rounds` in params. Will use it instead of argument



[250]	valid_0's binary_logloss: 0.654336
[300]	valid_0's binary_logloss: 0.649849
[350]	valid_0's binary_logloss: 0.644666
[400]	valid_0's binary_logloss: 0.63932
[450]	valid_0's binary_logloss: 0.63339
[500]	valid_0's binary_logloss: 0.628857
[550]	valid_0's binary_logloss: 0.624308
[600]	valid_0's binary_logloss: 0.619956
[650]	valid_0's binary_logloss: 0.615661
[700]	valid_0's binary_logloss: 0.611372
[750]	valid_0's binary_logloss: 0.607438
[800]	valid_0's binary_logloss: 0.604079
[850]	valid_0's binary_logloss: 0.600517
[900]	valid_0's binary_logloss: 0.596964
[950]	valid_0's binary_logloss: 0.592981
[1000]	valid_0's binary_logloss: 0.5899
[1050]	valid_0's binary_logloss: 0.586296
[1100]	valid_0's binary_logloss: 0.582836
[1150]	valid_0's binary_logloss: 0.578047
[1200]	valid_0's binary_logloss: 0.573207
[1250]	valid_0's binary_logloss: 0.568416
[1300]	valid_0's binary_logloss: 0.565164
[1350]	valid_0's binary_logloss: 0.561834
[1400]	valid_0's binary_logloss: 0.557863
[1450]	vali


Found `early_stopping_rounds` in params. Will use it instead of argument



[250]	valid_0's binary_logloss: 0.64924
[300]	valid_0's binary_logloss: 0.641741
[350]	valid_0's binary_logloss: 0.634476
[400]	valid_0's binary_logloss: 0.62728
[450]	valid_0's binary_logloss: 0.620818
[500]	valid_0's binary_logloss: 0.615006
[550]	valid_0's binary_logloss: 0.60979
[600]	valid_0's binary_logloss: 0.604496
[650]	valid_0's binary_logloss: 0.599332
[700]	valid_0's binary_logloss: 0.594125
[750]	valid_0's binary_logloss: 0.589166
[800]	valid_0's binary_logloss: 0.5847
[850]	valid_0's binary_logloss: 0.580109
[900]	valid_0's binary_logloss: 0.575152
[950]	valid_0's binary_logloss: 0.570115
[1000]	valid_0's binary_logloss: 0.566055
[1050]	valid_0's binary_logloss: 0.562414
[1100]	valid_0's binary_logloss: 0.558491
[1150]	valid_0's binary_logloss: 0.554964
[1200]	valid_0's binary_logloss: 0.55138
[1250]	valid_0's binary_logloss: 0.548251
[1300]	valid_0's binary_logloss: 0.544335
[1350]	valid_0's binary_logloss: 0.540771
[1400]	valid_0's binary_logloss: 0.536999
[1450]	valid_

KeyboardInterrupt: ignored