In [1]:
import pandas as pd
import numpy as np
import json
import os
import random
import string
import re

from pathlib import Path
from tqdm import tqdm

import gensim
import lightgbm as lgb
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error,mean_squared_log_error
from sklearn.preprocessing import LabelEncoder
from nltk.corpus import stopwords
from bs4 import BeautifulSoup

In [2]:
from google.cloud import storage as gcs
import io
from io import BytesIO
import glob

In [3]:
project_name = "hogehoge_project" #プロジェクト名
bucket_name = "hogehoge_bucket" #データを格納しているバケット名
folder_path = "hogehoge_folder/" #データのあるフォルダパス

In [4]:
client = gcs.Client(project_name)
bucket = client.get_bucket(bucket_name)

In [5]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/jupyter/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
# StopWord の再定義
stop = set(stopwords.words("english"))

# 句読点の追加。string.punctuation = ['!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~']
punctuation = list(string.punctuation)

# 手動で追加
org_stop = ["Subject"]

# stopwordsの定義更新
add_stop = punctuation + org_stop
stop.update(add_stop)

In [7]:
# htmlの分割
def strip_html(text):
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()

# []で囲まれた文章の削除（脚注、linkなど）
def remove_between_square_brackets(text):
    return re.sub('\[[^]]*\]', '', text)
# URLの削除
def remove_URL(text):
    return re.sub(r'http\S+', '', text)

# stopwordsの削除
def remove_stopwords(text):
    final_text = []
    for i in text.split():
        if i.strip().lower() not in stop:
            if i.strip().isalpha():
                final_text.append(i.strip())
    return " ".join(final_text)

# 上記の関数をまとめて適用する関数を定義
def denoise_text(text):
    text = strip_html(text)
    text = remove_between_square_brackets(text)
    text = remove_URL(text)
    text = remove_stopwords(text)
    return text

In [8]:
# バージョン情報から論文の更新情報を抽出する関数を定義
def preprocess(input_df):
    output_df = input_df.copy()

    output_df["first_created_unixtime"] = pd.to_datetime(input_df.versions.apply(lambda p: p[0]["created"])).astype(int) / 1e9
    output_df["last_created_unixtime"] = pd.to_datetime(input_df.versions.apply(lambda p: p[-1]["created"])).astype(int) / 1e9
    output_df["diff_created_unixtime"] = output_df["last_created_unixtime"] - output_df["first_created_unixtime"]
    output_df["num_created"] = input_df.versions.apply(lambda p: len(p))

    return output_df

In [9]:
# 言語データをベクトル化するためのインスタンスを作成
train_path = folder_path + "GoogleNews-vectors-negative300.bin"
blob = gcs.Blob(train_path, bucket)
content = blob.download_as_string()
emb_model = gensim.models.KeyedVectors.load_word2vec_format(BytesIO(content), binary=True)

In [10]:
# テキストデータをベクトル化する関数を定義　※平均
def get_text_emb(emb_model, text):
    tokens = text.split(" ")
    embs = []
    for token in tokens:
        emb = None
        try:
            emb = emb_model.get_vector(token)
        except KeyError:
            # 小文字化した単語はvocabularyに含まれているかもしれない
            try:
                emb = emb_model.get_vector(token.lower())
            except KeyError:
                emb = np.zeros(300)
        if emb is None:
            raise RuntimeError("emb is none")
        embs.append(emb)
    mean_embs = np.mean(embs, axis=0)
    return mean_embs

In [11]:
# テキストデータをベクトル化する関数を定義　※最大
def get_text_emb_max(emb_model, text):
    tokens = text.split(" ")
    embs = []
    for token in tokens:
        emb = None
        try:
            emb = emb_model.get_vector(token)
        except KeyError:
            # 小文字化した単語はvocabularyに含まれているかもしれない
            try:
                emb = emb_model.get_vector(token.lower())
            except KeyError:
                emb = np.zeros(300)
        if emb is None:
            raise RuntimeError("emb is none")
        embs.append(emb)
    mean_embs = np.max(embs, axis=0)
    return mean_embs

In [12]:
# 各種の定数を定義
NFOLDS = 5
SEED = 42

def set_seed(seed=42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)

set_seed(SEED)

In [13]:
# jsonファイルの読み込み
def get_json_from_gcp(file_name):
    train_path = folder_path + file_name
    blob = gcs.Blob(train_path, bucket)
    content = blob.download_as_string()
    df = pd.read_json(BytesIO(content), lines=True)
    return df

## start point

In [14]:
train_df = get_json_from_gcp("train_data.json")

In [15]:
test_df = get_json_from_gcp("test_data.json")

In [16]:
# 訓練データで「cites」が欠損しているデータを削除
train_df = train_df.dropna(subset=['cites'])

In [16]:
#　categoriesラベルエンコーディング
t_all_df = pd.concat([train_df, test_df], sort=False).reset_index(drop=True)

In [17]:
t_all_df["categories"].fillna("missing", inplace=True)

In [18]:
le = LabelEncoder()
le = le.fit(t_all_df["categories"])
t_all_df["categories"] = le.transform(t_all_df["categories"])

In [19]:
#  doiをリスト抽出しラベルエンコーディング
t_l_jnl = t_all_df["doi"].values.tolist()

tmp = []
for i in t_l_jnl:
    x = i.find("/")
    tmp.append(i[0:x])

t_doi_df = pd.Series(tmp)
t_all_df["s_doi"] = t_doi_df

In [20]:
le = LabelEncoder()
le = le.fit(t_all_df["s_doi"])
t_all_df["s_doi"] = le.transform(t_all_df["s_doi"])

In [21]:
train_df = t_all_df[~t_all_df["cites"].isnull()].reset_index(drop=True)
test_df = t_all_df[t_all_df["cites"].isnull()].reset_index(drop=True)

In [22]:
# 訓練データとテストデータから特長量を作成し格納するリストを作成
train = []
train_2 = []
train_3 = []
train_4 = []
train_5 = []
train_6 = []
train_7 = []
train_8 = []
train_feat = []
target = []

test = []
test_2 = []
test_3 = []
test_4 = []
test_5 = []
test_6 = []
test_7 = []
test_8 = []
test_feat = []
test_index = []

In [24]:
# 訓練データの「abstract」をベクトル化　※平均
for abstract in train_df["abstract"]:
    abstract = denoise_text(abstract)
    mean_embs = get_text_emb(emb_model, abstract)
    train.append(mean_embs)

In [25]:
# 訓練データの「abstract」をベクトル化　※最大
for abstract in train_df["abstract"]:
    abstract = denoise_text(abstract)
    mean_embs = get_text_emb_max(emb_model, abstract)
    train_8.append(mean_embs)

In [26]:
# 訓練データの「doi_cites」の対数を取る
for doi_cites in train_df["doi_cites"]:
    log_doi_cites = [np.log1p(int(doi_cites))]
    train_feat.append(log_doi_cites)

In [27]:
# 訓練データの目的変数「cites」を格納
for cites in train_df["cites"]:
    target.append(cites)

In [28]:
# 訓練データの「title」をベクトル化 ※平均
for title in train_df["title"]:
    title = denoise_text(title)
    mean_embs = get_text_emb(emb_model, title)
    train_2.append(mean_embs)

In [29]:
# テストデータの「abstract」をベクトル化 ※平均
for abstract in test_df["abstract"]:
    abstract = denoise_text(abstract)
    mean_embs = get_text_emb(emb_model, abstract)
    test.append(mean_embs)

In [30]:
# テストデータの「abstract」をベクトル化 ※最大
for abstract in test_df["abstract"]:
    abstract = denoise_text(abstract)
    mean_embs = get_text_emb_max(emb_model, abstract)
    test_8.append(mean_embs)

In [31]:
# テストデータの「doi_cites」の対数を取る
for doi_cites in test_df["doi_cites"]:
    log_doi_cites = [np.log1p(int(doi_cites))]
    test_feat.append(log_doi_cites)

In [32]:
# テストデータの「id」を格納
for id in test_df["id"]:
    test_index.append(id)

In [33]:
# テストデータの「title」をベクトル化 ※平均
for title in test_df["title"]:
    title = denoise_text(title)
    mean_embs = get_text_emb(emb_model, title)
    test_2.append(mean_embs)

In [34]:
# 訓練データの「cat」を格納
for cat in train_df["categories"]:
    train_3.append(cat)

tmp = np.array(train_3)
train_3 = tmp.reshape(-1, 1)

In [35]:
# テストデータの「cat」を格納
for cat in test_df["categories"]:
    test_3.append(cat)

tmp = np.array(test_3)
test_3 = tmp.reshape(-1, 1)

In [36]:
# 訓練データの「s_doi」を格納
for s_doi in train_df["s_doi"]:
    train_4.append(s_doi)

tmp = np.array(train_4)
train_4 = tmp.reshape(-1, 1)

In [37]:
# テストデータの「s_doi」を格納
for s_doi in test_df["s_doi"]:
    test_4.append(s_doi)

tmp = np.array(test_4)
test_4 = tmp.reshape(-1, 1)

In [38]:
#　訓練データの更新情報を取得して、特長量として格納
train_pre_df = preprocess(train_df)
train_pre_tmp_df = train_pre_df.loc[:, ["first_created_unixtime", "last_created_unixtime", "diff_created_unixtime", "num_created"]]
train_5 = train_pre_tmp_df.values.tolist()

In [39]:
#　テストデータの更新情報を取得して、特長量として格納
test_pre_df = preprocess(test_df)
test_pre_tmp_df = test_pre_df.loc[:, ["first_created_unixtime", "last_created_unixtime", "diff_created_unixtime", "num_created"]]
test_5 = test_pre_tmp_df.values.tolist()

In [40]:
#  訓練データのコメントからページ数情報を抽出し格納
l = train_pre_df["comments"].values.tolist()
tmp_com = []
for i in l:
    if "pages" in str(i):
        pos = i.find("pages")
        txt = i[:pos]
        txt = re.sub(r"\D", "", txt)
        tmp_com.append(txt)
    else:
        tmp_com.append(0)

In [41]:
numlist = []
for txt in tmp_com:
    try:
        num = int(txt)
        numlist.append(num)
    except ValueError:
        numlist.append(0)

In [42]:
numlist2 = []
for num in numlist:
    if num >= 1000:
        numlist2.append(0)
    else:
        numlist2.append(num)

In [43]:
tmp = np.array(numlist2)
train_6 = tmp.reshape(-1, 1)

In [44]:
#  テストデータのコメントからページ数情報を抽出し格納
l = test_df["comments"].values.tolist()
tmp_com = []
for i in l:
    if "pages" in str(i):
        pos = i.find("pages")
        txt = i[:pos]
        txt = re.sub(r"\D", "", txt)
        tmp_com.append(txt)
    else:
        tmp_com.append(0)

In [45]:
numlist = []
for txt in tmp_com:
    try:
        num = int(txt)
        numlist.append(num)
    except ValueError:
        numlist.append(0)

In [46]:
numlist2 = []
for num in numlist:
    if num >= 1000:
        numlist2.append(0)
    else:
        numlist2.append(num)

In [47]:
tmp = np.array(numlist2)
test_6 = tmp.reshape(-1, 1)

In [48]:
#  訓練データのコメントから図表情報を抽出し格納
m = train_pre_df["comments"].values.tolist()
tmp_com = []
for i in m:
    if "figures" in str(i):
        pos = i.find("figures")
        txt = i[:pos]
        txt = re.sub(r"\D", "", txt)
        tmp_com.append(txt)
    else:
        tmp_com.append(0)

In [49]:
numlist = []
for txt in tmp_com:
    try:
        num = int(txt)
        numlist.append(num)
    except ValueError:
        numlist.append(0)

In [50]:
numlist2 = []
for num in numlist:
    if num >= 10000:
        numlist2.append(0)
    else:
        numlist2.append(num)

In [51]:
tmp = np.array(numlist2)
train_7 = tmp.reshape(-1, 1)

In [52]:
#  テストデータのコメントから図表情報を抽出し格納
m = test_df["comments"].values.tolist()
tmp_com = []
for i in m:
    if "figures" in str(i):
        pos = i.find("figures")
        txt = i[:pos]
        txt = re.sub(r"\D", "", txt)
        tmp_com.append(txt)
    else:
        tmp_com.append(0)

In [53]:
numlist = []
for txt in tmp_com:
    try:
        num = int(txt)
        numlist.append(num)
    except ValueError:
        numlist.append(0)

In [54]:
numlist2 = []
for num in numlist:
    if num >= 10000:
        numlist2.append(0)
    else:
        numlist2.append(num)

In [55]:
tmp = np.array(numlist2)
test_7 = tmp.reshape(-1, 1)

In [56]:
train = np.concatenate([np.array(train), np.array(train_2), np.array(train_3), np.array(train_4), np.array(train_5), np.array(train_6), np.array(train_7), np.array(train_8), np.array(train_feat)], axis=1)
target = np.array(np.log1p(target))
test = np.concatenate([np.array(test), np.array(test_2), np.array(test_3), np.array(test_4), np.array(test_5), np.array(test_6), np.array(test_7), np.array(test_8), np.array(test_feat)], axis=1)

In [57]:
# 各データのサイズの確認
print(train.shape)
print(target.shape)
print(test.shape)

(15117, 909)
(15117,)
(59084, 909)


(15117, 610)
(15117,)
(59084, 610)

In [60]:
#####################################################3
### LGBで学習、予測する関数の定義
########################################################
def Train_and_Pred(train,target,test):
    # --------------------------------------
    # パラメータ定義
    # --------------------------------------
    lgb_params = {
                    'objective': 'root_mean_squared_error',
                    'boosting_type': 'gbdt',
                    'n_estimators': 50000,
                    'colsample_bytree': 0.5,
                    'subsample': 0.5,
                    'subsample_freq': 3,
                    'reg_alpha': 8,
                    'reg_lambda': 2,
                    'random_state': SEED,
                    "bagging_fraction": 0.8402379446262978,
                    "bagging_freq": 4,
                    "feature_fraction": 0.74623605968501,
                    "lambda_l1": 0.01113869595673112,
                    "lambda_l2": 8.706009358617911e-07,
                    "learning_rate": 0.012307412937706345,
                    "min_child_samples": 18,
                    "num_leaves": 8,        
                  }

    # --------------------------------------
    # 学習と予測
    # --------------------------------------
    kf = KFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)
    lgb_oof = np.zeros(train.shape[0])
    lgb_pred = 0

    for fold, (trn_idx, val_idx) in enumerate(kf.split(X=train)):
        X_train, y_train = train[trn_idx], target[trn_idx]
        X_valid, y_valid = train[val_idx], target[val_idx]
        X_test = test

        # LightGBM
        model = lgb.LGBMRegressor(**lgb_params)
        model.fit(X_train, y_train,
                  eval_set=(X_valid, y_valid),
                  eval_metric='rmse',
                  verbose=False,
                  early_stopping_rounds=500
                  )

        lgb_oof[val_idx] = model.predict(X_valid)
        lgb_pred += model.predict(X_test) / NFOLDS
        rmsle = mean_squared_error(y_valid, lgb_oof[val_idx], squared=False)
        print(f"fold {fold} lgb score: {rmsle}")

    rmsle = mean_squared_error(target, lgb_oof, squared=False)
    print("+-" * 40)
    print(f"score: {rmsle}")
    print(f"model score: {model.score(train, target)}")
    
    # ------------------------------------------------------------------------------
    # 提出ファイルの作成
    # ------------------------------------------------------------------------------
    test_predicted = np.expm1(lgb_pred)

    submit_df = pd.DataFrame({'id': test_index})
    submit_df['cites'] = np.where(test_predicted < 0, 0, test_predicted)
    submit_df.to_csv("sub27.csv", index=False)
    blob = bucket.blob("sub27.csv")
    blob.upload_from_filename(filename="sub27.csv")
    return rmsle

In [61]:
#学習と予測の実行
Train_and_Pred(train, target, test)

fold 0 lgb score: 0.5239869189669257
fold 1 lgb score: 0.5013367799068057
fold 2 lgb score: 0.5218132120825543
fold 3 lgb score: 0.4910580967490293
fold 4 lgb score: 0.501724314684782
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
score: 0.508145326437862
model score: 0.8809856583254436


0.508145326437862

In [58]:
from catboost import CatBoost, CatBoostRegressor, CatBoostClassifier
from catboost import Pool

In [59]:
#####################################################3
### catboostで学習、予測する関数の定義
########################################################
def Train_and_Pred_cat(train,target,test):
    # --------------------------------------
    # パラメータ定義
    # --------------------------------------
    cat_params = {
                    'iterations': 5000,
                    }

    # --------------------------------------
    # 学習と予測
    # --------------------------------------
    kf = KFold(n_splits=NFOLDS, shuffle=True, random_state=SEED)
    cat_oof = np.zeros(train.shape[0])
    cat_pred = 0

    for fold, (trn_idx, val_idx) in enumerate(kf.split(X=train)):
        X_train, y_train = train[trn_idx], target[trn_idx]
        X_valid, y_valid = train[val_idx], target[val_idx]
        X_test = test

        # catboost
        model = CatBoostRegressor(**cat_params)
        model.fit(X_train, y_train,
                  eval_set=(X_valid, y_valid),
                  verbose=False,
                  early_stopping_rounds=500
                  )

        cat_oof[val_idx] = model.predict(X_valid)
        cat_pred += model.predict(X_test) / NFOLDS
        rmsle = mean_squared_error(y_valid, cat_oof[val_idx], squared=False)
        print(f"fold {fold} cat score: {rmsle}")

    rmsle = mean_squared_error(target, cat_oof, squared=False)
    print("+-" * 40)
    print(f"score: {rmsle}")
    print(f"model score: {model.score(train, target)}")
    
    # ------------------------------------------------------------------------------
    # 提出ファイルの作成
    # ------------------------------------------------------------------------------
    test_predicted = np.expm1(cat_pred)

    submit_df = pd.DataFrame({'id': test_index})
    submit_df['cites'] = np.where(test_predicted < 0, 0, test_predicted)
    submit_df.to_csv("sub29.csv", index=False)
    blob = bucket.blob("sub29.csv")
    blob.upload_from_filename(filename="sub29.csv")
    return rmsle

In [60]:
#学習と予測の実行 cat
Train_and_Pred_cat(train, target, test)

fold 0 cat score: 0.525006069820722
fold 1 cat score: 0.5004416056724608
fold 2 cat score: 0.520675864495167
fold 3 cat score: 0.48808685147541087
fold 4 cat score: 0.498801607142225
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-
score: 0.5067962994647837
model score: 0.9044651176085674


0.5067962994647837

## score memo
score: 0.5091439804187227
model score: 0.8776748976366855

score: 0.5349412183809286
model score: 0.8561340308642265

score: 0.510865780648061 -> PB:0.512208
model score: 0.8613939830797712

score: 0.5083981751554011
model score: 0.8801454028408289

#catboost params_non iter:5000
score: 0.5089691056151895
model score: 0.9088950014953863

#catboost params_mix
score: 0.5141818558985992
model score: 0.8414627399770431
score: 0.5104255055887893
model score: 0.8661647531047596

score: 0.5134752086890247
model score: 0.8708150081478201

#catboost params_non iter:5000 +figures info 1000>cut
score: 0.508860847861468
model score: 0.9071390413198503

#catboost params_non iter:5000 +figures info 10000>cut
score: 0.5084451492511196
model score: 0.8748347769753742

#LGBM ↑
score: 0.5084060348263081
model score: 0.873635256762187

#LGBM +auth
score: 0.508145326437862
model score: 0.8809856583254436

#cat +auth
score: 0.5082768701494705
model score: 0.8713432729258084

#catboost params_non iter:5000 +figures info 10000>cut +abstract max
score: 0.5067962994647837
model score: 0.9044651176085674

## パラメーターの最適化

In [49]:
import optuna
from sklearn.model_selection import train_test_split

In [50]:
X_train, X_valid, y_train, y_valid = train_test_split(train, target, test_size=0.2, random_state=1234, shuffle=False, stratify=None)

In [146]:
#####################################################3
### LGBのパラメーター最適化
########################################################
def objective(trial):
    # --------------------------------------
    # パラメータ定義
    # --------------------------------------
    lgb_params = {
                    'objective': 'root_mean_squared_error',
                    'boosting_type': 'gbdt',
                    'n_estimators': 1000,
                    'colsample_bytree': 0.5,
                    'subsample': 0.5,
                    'subsample_freq': 3,
                    'reg_alpha': 8,
                    'reg_lambda': 2,
                    'random_state': SEED,
                    "bagging_fraction": trial.suggest_uniform("bagging_fraction",0.4,0.9),
                    "bagging_freq": trial.suggest_int("bagging_freq",1,10),
                    "feature_fraction": trial.suggest_uniform("feature_fraction",0.4,0.9),
                    "lambda_l1": 0.01113869595673112,
                    "lambda_l2": 8.706009358617911e-07,
                    "learning_rate": 0.012307412937706345,
                    "min_child_samples": 18,
                    "num_leaves": 8,        
                  }

    # --------------------------------------
    # モデル構築
    # --------------------------------------
    
    lgb_train = lgb.Dataset(X_train, y_train)
    lgb_eval = lgb.Dataset(X_valid, y_valid, reference=lgb_train)
    
    model_lgb = lgb.train(lgb_params, lgb_train,
                         valid_sets=lgb_eval,
                         num_boost_round=100,
                         early_stopping_rounds=20,
                         verbose_eval=10,)
    y_pred = model_lgb.predict(X_valid, num_iteration=model_lgb.best_iteration)
    score = np.sqrt(mean_squared_error(y_valid, y_pred))
    
    return score

In [51]:
#####################################################3
### catboostのパラメーター最適化
########################################################
def objective_cat(trial):
    # --------------------------------------
    # パラメータ定義
    # --------------------------------------
    cat_params = {
                    'iterations' : 5000,                         
                    'depth' : trial.suggest_int('depth', 4, 10),                                       
                    'learning_rate' : trial.suggest_loguniform('learning_rate', 0.01, 0.3),               
                    'random_strength' :trial.suggest_int('random_strength', 0, 100),                       
                    'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00), 
                    'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter']),
                    'od_wait' :trial.suggest_int('od_wait', 10, 50)       
                  }

    # --------------------------------------
    # モデル構築
    # --------------------------------------
    
    train_pool = Pool(X_train, y_train)
    test_pool = Pool(X_valid, y_valid)
    
    # 学習
    model = CatBoostRegressor(**cat_params)
    model.fit(train_pool)
    # 予測
    preds = model.predict(test_pool)
    pred_labels = np.rint(preds)
    y_pred = model.predict(X_valid)
    score = np.sqrt(mean_squared_error(y_valid, y_pred))
    
    return score

In [None]:
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=0))
study.optimize(objective, n_trials=50)
study.best_params

{'bagging_fraction': 0.8457772186128385,
 'bagging_freq': 1,
 'feature_fraction': 0.8232043362355639}
 
 {'bagging_fraction': 0.8402379446262978,
 'bagging_freq': 4,
 'feature_fraction': 0.74623605968501}

In [None]:
study = optuna.create_study(sampler=optuna.samplers.RandomSampler(seed=0))
study.optimize(objective_cat, n_trials=50)
study.best_params

{'iterations': 187,
 'depth': 5,
 'learning_rate': 0.10719804484767982,
 'random_strength': 3,
 'bagging_temperature': 0.4789624637920164,
 'od_type': 'IncToDec',
 'od_wait': 13}
 
 {'iterations': 746,
 'depth': 4,
 'learning_rate': 0.08182456890976228,
 'random_strength': 18,
 'bagging_temperature': 32.64440528937261,
 'od_type': 'Iter',
 'od_wait': 21}
 
 Trial 8 finished with value: 0.4876234943471408 and parameters: {'depth': 4, 'learning_rate': 0.010659941309425505, 'random_strength': 53, 'bagging_temperature': 0.03969167884674241, 'od_type': 'IncToDec', 'od_wait': 27}. Best is trial 8 with value: 0.4876234943471408.