<a href="https://colab.research.google.com/github/KoMurase/movie_research_2/blob/master/parameter_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 参考: https://github.com/optuna/optuna/blob/master/examples/lightgbm_tuner_cv.py

https://optuna.readthedocs.io/en/stable/index.html

In [None]:
!cp -r '/content/drive/MyDrive/Movies/data/mojo_japan/' '/content/'

!cp -r '/content/drive/MyDrive/Movies/data/metadata_filmarks/' '/content/'

In [None]:
!pip install japanize_matplotlib
import pandas as pd
import codecs 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
import japanize_matplotlib

import warnings

import os
from tqdm import tqdm
import gc



In [None]:
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import KFold

from sklearn.model_selection import train_test_split

#評価
from sklearn.metrics import r2_score
#r2_score(y_true, y_pred)

In [None]:
#import optuna
#from optuna.integration import lightgbm_tuner
!pip install optuna
import optuna.integration.lightgbm as lgb



In [None]:
path = '/content/drive/MyDrive/Movies/preprocessed/'

In [None]:
def read_data(path, encode_type):
  """Load a comma-separated file into a DataFrame.

  The file is opened through ``codecs`` with the ``"ignore"`` error handler,
  so bytes that cannot be decoded with ``encode_type`` are dropped instead of
  raising a decode error.

  Args:
    path: Location of the CSV file.
    encode_type: Name of the text encoding used to decode the file.

  Returns:
    The parsed ``pandas.DataFrame``.
  """
  reader = codecs.open(path, "r", encode_type, "ignore")
  try:
    return pd.read_csv(reader, sep=",")
  finally:
    # Always release the handle, even when parsing fails.
    reader.close()

In [None]:
# Load the 2016-2018 training set and the 2019 test set.
train = read_data(path+'train_2016-2018.csv', encode_type='utf-8')
test = read_data(path+'test_2019.csv', encode_type='utf-8')

# Keep only the first row for every Title in the training data.
train = train[~train['Title'].duplicated(keep='first')]

In [None]:
train.shape

(512, 9017)

In [None]:
def preprocess_Theaters(input_df:pd.DataFrame)->pd.DataFrame:
  """Fill the '-' placeholder in ``Theaters`` and cast the column to int.

  Rows whose ``Theaters`` value is the string '-' are filled with the mean of
  all other rows (computed after casting those rows to int). The whole column
  is then cast with ``astype(int)``, which also converts the float mean.

  Args:
    input_df: Frame containing a ``Theaters`` column. It is not modified.

  Returns:
    A copy of ``input_df`` with an integer ``Theaters`` column.
  """
  theaters = input_df['Theaters']
  known_mean = theaters[theaters != '-'].astype(int).mean()

  output_df = input_df.copy()
  output_df['Theaters'] = output_df['Theaters'].replace('-', known_mean).astype(int)
  return output_df

def lable_encoding(input_df:pd.DataFrame, cat_cols:list)->pd.DataFrame:
  """Label-encode categorical columns by order of first appearance.

  For every column in ``cat_cols`` the distinct values are numbered 0, 1, 2, ...
  in the order in which they first occur, and the column is replaced by those
  integer codes.

  Args:
    input_df: Source frame. It is not modified.
    cat_cols: Names of the columns to encode.

  Returns:
    A copy of ``input_df`` with the listed columns replaced by their codes.
  """
  output_df = input_df.copy()
  for col in cat_cols:
    seen = input_df[col].unique()
    codes = dict(zip(seen, range(len(seen))))
    output_df[col] = input_df[col].map(codes)
  return output_df

#上映館数の前処理
# Preprocess train and test together so both share the same Theaters fill
# value and the same label-encoding dictionaries, then split them back.
n_train = len(train)
cols=['Country', 'Opening_Day']

data = (
    pd.concat([train, test], axis=0)
    .reset_index(drop=True)
    .pipe(preprocess_Theaters)   # Theaters: '-' -> mean, cast to int
    .pipe(lable_encoding, cols)
)

train = data.iloc[:n_train]
test = data.iloc[n_train:]


print(train.shape, test.shape)

(512, 9017) (168, 9017)


In [None]:
#アルゴリズムの方で分岐しているから必要ないかもしれない特徴量

def make_describe_col(input_df:pd.DataFrame, col:str):
  """Add four 0/1 indicator columns marking the quartile bucket of ``col``.

  The quartile thresholds are taken from ``describe()`` of ``input_df[col]``,
  i.e. from the frame that is passed in. The new columns are:

    * ``{col}_[x<25%]``      : value below the 25% threshold
    * ``{col}_[25<=x<50%]``  : 25% threshold <= value < 50% threshold
    * ``{col}_[50<=x<75%]``  : 50% threshold <= value < 75% threshold
    * ``{col}_[75%<x]``      : value >= 75% threshold

  Missing values compare as False against every threshold and therefore get 0
  in all four columns.

  Args:
    input_df: Source frame with a numeric column ``col``. It is not modified.
    col: Name of the column to bucket.

  Returns:
    A copy of ``input_df`` with the four indicator columns appended.
  """
  output_df = input_df.copy()
  values = output_df[col]

  # Compute the summary once instead of once per threshold.
  stats = values.describe()
  p_25, p_50, p_75 = stats['25%'], stats['50%'], stats['75%']

  # Vectorised comparisons replace the per-row lambdas; int64 matches the
  # dtype the original apply() produced.
  output_df[f'{col}_[x<25%]'] = (values < p_25).astype('int64')
  output_df[f'{col}_[25<=x<50%]'] = ((values >= p_25) & (values < p_50)).astype('int64')
  output_df[f'{col}_[50<=x<75%]'] = ((values >= p_50) & (values < p_75)).astype('int64')
  output_df[f'{col}_[75%<x]'] = (values >= p_75).astype('int64')

  return output_df

# Add quartile indicator columns for Theaters and Time.
# NOTE(review): thresholds are computed separately for train and test, so the
# same raw value can land in different buckets in the two frames.
for quartile_col in ('Theaters', 'Time'):
  train = make_describe_col(train, quartile_col)

for quartile_col in ('Theaters', 'Time'):
  test = make_describe_col(test, quartile_col)

#正規化

In [None]:
from sklearn import preprocessing

def normalization(df:pd.DataFrame, cols:list):
  """Standardize the given columns with scikit-learn's ``StandardScaler``.

  A new scaler is fitted on ``df`` on every call, so each frame passed in is
  scaled with its own statistics. The scaled values overwrite the original
  columns in the returned copy; column names are kept as they are.

  Args:
    df: Source frame. It is not modified.
    cols: Names of the numeric columns to standardize.

  Returns:
    A copy of ``df`` in which ``cols`` hold the standardized values.
  """
  output_df = df.copy()

  ss = preprocessing.StandardScaler()
  output_df[cols] = ss.fit_transform(output_df[cols])

  return output_df

In [None]:
# Columns to standardize: review count, average score and theater count.
use_columns = ['レビュー数', '平均スコア', 'Theaters']

# NOTE(review): each frame is scaled with its own mean/std because
# normalization() fits a fresh scaler per call.
train, test = (normalization(frame, use_columns) for frame in (train, test))

In [None]:
#レビュー，平均スコアを含める場合
#drop_cols=['Title','Title(ja)','directors','writers','casts']

#----------------------------------------------------------------
#レビュー，平均スコアを含めない場合
drop_cols=['Title','Title(ja)','directors','writers','casts','レビュー数','平均スコア']

In [None]:
#Total Grossを予測する場合
# Target is Total Gross, so Gross is dropped together with the
# non-feature columns.
train_features = train.drop(columns=drop_cols+['Gross'])

#test_features = test.drop(drop_cols+['Gross'], axis=1)


In [None]:
target_col = 'Total Gross'

X = train_features.drop(columns=target_col)
# Model the target on a log scale.
y = np.log1p(train_features[target_col])

dtrain = lgb.Dataset(X, y)

#train_X, valid_X, train_y, valid_y = train_test_split(X,y, test_size=0.2, random_state=42)
#dtrain = lgb.Dataset(train_X, train_y)
#dvalid = lgb.Dataset(valid_X, valid_y)

In [None]:
# Fixed LightGBM settings; the remaining hyper-parameters are searched by the tuner.
params = dict(
    objective='regression',
    metric='rmse',
    verbosity=-1,
    boosting_type='gbdt',
)

In [None]:
# `lgb` is optuna.integration.lightgbm at this point (rebound by the optuna
# import cell), so this runs Optuna's step-wise LightGBM tuner with 5-fold CV.
cv_splitter = KFold(n_splits=5)

tuner = lgb.LightGBMTunerCV(
    params,
    dtrain,
    verbose_eval=1000,
    early_stopping_rounds=100,
    folds=cv_splitter,
)

tuner.run()

[32m[I 2021-01-31 13:10:24,111][0m A new study created in memory with name: no-name-2dd8bd97-0dfe-4eb0-b9a7-e991cb54546a[0m
feature_fraction, val_score: 1.641041:  14%|#4        | 1/7 [00:00<00:03,  1.85it/s][32m[I 2021-01-31 13:10:24,662][0m Trial 0 finished with value: 1.6410407647034575 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 1.6410407647034575.[0m
feature_fraction, val_score: 1.641041:  29%|##8       | 2/7 [00:00<00:02,  1.99it/s][32m[I 2021-01-31 13:10:25,077][0m Trial 1 finished with value: 1.6462717536497002 and parameters: {'feature_fraction': 0.8}. Best is trial 0 with value: 1.6410407647034575.[0m
feature_fraction, val_score: 1.609695:  43%|####2     | 3/7 [00:01<00:01,  2.10it/s][32m[I 2021-01-31 13:10:25,491][0m Trial 2 finished with value: 1.6096949852383315 and parameters: {'feature_fraction': 0.4}. Best is trial 2 with value: 1.6096949852383315.[0m
feature_fraction, val_score: 1.609695:  57%|#####7    | 4/7 [00:01

In [None]:
# Report the best CV score and keep the baseline parameter set.
print("Best score:", tuner.best_score)
best_params = tuner.best_params
print("Best params:", best_params)
print("  Params: ")

for name in best_params:
    print(f"    {name}: {best_params[name]}")

Best score: 1.554649094469505
Best params: {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1, 'boosting_type': 'gbdt', 'feature_pre_filter': False, 'lambda_l1': 0.0, 'lambda_l2': 0.0, 'num_leaves': 2, 'feature_fraction': 0.4, 'bagging_fraction': 0.7194396763571909, 'bagging_freq': 3, 'min_child_samples': 25}
  Params: 
    objective: regression
    metric: rmse
    verbosity: -1
    boosting_type: gbdt
    feature_pre_filter: False
    lambda_l1: 0.0
    lambda_l2: 0.0
    num_leaves: 2
    feature_fraction: 0.4
    bagging_fraction: 0.7194396763571909
    bagging_freq: 3
    min_child_samples: 25


In [None]:
best_params

{'bagging_fraction': 0.7194396763571909,
 'bagging_freq': 3,
 'boosting_type': 'gbdt',
 'feature_fraction': 0.4,
 'feature_pre_filter': False,
 'lambda_l1': 0.0,
 'lambda_l2': 0.0,
 'metric': 'rmse',
 'min_child_samples': 25,
 'num_leaves': 2,
 'objective': 'regression',
 'verbosity': -1}

# レビューデータを含める場合

In [None]:
# Attach the per-title review features to the training frame.
review = pd.read_csv('/content/drive/MyDrive/Movies/preprocessed/keiyousi_2016-2019.csv')

# Drop the unused identifier columns first, then keep exactly one row per
# Title. De-duplicating whole rows before dropping those columns can leave
# several rows for one Title, and the left merge would then duplicate the
# matching training rows.
review_ = review.drop(['Title(ja)','title_url'], axis=1)
review_ = review_.drop_duplicates(subset='Title', keep='first')

# validate= makes pandas raise if the review side is ever not unique per Title.
# NOTE: this rebinds `train`; re-running the cell without reloading `train`
# merges the review columns a second time.
train = train.merge(review_, how='left', on='Title', validate='many_to_one')
#test = test.merge(review_, how='left', on='Title')

train_features = train.drop(drop_cols+['Gross'], axis=1)

X = train_features.drop('Total Gross', axis=1)
y = train_features['Total Gross']
# Model the target on a log scale.
y = np.log1p(y)

dtrain = lgb.Dataset(X, y)

In [None]:
# Same fixed settings and 5-fold tuner as the baseline run, now on the
# training data that includes the review features.
params = dict(
    objective='regression',
    metric='rmse',
    verbosity=-1,
    boosting_type='gbdt',
)

cv_splitter = KFold(n_splits=5)

tuner = lgb.LightGBMTunerCV(
    params,
    dtrain,
    verbose_eval=1000,
    early_stopping_rounds=100,
    folds=cv_splitter,
)

tuner.run()

[32m[I 2021-01-31 13:10:47,433][0m A new study created in memory with name: no-name-37e6db71-9ae5-4d9c-926e-cf502cb12e83[0m
feature_fraction, val_score: 1.582472:  14%|#4        | 1/7 [00:01<00:09,  1.55s/it][32m[I 2021-01-31 13:10:49,002][0m Trial 0 finished with value: 1.5824717246710158 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 1.5824717246710158.[0m
feature_fraction, val_score: 1.527033:  29%|##8       | 2/7 [00:02<00:07,  1.44s/it][32m[I 2021-01-31 13:10:50,178][0m Trial 1 finished with value: 1.5270333600802566 and parameters: {'feature_fraction': 0.4}. Best is trial 1 with value: 1.5270333600802566.[0m
feature_fraction, val_score: 1.527033:  43%|####2     | 3/7 [00:04<00:06,  1.64s/it][32m[I 2021-01-31 13:10:52,273][0m Trial 2 finished with value: 1.637672995002987 and parameters: {'feature_fraction': 1.0}. Best is trial 1 with value: 1.5270333600802566.[0m
feature_fraction, val_score: 1.527033:  57%|#####7    | 4/7 [00:06<00:05,  1.74s/i

In [None]:
# Report the best CV score and keep the parameter set of the review run.
print("Best score:", tuner.best_score)
best_params_with_reviews = tuner.best_params
print("Best params:", best_params_with_reviews)
print("  Params: ")

for name in best_params_with_reviews:
    print(f"    {name}: {best_params_with_reviews[name]}")

Best score: 1.507466770307179
Best params: {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1, 'boosting_type': 'gbdt', 'feature_pre_filter': False, 'lambda_l1': 0.16188546288064465, 'lambda_l2': 5.755195400161392e-05, 'num_leaves': 31, 'feature_fraction': 0.4, 'bagging_fraction': 0.9451939836214276, 'bagging_freq': 7, 'min_child_samples': 20}
  Params: 
    objective: regression
    metric: rmse
    verbosity: -1
    boosting_type: gbdt
    feature_pre_filter: False
    lambda_l1: 0.16188546288064465
    lambda_l2: 5.755195400161392e-05
    num_leaves: 31
    feature_fraction: 0.4
    bagging_fraction: 0.9451939836214276
    bagging_freq: 7
    min_child_samples: 20


# タイトルデータを含める場合

In [None]:
# Reload the train/test sets that additionally contain title features.
train = read_data(path+'train_2016-2018_title.csv', encode_type='utf-8')
test = read_data(path+'test_2019_title.csv', encode_type='utf-8')
# Keep only the first row for every Title in the training data.
train = train.loc[train['Title'].drop_duplicates(keep='first').index]

# Preprocess train and test together (Theaters fill value, label encoding),
# then split them back by position.
data = pd.concat([train, test], axis=0).reset_index(drop=True)
data = preprocess_Theaters(data)
cols=['Country', 'Opening_Day']
data = lable_encoding(data, cols)
train = data[0:len(train)]
test = data[len(train):]
print(train.shape, test.shape)


# Quartile indicator columns; applied to test too so both frames end up with
# the same set of columns.
train = make_describe_col(train, 'Theaters')
train = make_describe_col(train, 'Time')

test = make_describe_col(test, 'Theaters')
test = make_describe_col(test, 'Time')

# Only feature columns are standardized. 'Gross' and 'Total Gross' are left
# on their raw scale: the target is passed through np.log1p downstream, and
# standardizing it first would put the RMSE on a different scale from the
# runs on the non-title data.
use_columns = [
        'レビュー数',
        '平均スコア',
        'Theaters' ]

train = normalization(train, use_columns)
test = normalization(test, use_columns)


Columns (5628) have mixed types.Specify dtype option on import or set low_memory=False.



(514, 9063) (167, 9063)


In [None]:
#review = pd.read_csv('/content/drive/MyDrive/Movies/preprocessed/keiyousi_2016-2019.csv')
#review_ = review.drop_duplicates()
#review_ = review_.drop(['Title(ja)','title_url'], axis=1)
#train = train.merge(review_, how='left', on='Title')
#test = test.merge(review_, how='left', on='Title')

# Tune on the title dataset without the review features.
target_col = 'Total Gross'

train_features = train.drop(columns=drop_cols+['Gross'])

X = train_features.drop(columns=target_col)
# Model the target on a log scale.
y = np.log1p(train_features[target_col])

dtrain = lgb.Dataset(X, y)

#parameter setting----
params = dict(
    objective='regression',
    metric='rmse',
    verbosity=-1,
    boosting_type='gbdt',
)

cv_splitter = KFold(n_splits=5)

tuner = lgb.LightGBMTunerCV(
    params,
    dtrain,
    verbose_eval=1000,
    early_stopping_rounds=100,
    folds=cv_splitter,
)

tuner.run()

[32m[I 2021-01-31 13:12:17,851][0m A new study created in memory with name: no-name-9db1e6f7-b882-49fb-bde3-ac992a6a2b23[0m
feature_fraction, val_score: 0.568870:  14%|#4        | 1/7 [00:00<00:02,  2.52it/s][32m[I 2021-01-31 13:12:18,259][0m Trial 0 finished with value: 0.5688698392201681 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.5688698392201681.[0m
feature_fraction, val_score: 0.559198:  29%|##8       | 2/7 [00:00<00:01,  2.57it/s][32m[I 2021-01-31 13:12:18,630][0m Trial 1 finished with value: 0.5591983129917596 and parameters: {'feature_fraction': 0.4}. Best is trial 1 with value: 0.5591983129917596.[0m
feature_fraction, val_score: 0.559198:  43%|####2     | 3/7 [00:01<00:01,  2.51it/s][32m[I 2021-01-31 13:12:19,048][0m Trial 2 finished with value: 0.5760432373422791 and parameters: {'feature_fraction': 0.6}. Best is trial 1 with value: 0.5591983129917596.[0m
feature_fraction, val_score: 0.559198:  57%|#####7    | 4/7 [00:01<00:01,  2.39it

In [None]:
# Report the best CV score and keep the parameter set of the title run.
print("Best score:", tuner.best_score)
best_params_with_title = tuner.best_params
print("Best params:", best_params_with_title)
print("  Params: ")

for name in best_params_with_title:
    print(f"    {name}: {best_params_with_title[name]}")

Best score: 0.5549455885410852
Best params: {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1, 'boosting_type': 'gbdt', 'feature_pre_filter': False, 'lambda_l1': 0.0, 'lambda_l2': 0.0, 'num_leaves': 14, 'feature_fraction': 0.44800000000000006, 'bagging_fraction': 1.0, 'bagging_freq': 0, 'min_child_samples': 20}
  Params: 
    objective: regression
    metric: rmse
    verbosity: -1
    boosting_type: gbdt
    feature_pre_filter: False
    lambda_l1: 0.0
    lambda_l2: 0.0
    num_leaves: 14
    feature_fraction: 0.44800000000000006
    bagging_fraction: 1.0
    bagging_freq: 0
    min_child_samples: 20


In [None]:
# Attach the per-title review features to the title dataset.
review = pd.read_csv('/content/drive/MyDrive/Movies/preprocessed/keiyousi_2016-2019.csv')

# Drop the unused identifier columns first, then keep exactly one row per
# Title. De-duplicating whole rows before dropping those columns can leave
# several rows for one Title, and the left merges would then duplicate the
# matching train/test rows.
review_ = review.drop(['Title(ja)','title_url'], axis=1)
review_ = review_.drop_duplicates(subset='Title', keep='first')

# validate= makes pandas raise if the review side is ever not unique per Title.
# NOTE: these merges rebind `train`/`test`; re-running the cell without
# reloading them merges the review columns a second time.
train = train.merge(review_, how='left', on='Title', validate='many_to_one')
test = test.merge(review_, how='left', on='Title', validate='many_to_one')

train_features = train.drop(drop_cols+['Gross'], axis=1)

X = train_features.drop('Total Gross', axis=1)
y = train_features['Total Gross']
# Model the target on a log scale.
y = np.log1p(y)

dtrain = lgb.Dataset(X, y)

#parameter setting----
params = {
    'objective' : 'regression',
    'metric' : 'rmse',
    'verbosity':-1,
    "boosting_type": "gbdt",
}

tuner = lgb.LightGBMTunerCV(
        params, dtrain,
        verbose_eval=1000,
        early_stopping_rounds=100,
        folds=KFold(n_splits=5),
    )

tuner.run()

[32m[I 2021-01-31 13:12:44,817][0m A new study created in memory with name: no-name-cdf8cf6b-9eda-435c-874b-79a337589c9d[0m
feature_fraction, val_score: 0.599547:  14%|#4        | 1/7 [00:03<00:22,  3.81s/it][32m[I 2021-01-31 13:12:48,639][0m Trial 0 finished with value: 0.5995474708743558 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 0.5995474708743558.[0m
feature_fraction, val_score: 0.599547:  29%|##8       | 2/7 [00:06<00:17,  3.42s/it][32m[I 2021-01-31 13:12:51,144][0m Trial 1 finished with value: 0.604843906716273 and parameters: {'feature_fraction': 0.7}. Best is trial 0 with value: 0.5995474708743558.[0m
feature_fraction, val_score: 0.593845:  43%|####2     | 3/7 [00:07<00:11,  2.81s/it][32m[I 2021-01-31 13:12:52,526][0m Trial 2 finished with value: 0.5938448456156009 and parameters: {'feature_fraction': 0.5}. Best is trial 2 with value: 0.5938448456156009.[0m
feature_fraction, val_score: 0.593845:  57%|#####7    | 4/7 [00:09<00:07,  2.51s/i

In [None]:
# Report the best CV score and keep the parameter set of the title + review run.
print("Best score:", tuner.best_score)
best_params_with_title_reviews = tuner.best_params
print("Best params:", best_params_with_title_reviews)
print("  Params: ")

for name in best_params_with_title_reviews:
    print(f"    {name}: {best_params_with_title_reviews[name]}")

Best score: 0.5786677332502647
Best params: {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1, 'boosting_type': 'gbdt', 'feature_pre_filter': False, 'lambda_l1': 0.0010027973801652824, 'lambda_l2': 0.0009528230030937256, 'num_leaves': 31, 'feature_fraction': 0.5, 'bagging_fraction': 0.6529279068292234, 'bagging_freq': 1, 'min_child_samples': 20}
  Params: 
    objective: regression
    metric: rmse
    verbosity: -1
    boosting_type: gbdt
    feature_pre_filter: False
    lambda_l1: 0.0010027973801652824
    lambda_l2: 0.0009528230030937256
    num_leaves: 31
    feature_fraction: 0.5
    bagging_fraction: 0.6529279068292234
    bagging_freq: 1
    min_child_samples: 20


In [None]:
#レビュー，平均スコアを含める場合
drop_cols=['Title','Title(ja)','directors','writers','casts']

train_features = train.drop(drop_cols+['Gross'], axis=1)

X = train_features.drop('Total Gross', axis=1)
y = train_features['Total Gross']
#対数スケールに変換
y = np.log1p(y)

dtrain = lgb.Dataset(X, y)

#parameter setting----
params = {
    'objective' : 'regression',
    'metric' : 'rmse',
    'verbosity':-1,
    "boosting_type": "gbdt",
}

tuner = lgb.LightGBMTunerCV(
        params, dtrain, 
        verbose_eval=1000, 
        early_stopping_rounds=100, 
        folds=KFold(n_splits=5),
    )

tuner.run()

[32m[I 2021-01-31 13:14:26,909][0m A new study created in memory with name: no-name-b2346a5e-91b2-47e0-b98e-74e5ed5b949c[0m
feature_fraction, val_score: 0.508519:  14%|#4        | 1/7 [00:02<00:15,  2.53s/it][32m[I 2021-01-31 13:14:29,450][0m Trial 0 finished with value: 0.5085185397679755 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 0 with value: 0.5085185397679755.[0m
feature_fraction, val_score: 0.508519:  29%|##8       | 2/7 [00:04<00:11,  2.26s/it][32m[I 2021-01-31 13:14:31,066][0m Trial 1 finished with value: 0.5093173869939356 and parameters: {'feature_fraction': 0.5}. Best is trial 0 with value: 0.5085185397679755.[0m
feature_fraction, val_score: 0.508519:  43%|####2     | 3/7 [00:07<00:10,  2.51s/it][32m[I 2021-01-31 13:14:34,190][0m Trial 2 finished with value: 0.5227341717945675 and parameters: {'feature_fraction': 1.0}. Best is trial 0 with value: 0.5085185397679755.[0m
feature_fraction, val_score: 0.508519:  57%|#####7    | 4/7 [00:09

In [None]:
# Report the best CV score and keep the parameter set of the run that also
# uses review count / average score.
print("Best score:", tuner.best_score)
best_params_with_title_reviews_leakcols = tuner.best_params
print("Best params:", best_params_with_title_reviews_leakcols)
print("  Params: ")

for name in best_params_with_title_reviews_leakcols:
    print(f"    {name}: {best_params_with_title_reviews_leakcols[name]}")

Best score: 0.4676664755828709
Best params: {'objective': 'regression', 'metric': 'rmse', 'verbosity': -1, 'boosting_type': 'gbdt', 'feature_pre_filter': False, 'lambda_l1': 6.632751044983265e-06, 'lambda_l2': 0.0003847139173392962, 'num_leaves': 10, 'feature_fraction': 0.7, 'bagging_fraction': 0.5727164785688057, 'bagging_freq': 3, 'min_child_samples': 5}
  Params: 
    objective: regression
    metric: rmse
    verbosity: -1
    boosting_type: gbdt
    feature_pre_filter: False
    lambda_l1: 6.632751044983265e-06
    lambda_l2: 0.0003847139173392962
    num_leaves: 10
    feature_fraction: 0.7
    bagging_fraction: 0.5727164785688057
    bagging_freq: 3
    min_child_samples: 5


In [None]:
# Tag every tuned parameter set with the experiment it came from, then stack
# them into one table (one row per experiment, 'data' as the last column).
labelled_param_sets = [
    (best_params, 'baseline'),
    (best_params_with_reviews, 'baseline_with_reviews'),
    (best_params_with_title, 'best_params_with_title'),
    (best_params_with_title_reviews, 'best_params_with_title_reviews'),
    (best_params_with_title_reviews_leakcols, 'best_params_with_title_reviews_leakcols'),
]

for param_set, label in labelled_param_sets:
    param_set['data'] = label

all_best_params = pd.DataFrame([param_set for param_set, _ in labelled_param_sets])

In [None]:
all_best_params

Unnamed: 0,objective,metric,verbosity,boosting_type,feature_pre_filter,lambda_l1,lambda_l2,num_leaves,feature_fraction,bagging_fraction,bagging_freq,min_child_samples,data
0,regression,rmse,-1,gbdt,False,0.0,0.0,2,0.4,0.71944,3,25,baseline
1,regression,rmse,-1,gbdt,False,0.161885,5.8e-05,31,0.4,0.945194,7,20,baseline_with_reviews
2,regression,rmse,-1,gbdt,False,0.0,0.0,14,0.448,1.0,0,20,best_params_with_title
3,regression,rmse,-1,gbdt,False,0.001003,0.000953,31,0.5,0.652928,1,20,best_params_with_title_reviews
4,regression,rmse,-1,gbdt,False,7e-06,0.000385,10,0.7,0.572716,3,5,best_params_with_title_reviews_leakcols


In [None]:
# Persist the table of tuned parameters (without the row index).
best_params_csv = '/content/drive/MyDrive/Movies/result/best_params.csv'
all_best_params.to_csv(best_params_csv, index=False)

In [None]:
# Pick the row of the experiment whose parameters should be rebuilt as a dict.
p = dict()
is_baseline = all_best_params['data'] == 'baseline'
parameters = all_best_params.loc[is_baseline]
parameters

Unnamed: 0,objective,metric,verbosity,boosting_type,feature_pre_filter,lambda_l1,lambda_l2,num_leaves,feature_fraction,bagging_fraction,bagging_freq,min_child_samples,data
0,regression,rmse,-1,gbdt,False,0.0,0.0,2,0.4,0.71944,3,25,baseline


In [None]:
# Print every column of the selected row and collect all of them except the
# 'data' label into the parameter dict `p`.
for name in parameters.columns:
  value = parameters[name].values[0]
  print(name+':'+str(value))
  print()
  if name == 'data':
    continue
  p[name] = value
p

objective:regression

metric:rmse

verbosity:-1

boosting_type:gbdt

feature_pre_filter:False

lambda_l1:0.0

lambda_l2:0.0

num_leaves:2

feature_fraction:0.4

bagging_fraction:0.7194396763571909

bagging_freq:3

min_child_samples:25

data:baseline



{'bagging_fraction': 0.7194396763571909,
 'bagging_freq': 3,
 'boosting_type': 'gbdt',
 'feature_fraction': 0.4,
 'feature_pre_filter': False,
 'lambda_l1': 0.0,
 'lambda_l2': 0.0,
 'metric': 'rmse',
 'min_child_samples': 25,
 'num_leaves': 2,
 'objective': 'regression',
 'verbosity': -1}