In [None]:
pip install optuna

Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 32.8 MB/s 
[?25hCollecting alembic
  Downloading alembic-1.7.4-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 53.4 MB/s 
[?25hCollecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting cliff
  Downloading cliff-3.9.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 10.0 MB/s 
Collecting colorlog
  Downloading colorlog-6.5.0-py2.py3-none-any.whl (11 kB)
Collecting Mako
  Downloading Mako-1.1.5-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 4.2 MB/s 
Collecting stevedore>=2.0.1
  Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
[K     |████████████████████████████████| 49 kB 5.5 MB/s 
[?25hCollecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.6.0-py2.py3-none-any.whl (111 kB)
[K     |████████████████████████████████| 111 kB 70.8 MB/s 
[?25hCollecting c

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
import pickle
import optuna
import numpy as np
import pandas as pd
import lightgbm as lgb

In [None]:
# Read the training table, using the first CSV column as the DataFrame index.
df = pd.read_csv(filepath_or_buffer='train_data.csv', index_col=0)
# Accumulator for the leading token parsed out of each "Weight" entry.
weight = list()

In [None]:
# Keep only the rows whose "Weight" field is not the 'Not Available' placeholder.
df = df.loc[df["Weight"].ne('Not Available')]

In [None]:
# Rebuild the list from scratch on every execution: appending to a list that
# was created in another cell made this cell non-idempotent (a second run
# doubled its length and broke the column assignment that follows).
# Each entry keeps only the first whitespace-separated token of "Weight".
weight = [raw_weight.split()[0] for raw_weight in df["Weight"]]

In [None]:
# Store the parsed tokens as a new lowercase "weight" column, next to the
# original "Weight" column; `weight` must contain one entry per row of `df`.
df["weight"]=weight

In [None]:
# The parsed tokens are strings; convert the "weight" column to integers.
df = df.astype(dict(weight=int))

In [None]:
# Feature matrix: every column except "Weight", "weight" and "ID".
X = df.drop(columns=["Weight", "weight", "ID"]).to_numpy()

In [None]:
# Regression target: the integer "weight" column as a NumPy array.
y = df.loc[:, "weight"].to_numpy()

In [None]:
# 5-fold splitter shared by every Optuna trial. Shuffling is off (the KFold
# default), so the folds are contiguous slices in row order.
# NOTE(review): if the CSV rows are sorted, consider shuffle=True with a fixed seed.
kf = KFold(n_splits=5, shuffle=False)

In [None]:
def define_model_layers(trial):
    """Sample one set of LightGBM hyper-parameters from an Optuna trial.

    Despite the name, no layers are involved: the function asks ``trial`` for
    one value per hyper-parameter and returns them as a keyword dictionary.

    Args:
        trial: Object exposing ``suggest_int``, ``suggest_float`` and
            ``suggest_categorical`` (an Optuna trial in this notebook).

    Returns:
        dict mapping each hyper-parameter name to the value suggested for it,
        in the order the suggestions were requested.
    """
    pick_int = trial.suggest_int
    pick_float = trial.suggest_float

    params = {}

    # Boosting schedule.
    params["n_estimators"] = pick_int("n_estimators", 1000, 3000)
    params["learning_rate"] = pick_float("learning_rate", 0.01, 0.3)

    # Tree shape and histogram resolution.
    params["num_leaves"] = pick_int("num_leaves", 20, 3000, step=20)
    params["max_depth"] = pick_int("max_depth", 3, 12)
    params["min_data_in_leaf"] = pick_int("min_data_in_leaf", 200, 10000, step=100)
    params["max_bin"] = pick_int("max_bin", 200, 300)

    # Regularisation.
    params["lambda_l1"] = pick_int("lambda_l1", 0, 100, step=5)
    params["lambda_l2"] = pick_int("lambda_l2", 0, 100, step=5)
    params["min_gain_to_split"] = pick_float("min_gain_to_split", 0, 15)

    # Row / column subsampling; bagging_freq has a single choice, i.e. it is fixed at 1.
    params["bagging_fraction"] = pick_float("bagging_fraction", 0.2, 0.9, step=0.1)
    params["bagging_freq"] = trial.suggest_categorical("bagging_freq", [1])
    params["feature_fraction"] = pick_float("feature_fraction", 0.2, 0.9, step=0.1)

    return params

In [None]:

def eval_metric(y_true,y_pred):
    """Custom evaluation metric handed to ``LGBMRegressor.fit``.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values.

    Returns:
        A 3-tuple ``("mae", value, False)``: the metric name, the mean
        absolute error of the predictions, and a flag that is ``False``
        (LightGBM reads the third element as "is higher better").
    """
    metric_name = "mae"
    score = mean_absolute_error(y_true, y_pred)
    higher_is_better = False
    return metric_name, score, higher_is_better

In [None]:
def objective(trial):
    """Optuna objective: cross-validated MAE of a LightGBM regressor.

    Samples one hyper-parameter set with ``define_model_layers``, trains a
    fresh ``lgb.LGBMRegressor`` on every split produced by the module-level
    ``kf`` over ``X``/``y``, and scores each model on its held-out fold.

    Args:
        trial: The Optuna trial used to sample hyper-parameters.

    Returns:
        The arithmetic mean of the per-fold mean absolute errors (lower is
        better).

    NOTE(review): the held-out fold is passed as ``eval_set`` for early
    stopping and is also the fold the score is computed on -- confirm that
    this is intended.
    """
    hyper_params = define_model_layers(trial)
    fold_scores = []

    for fit_rows, holdout_rows in kf.split(X):
        X_fit, y_fit = X[fit_rows], y[fit_rows]
        X_holdout, y_holdout = X[holdout_rows], y[holdout_rows]

        regressor = lgb.LGBMRegressor(objective="regression", **hyper_params)
        regressor.fit(
            X_fit,
            y_fit,
            eval_set=[(X_holdout, y_holdout)],
            eval_metric=eval_metric,
            early_stopping_rounds=100,
            verbose=False,
        )

        holdout_pred = regressor.predict(X_holdout)
        fold_scores.append(mean_absolute_error(y_holdout, holdout_pred))

    # Average MAE across folds is the value Optuna minimises.
    return sum(fold_scores) / len(fold_scores)
            

In [None]:
# Seed the sampler so the sequence of suggested hyper-parameters is the same
# on every Restart & Run All. TPESampler is Optuna's default single-objective
# sampler, so only the seeding changes, not the search algorithm.
# NOTE(review): any randomness inside LightGBM itself is not controlled here.
SEED = 42
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=SEED),
)

# Run 100 trials; each one evaluates `objective` (mean cross-validated MAE).
study.optimize(objective, n_trials=100)

# Report the best trial found and the hyper-parameters that produced it.
print("Best trial:")
trial = study.best_trial

print(" Value: ", trial.value)

print(" Params: ")

for key, value in trial.params.items():
    print("  {}: {}".format(key, value))

[32m[I 2021-11-02 19:05:23,389][0m A new study created in memory with name: no-name-8cb6ab2a-1d85-4d46-9e56-7bbd1575f6fe[0m
[32m[I 2021-11-02 19:05:31,874][0m Trial 0 finished with value: 28.298817038853393 and parameters: {'n_estimators': 2612, 'learning_rate': 0.04369283104047973, 'num_leaves': 1720, 'max_depth': 3, 'min_data_in_leaf': 9900, 'max_bin': 276, 'lambda_l1': 80, 'lambda_l2': 70, 'min_gain_to_split': 14.076781696534576, 'bagging_fraction': 0.6000000000000001, 'bagging_freq': 1, 'feature_fraction': 0.30000000000000004}. Best is trial 0 with value: 28.298817038853393.[0m
[32m[I 2021-11-02 19:05:36,834][0m Trial 1 finished with value: 28.09636584666684 and parameters: {'n_estimators': 1260, 'learning_rate': 0.20344103901856653, 'num_leaves': 2580, 'max_depth': 9, 'min_data_in_leaf': 8300, 'max_bin': 213, 'lambda_l1': 40, 'lambda_l2': 5, 'min_gain_to_split': 9.859029748392624, 'bagging_fraction': 0.9, 'bagging_freq': 1, 'feature_fraction': 0.8}. Best is trial 1 with va

Best trial:
 Value:  27.957504917293647
 Params: 
  n_estimators: 1540
  learning_rate: 0.12451150300990464
  num_leaves: 2180
  max_depth: 4
  min_data_in_leaf: 400
  max_bin: 233
  lambda_l1: 15
  lambda_l2: 0
  min_gain_to_split: 10.488481350873686
  bagging_fraction: 0.8
  bagging_freq: 1
  feature_fraction: 0.9
