## Import Library and Load data

In [1]:
#Modifikasi
import warnings
import zipfile
import numpy as np
import pandas as pd
from pathlib import Path
pd.set_option('display.max_columns', 100)

#Perhitungan
from sklearn.preprocessing import PolynomialFeatures

import matplotlib.pyplot as plt

#Imputasi
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression


# Modeling
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV, GroupKFold,KFold, TimeSeriesSplit   
from sklearn.metrics import classification_report, accuracy_score, roc_curve, auc,roc_auc_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input, RepeatVector, TimeDistributed
from feature_engine.creation import CyclicalFeatures


#Feature Selection
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

# Feature Importance
from sklearn.ensemble import ExtraTreesClassifier

In [2]:
train = pd.read_csv('raw_dataset/Train.csv')
test = pd.read_csv('raw_dataset/Test.csv')

train.drop(['ID','ID_Zindi'], axis=1, inplace=True)
test.drop(['ID','ID_Zindi'], axis=1, inplace=True)

## Data Cleaning

In [3]:
train['Date'] = pd.to_datetime(train['Date'], dayfirst=True, errors='coerce')

Date = train.copy()
Date['Date'] = pd.to_datetime(Date['Date'])


# 3. Menetapkan kolom Date sebagai index
train.set_index('Date', inplace=True)

  train['Date'] = pd.to_datetime(train['Date'], dayfirst=True, errors='coerce')


In [4]:
test['Date'] = pd.to_datetime(test['Date'], dayfirst=True, errors='coerce')

Date_test = test.copy()
Date_test['Date'] = pd.to_datetime(Date['Date'])


# 3. Menetapkan kolom Date sebagai index
test.set_index('Date', inplace=True)

  test['Date'] = pd.to_datetime(test['Date'], dayfirst=True, errors='coerce')


## Imputasi

In [5]:
# df = train.copy()
# df.drop(columns=['ID_Zindi','ID'],inplace=True)
# test.drop(columns=['ID_Zindi','ID'],inplace=True)

# def impute_missing_values(df, cols_to_impute, drop_cols=['LAT', 'LON'], n_estimators=100, random_state=42):
#     for col in cols_to_impute:
#         if df[col].isna().sum() > 0:  # Cek apakah ada nilai NaN pada kolom
#             non_missing_data = df[df[col].notna()]  # Data tanpa nilai NaN untuk training
#             X_train = non_missing_data.drop(columns=[col] + drop_cols)  # Fitur training tanpa kolom target
#             y_train = non_missing_data[col]  # Target untuk training
            
#             # Inisiasi Random Forest Regressor dan training
#             rf_imputer = RandomForestRegressor(n_estimators=n_estimators, random_state=random_state)
#             rf_imputer.fit(X_train, y_train)
            
#             # Melakukan prediksi untuk mengisi nilai NaN
#             X_pred = df[df[col].isna()].drop(columns=[col] + drop_cols)
#             df.loc[df[col].isna(), col] = rf_imputer.predict(X_pred)
    
#     return df


# cols_to_impute_rf = ['AAI', 'CloudFraction','LST', 'NO2_trop', 'NO2_strat', 'NO2_total', 'TropopausePressure']
# df = impute_missing_values(df, cols_to_impute_rf)
# test = impute_missing_values(test, cols_to_impute_rf)

# # Imputasi untuk kolom dengan missing data sedikit (Mean Imputation)
# cols_to_impute_mean = ['GT_NO2']
# mean_imputer = SimpleImputer(strategy='mean')
# df[cols_to_impute_mean] = mean_imputer.fit_transform(df[cols_to_impute_mean])

# # Time series imputation using Iterative Imputer (Multiple Imputation)
# time_series_cols = ['Precipitation']
# time_series_imputer = IterativeImputer(random_state=42)
# df[time_series_cols] = time_series_imputer.fit_transform(df[time_series_cols])
# test[time_series_cols] = time_series_imputer.fit_transform(test[time_series_cols])

# Feature Engineering

In [41]:
# df.to_csv('train_imputed_with_rf_regressor.csv', index=False)
# test.to_csv('test_imputed_with_rf_regressor.csv', index=False)

data = pd.read_csv('final_dataset/train_imputed_with_rf_regressor.csv')
data.drop(['LAT','LON'],axis=1,inplace=True)


In [42]:
dtest = pd.read_csv('final_dataset/test_imputed_with_rf_regressor.csv')

dtest.drop(['LAT','LON'],axis=1,inplace=True)

In [43]:
data.shape, train.shape

((86584, 9), (86584, 11))

In [44]:
kmeans = KMeans(n_clusters=3)
data['Kmeans'] = kmeans.fit_predict(data[['NO2_strat', 'NO2_total', 'NO2_trop']])

scaler = StandardScaler()
data['TropopausePressure'] = scaler.fit_transform(data[['TropopausePressure']])

# Misalnya, data berisi kolom 'Precipitation', 'LST', 'AAI'
poly = PolynomialFeatures(degree=2)

# Menerapkan transformasi polinomial pada kolom-kolom yang ditentukan
poly_features = poly.fit_transform(data[['Precipitation', 'LST', 'AAI']])

# Membuat DataFrame baru dengan nama kolom yang sesuai
poly_feature_columns = poly.get_feature_names_out(['Precipitation', 'LST', 'AAI'])

# Menggabungkan hasilnya dengan data asli (jika perlu)
poly_data = pd.DataFrame(poly_features, columns=poly_feature_columns)

poly_data.drop(['1','Precipitation','LST','AAI',],axis=1,inplace=True)

result = pd.concat([data, poly_data], axis=1)

result['GT_NO2'] = pd.to_numeric(result['GT_NO2'], errors='coerce')  # Pastikan target numerik

result["month"] = train.index.month
result["year"] = train.index.year
result['day'] = train.index.day

# Sinusoidal Encoding Bulan ``
result['month_Sin'] = np.sin(2 * np.pi * result['month'] / 12)
result['month_Cos'] = np.cos(2 * np.pi * result['month'] / 12)

# Fitur Elapsed months (pastikan tahun dimulai dari min tahun di result)
min_year = result['year'].min()
result['Elapsed_months'] = (result['year'] - min_year) * 12 + result['month']

# Quarter & Semester
result['Quarter'] = ((result['month'] - 1) // 3 + 1).astype(int)

result['Semester'] = ((result['month'] - 1) // 6 + 1).astype(int)

# year-month Identifier (Format YYYYMM)
result['yearmonth'] = (result['year'] * 100 + result['month']).astype(int)

In [45]:
target_col = 'GT_NO2'  # Nama kolom target
cols = [col for col in result.columns if col != target_col] + [target_col]
result = result[cols]

In [47]:
result.head()

Unnamed: 0,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure,Kmeans,Precipitation^2,Precipitation LST,Precipitation AAI,LST^2,LST AAI,AAI^2,month,year,day,month_Sin,month_Cos,Elapsed_months,Quarter,Semester,yearmonth,GT_NO2
0,0.0,277.4618,0.230527,0.559117,2.4e-05,0.000117,0.000163,-0.858776,1,0.0,0.0,0.0,76985.050459,63.962302,0.053142,1,2019,1,0.5,0.866025,1,1,1,201901,31.0
1,3.047342,277.9274,-0.074006,0.869309,2.4e-05,0.000127,0.000123,-0.858437,1,9.286295,846.939922,-0.225522,77243.639671,-20.568365,0.005477,1,2019,1,0.5,0.866025,1,1,1,201901,42.0
2,0.0,277.1622,0.02447,0.67416,2.4e-05,8.6e-05,8.9e-05,-0.859968,1,0.0,0.0,0.0,76818.885109,6.782106,0.000599,1,2019,1,0.5,0.866025,1,1,1,201901,31.0
3,1.200467,277.4052,-0.010442,0.920054,2.4e-05,0.000124,0.000123,-0.85877,1,1.441121,333.015752,-0.012535,76953.644987,-2.896627,0.000109,1,2019,1,0.5,0.866025,1,1,1,201901,30.0
4,1.274564,278.9034,-0.176178,0.747464,2.4e-05,0.000116,0.000164,-0.85948,1,1.624513,355.480174,-0.22455,77787.106532,-49.136681,0.031039,1,2019,1,0.5,0.866025,1,1,1,201901,58.0


# Model Creation

In [18]:
import optuna
# model
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

#### LightGBM

In [50]:
# Fungsi evaluasi MSE
def mse(y_true, y_pred):
    return mean_squared_error(y_true, y_pred)

# Definisikan objective function untuk Optuna
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.5, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50)
    }
    
    # Out-of-fold prediction
    oof_predictions = np.zeros(len(result))
    kf = KFold(n_splits=10, shuffle=True, random_state=42)
    
    for fold, (data_idx, val_idx) in enumerate(kf.split(result.drop('GT_NO2', axis=1))):
        X_data, X_val = result.drop('GT_NO2', axis=1).iloc[data_idx], result.drop('GT_NO2', axis=1).iloc[val_idx]
        y_data, y_val = result['GT_NO2'].iloc[data_idx], result['GT_NO2'].iloc[val_idx]
        
        model = LGBMRegressor(**params, random_state=42)
        model.fit(X_data, y_data)
        
        oof_predictions[val_idx] = model.predict(X_val)
    
    return mse(result['GT_NO2'], oof_predictions)

# Lakukan optimasi hyperparameter
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)

# Gunakan parameter terbaik untuk model final
best_params = study.best_params
print(f"Best Hyperparameters: {best_params}")

lgbm = LGBMRegressor(**best_params, random_state=42)
y = result['GT_NO2']

# Out-of-fold predictions dengan parameter terbaik
oof_predictions = np.zeros(len(result))
kf = KFold(n_splits=10, shuffle=True, random_state=42)
fold_scores = []

for fold, (data_idx, val_idx) in enumerate(kf.split(result.drop('GT_NO2', axis=1))):
    X_data, X_val = result.drop('GT_NO2', axis=1).iloc[data_idx], result.drop('GT_NO2', axis=1).iloc[val_idx]
    y_data, y_val = result['GT_NO2'].iloc[data_idx], result['GT_NO2'].iloc[val_idx]
    
    lgbm.fit(X_data, y_data)
    val_pred = lgbm.predict(X_val)
    oof_predictions[val_idx] = val_pred
    
    fold_mse = mse(y_val, val_pred)
    fold_scores.append(fold_mse)
    print(f"Fold {fold+1} MSE: {fold_mse:.4f}")

# Menampilkan Rata-rata MSE
print(f"\nRata-rata MSE: {np.mean(fold_scores):.4f}")


[I 2025-02-14 11:17:51,170] A new study created in memory with name: no-name-de18edba-7850-48fc-8a7d-59112c01a9c8


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005064 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008882 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008010 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:18:01,315] Trial 0 finished with value: 105.60658443865938 and parameters: {'n_estimators': 202, 'max_depth': 3, 'learning_rate': 0.18746675872187465, 'subsample': 0.5687499482347409, 'colsample_bytree': 0.746896531506422, 'min_child_samples': 25}. Best is trial 0 with value: 105.60658443865938.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008882 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009511 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[I 2025-02-14 11:18:09,133] Trial 1 finished with value: 103.63078717089086 and parameters: {'n_estimators': 66, 'max_depth': 5, 'learning_rate': 0.12039343138190267, 'subsample': 0.9401102150573821, 'colsample_bytree': 0.9014719637619254, 'min_child_samples': 23}. Best is trial 1 with value: 103.63078717089086.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008356 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009685 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006241 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:18:32,689] Trial 2 finished with value: 95.77958112700064 and parameters: {'n_estimators': 278, 'max_depth': 7, 'learning_rate': 0.04755659819072122, 'subsample': 0.5712210053660982, 'colsample_bytree': 0.8346911772739694, 'min_child_samples': 19}. Best is trial 2 with value: 95.77958112700064.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010741 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007228 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014556 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train s

[I 2025-02-14 11:19:07,344] Trial 3 finished with value: 96.92667279670286 and parameters: {'n_estimators': 518, 'max_depth': 4, 'learning_rate': 0.08764071145819904, 'subsample': 0.5023062750430378, 'colsample_bytree': 0.677620122805259, 'min_child_samples': 46}. Best is trial 2 with value: 95.77958112700064.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006552 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010595 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007476 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[I 2025-02-14 11:19:28,831] Trial 4 finished with value: 110.62721508163405 and parameters: {'n_estimators': 339, 'max_depth': 3, 'learning_rate': 0.05985487683197004, 'subsample': 0.5342653289465735, 'colsample_bytree': 0.9905439469203203, 'min_child_samples': 45}. Best is trial 2 with value: 95.77958112700064.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010655 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006703 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[I 2025-02-14 11:19:37,881] Trial 5 finished with value: 94.13050997549911 and parameters: {'n_estimators': 73, 'max_depth': 7, 'learning_rate': 0.29802198818531034, 'subsample': 0.9634088844266964, 'colsample_bytree': 0.9934729960060461, 'min_child_samples': 42}. Best is trial 5 with value: 94.13050997549911.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006920 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005444 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023916 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total 

[I 2025-02-14 11:21:10,283] Trial 6 finished with value: 86.30569369530839 and parameters: {'n_estimators': 866, 'max_depth': 9, 'learning_rate': 0.2022779290651887, 'subsample': 0.7337489041353416, 'colsample_bytree': 0.5469804830968504, 'min_child_samples': 44}. Best is trial 6 with value: 86.30569369530839.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012253 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012276 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011415 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:23:25,606] Trial 7 finished with value: 85.5975989398512 and parameters: {'n_estimators': 950, 'max_depth': 9, 'learning_rate': 0.17763522324448583, 'subsample': 0.6838900006238526, 'colsample_bytree': 0.8765156885919634, 'min_child_samples': 12}. Best is trial 7 with value: 85.5975989398512.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012069 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011534 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011115 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:25:24,962] Trial 8 finished with value: 85.2457313406473 and parameters: {'n_estimators': 787, 'max_depth': 9, 'learning_rate': 0.13033124557846498, 'subsample': 0.9063195933212314, 'colsample_bytree': 0.7284460357292832, 'min_child_samples': 38}. Best is trial 8 with value: 85.2457313406473.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011574 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016985 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017470 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train s

[I 2025-02-14 11:26:22,698] Trial 9 finished with value: 105.27167252203668 and parameters: {'n_estimators': 733, 'max_depth': 3, 'learning_rate': 0.0539754361905386, 'subsample': 0.9871943796900784, 'colsample_bytree': 0.6604017030837286, 'min_child_samples': 30}. Best is trial 8 with value: 85.2457313406473.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009623 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009753 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009321 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:27:54,334] Trial 10 finished with value: 88.17418820873488 and parameters: {'n_estimators': 652, 'max_depth': 7, 'learning_rate': 0.2536616945704365, 'subsample': 0.8470010300576307, 'colsample_bytree': 0.5197231548466654, 'min_child_samples': 34}. Best is trial 8 with value: 85.2457313406473.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012211 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012014 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013517 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:30:07,616] Trial 11 finished with value: 85.68994734112782 and parameters: {'n_estimators': 996, 'max_depth': 9, 'learning_rate': 0.14917132814475975, 'subsample': 0.6994889166718361, 'colsample_bytree': 0.8287260427564618, 'min_child_samples': 5}. Best is trial 8 with value: 85.2457313406473.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010733 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011053 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011851 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:32:16,769] Trial 12 finished with value: 86.2903742642326 and parameters: {'n_estimators': 973, 'max_depth': 8, 'learning_rate': 0.19619248335242978, 'subsample': 0.8423520279729195, 'colsample_bytree': 0.7688327704475703, 'min_child_samples': 14}. Best is trial 8 with value: 85.2457313406473.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012034 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013398 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012512 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:34:09,956] Trial 13 finished with value: 85.08888210498033 and parameters: {'n_estimators': 802, 'max_depth': 9, 'learning_rate': 0.12826856501620268, 'subsample': 0.6628860503088755, 'colsample_bytree': 0.8989509870699288, 'min_child_samples': 36}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011392 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017869 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011520 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[I 2025-02-14 11:36:01,391] Trial 14 finished with value: 85.89573818565331 and parameters: {'n_estimators': 748, 'max_depth': 8, 'learning_rate': 0.11618348206159276, 'subsample': 0.635845374966769, 'colsample_bytree': 0.6412003952417642, 'min_child_samples': 37}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011907 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012680 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011771 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:37:26,597] Trial 15 finished with value: 103.40673324299865 and parameters: {'n_estimators': 480, 'max_depth': 6, 'learning_rate': 0.012183273777937681, 'subsample': 0.8052525683283447, 'colsample_bytree': 0.7468651285515969, 'min_child_samples': 50}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.012481 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010156 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005370 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:38:42,291] Trial 16 finished with value: 86.09606142430302 and parameters: {'n_estimators': 630, 'max_depth': 8, 'learning_rate': 0.1463120621229064, 'subsample': 0.8976083535390573, 'colsample_bytree': 0.9144024321755674, 'min_child_samples': 38}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005717 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005942 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005972 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:39:35,975] Trial 17 finished with value: 87.36093363594945 and parameters: {'n_estimators': 822, 'max_depth': 6, 'learning_rate': 0.2317959841483108, 'subsample': 0.7803184103194473, 'colsample_bytree': 0.7983035863624456, 'min_child_samples': 32}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005189 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005268 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:40:20,104] Trial 18 finished with value: 86.82937371854408 and parameters: {'n_estimators': 610, 'max_depth': 9, 'learning_rate': 0.10946535081339831, 'subsample': 0.6140951526951421, 'colsample_bytree': 0.6035838329246007, 'min_child_samples': 38}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010342 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009324 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005026 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train s

[I 2025-02-14 11:41:20,083] Trial 19 finished with value: 86.2769902627157 and parameters: {'n_estimators': 833, 'max_depth': 8, 'learning_rate': 0.08226764809590743, 'subsample': 0.8977122594117964, 'colsample_bytree': 0.7217826547662183, 'min_child_samples': 27}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009353 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009009 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004864 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train s

[I 2025-02-14 11:41:47,218] Trial 20 finished with value: 90.07621973682149 and parameters: {'n_estimators': 420, 'max_depth': 5, 'learning_rate': 0.15871792245917948, 'subsample': 0.6650331606255916, 'colsample_bytree': 0.7000633714558503, 'min_child_samples': 41}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005073 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006095 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006018 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:42:45,728] Trial 21 finished with value: 86.1017929647369 and parameters: {'n_estimators': 914, 'max_depth': 9, 'learning_rate': 0.172561366555516, 'subsample': 0.7068468847937455, 'colsample_bytree': 0.898053465088338, 'min_child_samples': 5}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005911 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006149 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005265 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:43:34,414] Trial 22 finished with value: 85.31960752298198 and parameters: {'n_estimators': 746, 'max_depth': 9, 'learning_rate': 0.1336151580318879, 'subsample': 0.6715903899833561, 'colsample_bytree': 0.8471465555109637, 'min_child_samples': 12}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005974 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006000 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005907 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:44:22,188] Trial 23 finished with value: 85.8622589178145 and parameters: {'n_estimators': 728, 'max_depth': 8, 'learning_rate': 0.13211222639116366, 'subsample': 0.7637330738457901, 'colsample_bytree': 0.9483150552494104, 'min_child_samples': 17}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006009 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007328 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005939 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:45:14,497] Trial 24 finished with value: 85.63064824621716 and parameters: {'n_estimators': 783, 'max_depth': 9, 'learning_rate': 0.09897379307919733, 'subsample': 0.6219700568845141, 'colsample_bytree': 0.8478249930326038, 'min_child_samples': 35}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005474 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005737 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:45:58,550] Trial 25 finished with value: 86.13811945004812 and parameters: {'n_estimators': 676, 'max_depth': 7, 'learning_rate': 0.13172165890282392, 'subsample': 0.7375076195222663, 'colsample_bytree': 0.7813682717269658, 'min_child_samples': 30}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005637 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005881 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005996 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:46:34,689] Trial 26 finished with value: 86.65534839227095 and parameters: {'n_estimators': 554, 'max_depth': 8, 'learning_rate': 0.2212075862023559, 'subsample': 0.6445499484009874, 'colsample_bytree': 0.9515871953533519, 'min_child_samples': 10}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005809 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005695 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005318 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:47:36,217] Trial 27 finished with value: 85.84895580720533 and parameters: {'n_estimators': 888, 'max_depth': 9, 'learning_rate': 0.08180488105645858, 'subsample': 0.5915115388987293, 'colsample_bytree': 0.8047073816020409, 'min_child_samples': 20}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006234 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005933 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006630 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:48:28,146] Trial 28 finished with value: 85.56479622022081 and parameters: {'n_estimators': 803, 'max_depth': 8, 'learning_rate': 0.1609332666536094, 'subsample': 0.8045578314114907, 'colsample_bytree': 0.873681766171498, 'min_child_samples': 23}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005481 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005505 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006046 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:49:09,108] Trial 29 finished with value: 86.02786139258514 and parameters: {'n_estimators': 593, 'max_depth': 9, 'learning_rate': 0.1408226596233584, 'subsample': 0.6667660123934896, 'colsample_bytree': 0.7496240089018479, 'min_child_samples': 26}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005708 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005866 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006647 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:50:05,432] Trial 30 finished with value: 96.55186398832359 and parameters: {'n_estimators': 693, 'max_depth': 7, 'learning_rate': 0.017712713619386714, 'subsample': 0.7155875034978014, 'colsample_bytree': 0.943162361943453, 'min_child_samples': 31}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006002 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005981 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006163 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:50:56,489] Trial 31 finished with value: 85.60117924711751 and parameters: {'n_estimators': 801, 'max_depth': 8, 'learning_rate': 0.16474362892335606, 'subsample': 0.8240737357470778, 'colsample_bytree': 0.8669274394474886, 'min_child_samples': 8}. Best is trial 13 with value: 85.08888210498033.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005330 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006063 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006255 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:51:55,452] Trial 32 finished with value: 85.03661338060216 and parameters: {'n_estimators': 890, 'max_depth': 9, 'learning_rate': 0.12037564502684207, 'subsample': 0.885760770463168, 'colsample_bytree': 0.9189297041707498, 'min_child_samples': 24}. Best is trial 32 with value: 85.03661338060216.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005017 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005372 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:52:55,790] Trial 33 finished with value: 84.66438724862945 and parameters: {'n_estimators': 922, 'max_depth': 9, 'learning_rate': 0.12014566919484884, 'subsample': 0.9033537815012155, 'colsample_bytree': 0.9130915448535754, 'min_child_samples': 23}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005755 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006183 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006324 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:53:55,908] Trial 34 finished with value: 85.31978035677346 and parameters: {'n_estimators': 918, 'max_depth': 9, 'learning_rate': 0.10268659017659632, 'subsample': 0.9180031639699094, 'colsample_bytree': 0.9150689170058508, 'min_child_samples': 23}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005932 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009417 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006046 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:54:50,222] Trial 35 finished with value: 86.67957382771517 and parameters: {'n_estimators': 861, 'max_depth': 8, 'learning_rate': 0.06541307948484944, 'subsample': 0.8734391145825353, 'colsample_bytree': 0.9733164062045493, 'min_child_samples': 20}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005552 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005497 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005331 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:55:20,290] Trial 36 finished with value: 91.24421812670506 and parameters: {'n_estimators': 931, 'max_depth': 4, 'learning_rate': 0.11776444558837541, 'subsample': 0.9440253028845896, 'colsample_bytree': 0.9264519522427722, 'min_child_samples': 24}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006801 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004730 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004853 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[I 2025-02-14 11:55:32,244] Trial 37 finished with value: 93.84068756760071 and parameters: {'n_estimators': 227, 'max_depth': 6, 'learning_rate': 0.08818748566635128, 'subsample': 0.9719768908793625, 'colsample_bytree': 0.8928247185249937, 'min_child_samples': 28}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005389 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006027 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006782 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total 

[I 2025-02-14 11:56:11,132] Trial 38 finished with value: 89.46479159715251 and parameters: {'n_estimators': 878, 'max_depth': 9, 'learning_rate': 0.036992370069915126, 'subsample': 0.9295251323113559, 'colsample_bytree': 0.8160817556053575, 'min_child_samples': 41}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011903 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.009598 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005804 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[I 2025-02-14 11:56:52,078] Trial 39 finished with value: 85.06555652352944 and parameters: {'n_estimators': 999, 'max_depth': 9, 'learning_rate': 0.09499459143214257, 'subsample': 0.8768440254842476, 'colsample_bytree': 0.9705243701921095, 'min_child_samples': 48}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004574 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008313 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005544 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:57:33,411] Trial 40 finished with value: 87.07354911951555 and parameters: {'n_estimators': 992, 'max_depth': 7, 'learning_rate': 0.0685242707232489, 'subsample': 0.8749290258982033, 'colsample_bytree': 0.9973081596871284, 'min_child_samples': 47}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005828 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004908 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:58:12,078] Trial 41 finished with value: 85.20081136069395 and parameters: {'n_estimators': 945, 'max_depth': 9, 'learning_rate': 0.12479006969397576, 'subsample': 0.8675058912231207, 'colsample_bytree': 0.9684752591618622, 'min_child_samples': 50}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005457 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005580 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004590 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 11:58:52,839] Trial 42 finished with value: 85.19048829455565 and parameters: {'n_estimators': 949, 'max_depth': 9, 'learning_rate': 0.09805391947741694, 'subsample': 0.8669740314132429, 'colsample_bytree': 0.9563160456475576, 'min_child_samples': 50}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006134 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006384 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005919 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[I 2025-02-14 11:59:32,147] Trial 43 finished with value: 85.68322650291559 and parameters: {'n_estimators': 860, 'max_depth': 9, 'learning_rate': 0.0914713867588827, 'subsample': 0.9580425165860873, 'colsample_bytree': 0.9701155233652614, 'min_child_samples': 47}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005852 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005408 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003357 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 12:00:14,437] Trial 44 finished with value: 85.99515439954735 and parameters: {'n_estimators': 970, 'max_depth': 9, 'learning_rate': 0.07235960164550859, 'subsample': 0.8492873042544846, 'colsample_bytree': 0.9309880193568639, 'min_child_samples': 44}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005793 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005412 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012105 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total 

[I 2025-02-14 12:00:49,433] Trial 45 finished with value: 85.42160451834245 and parameters: {'n_estimators': 892, 'max_depth': 8, 'learning_rate': 0.10582559766829665, 'subsample': 0.5424114340054323, 'colsample_bytree': 0.8873867989649432, 'min_child_samples': 17}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005853 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004800 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004883 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 12:01:33,382] Trial 46 finished with value: 89.22503372471225 and parameters: {'n_estimators': 996, 'max_depth': 9, 'learning_rate': 0.03377869125945267, 'subsample': 0.997496453805942, 'colsample_bytree': 0.9701998152178377, 'min_child_samples': 48}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004699 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005226 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004539 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 12:02:11,362] Trial 47 finished with value: 85.28460337302509 and parameters: {'n_estimators': 947, 'max_depth': 8, 'learning_rate': 0.12010060148471816, 'subsample': 0.8918580551440121, 'colsample_bytree': 0.9994796899117074, 'min_child_samples': 43}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005558 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004660 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006524 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 12:02:18,841] Trial 48 finished with value: 92.36356202052113 and parameters: {'n_estimators': 121, 'max_depth': 9, 'learning_rate': 0.18175431344101442, 'subsample': 0.7986838997710017, 'colsample_bytree': 0.9072699270266997, 'min_child_samples': 21}. Best is trial 33 with value: 84.66438724862945.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.010851 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005348 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005577 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-02-14 12:02:51,747] Trial 49 finished with value: 88.24300600225374 and parameters: {'n_estimators': 842, 'max_depth': 8, 'learning_rate': 0.2910856149639349, 'subsample': 0.8373248535734683, 'colsample_bytree': 0.9360512799582174, 'min_child_samples': 35}. Best is trial 33 with value: 84.66438724862945.


Best Hyperparameters: {'n_estimators': 922, 'max_depth': 9, 'learning_rate': 0.12014566919484884, 'subsample': 0.9033537815012155, 'colsample_bytree': 0.9130915448535754, 'min_child_samples': 23}
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006398 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.539766
Fold 1 MSE: 80.9335
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006076 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3725
[LightGBM] [Info] Number of data points in the train set: 77925, number of used features: 24
[LightGBM] [Info] Start training from score 24.538201
Fold 2 MSE: 81.8207
[LightGBM] [In

#### XGBoost

In [52]:
# # Fungsi evaluasi MSE
# def mse(y_true, y_pred):
#     return mean_squared_error(y_true, y_pred)

# # Definisikan objective function untuk Optuna
# def objective(trial):
#     params = {
#         'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
#         'max_depth': trial.suggest_int('max_depth', 3, 9),
#         'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
#         'subsample': trial.suggest_float('subsample', 0.5, 1.0),
#         'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
#         'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 10.0),
#         'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0),
#         'min_child_weight': trial.suggest_int('min_child_weight', 1, 20),
#         'gamma': trial.suggest_float('gamma', 0.0, 5.0)
#     }
    
#     # Out-of-fold prediction
#     oof_predictions = np.zeros(len(result))
#     kf = KFold(n_splits=10, shuffle=True, random_state=42)
    
#     for fold, (data_idx, val_idx) in enumerate(kf.split(result)):
#         X_data, X_val = result.drop('GT_NO2', axis=1).iloc[data_idx], result.drop('GT_NO2', axis=1).iloc[val_idx]
#         y_data, y_val = result['GT_NO2'].iloc[data_idx], result['GT_NO2'].iloc[val_idx]
        
#         model = xgb.XGBRegressor(**params, objective='reg:squarederror', seed=42)
#         model.fit(X_data, y_data)
        
#         oof_predictions[val_idx] = model.predict(X_val)
    
#     return mse(result['GT_NO2'], oof_predictions)

# # Lakukan optimasi hyperparameter
# study = optuna.create_study(direction='minimize')
# study.optimize(objective, n_trials=50)

# # Gunakan parameter terbaik untuk model final
# best_params = study.best_params
# print(f"Best Hyperparameters: {best_params}")

# xgb_model = xgb.XGBRegressor(**best_params, objective='reg:squarederror', seed=42)
# y = result['GT_NO2']

# # Out-of-fold predictions dengan parameter terbaik
# oof_predictions = np.zeros(len(result))
# kf = KFold(n_splits=10, shuffle=True, random_state=42)
# fold_scores = []

# for fold, (data_idx, val_idx) in enumerate(kf.split(result)):
#     X_data, X_val = result.drop('GT_NO2', axis=1).iloc[data_idx], result.drop('GT_NO2', axis=1).iloc[val_idx]
#     y_data, y_val = result['GT_NO2'].iloc[data_idx], result['GT_NO2'].iloc[val_idx]
    
#     xgb_model.fit(X_data, y_data)
#     val_pred = xgb_model.predict(X_val)
#     oof_predictions[val_idx] = val_pred
    
#     fold_mse = mse(y_val, val_pred)
#     fold_scores.append(fold_mse)
#     print(f"Fold {fold+1} MSE: {fold_mse:.4f}")

# print(f"\nRata-rata MSE: {np.mean(fold_scores):.4f}")


---
## test data

In [53]:
# Buat klastering KMeans dengan 3 klaster
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
dtest['Kmeans'] = kmeans.fit_predict(dtest[['NO2_strat', 'NO2_total', 'NO2_trop']])

# Normalisasi TropopausePressure
scaler = StandardScaler()
dtest['TropopausePressure'] = scaler.fit_transform(dtest[['TropopausePressure']])

# Polynomial Features untuk 'Precipitation', 'LST', 'AAI'
poly = PolynomialFeatures(degree=2)
poly_features = poly.fit_transform(dtest[['Precipitation', 'LST', 'AAI']])

# Dapatkan nama kolom hasil transformasi polinomial
poly_feature_columns = poly.get_feature_names_out(['Precipitation', 'LST', 'AAI'])

# Membuat DataFrame baru dari fitur polinomial
poly_dtest = pd.DataFrame(poly_features, columns=poly_feature_columns)

# Hapus fitur yang tidak diperlukan
poly_dtest.drop(['1', 'Precipitation', 'LST', 'AAI'], axis=1, inplace=True)

# Gabungkan dengan dtest asli
result_test = pd.concat([dtest, poly_dtest], axis=1)

# Pastikan test.index adalah datetime
result_test["month"] = test.index.month
result_test["year"] = test.index.year
result_test["day"] = test.index.day

# Sinusoidal Encoding Bulan
result_test['month_Sin'] = np.sin(2 * np.pi * result_test['month'] / 12)
result_test['month_Cos'] = np.cos(2 * np.pi * result_test['month'] / 12)

# Hitung elapsed months berdasarkan tahun minimum di result_test
min_year = result_test['year'].min()
result_test['Elapsed_months'] = (result_test['year'] - min_year) * 12 + result_test['month']

# Quarter & Semester
result_test['Quarter'] = ((result_test['month'] - 1) // 3 + 1).astype(int)
result_test['Semester'] = ((result_test['month'] - 1) // 6 + 1).astype(int)

# Identifikasi Tahun-Bulan dalam format YYYYMM
result_test['yearmonth'] = (result_test['year'] * 100 + result_test['month']).astype(int)


In [54]:
result_test.head()

Unnamed: 0,Precipitation,LST,AAI,CloudFraction,NO2_strat,NO2_total,NO2_trop,TropopausePressure,Kmeans,Precipitation^2,Precipitation LST,Precipitation AAI,LST^2,LST AAI,AAI^2,month,year,day,month_Sin,month_Cos,Elapsed_months,Quarter,Semester,yearmonth
0,3.277529,281.8314,-0.313361,0.771456,2.4e-05,7.5e-05,4.8e-05,-0.862889,0,10.742196,923.710587,-1.027051,79428.938026,-88.315104,0.098195,1,2019,1,0.5,0.866025,1,1,1,201901
1,0.0,280.7216,-0.229512,0.398208,2.3e-05,0.00012,7.3e-05,-0.864959,0,0.0,0.0,-0.0,78804.616707,-64.429036,0.052676,1,2019,1,0.5,0.866025,1,1,1,201901
2,0.0,282.98,-0.470822,0.153694,2.3e-05,0.000171,0.000148,-0.867253,1,0.0,0.0,-0.0,80077.6804,-133.233287,0.221674,1,2019,1,0.5,0.866025,1,1,1,201901
3,1.928031,278.0236,0.132952,0.756917,2.4e-05,0.000266,0.000201,-0.861829,1,3.717303,536.038078,0.256336,77297.122157,36.96388,0.017676,1,2019,1,0.5,0.866025,1,1,1,201901
4,0.0,280.0018,-0.198272,0.678858,2.3e-05,0.000149,0.000124,-0.862601,0,0.0,0.0,-0.0,78401.008003,-55.516618,0.039312,1,2019,1,0.5,0.866025,1,1,1,201901


In [55]:
result_test['target'] = lgbm.predict(result_test)



In [57]:
submission = pd.read_csv('raw_dataset/SampleSubmission.csv')

submission['GT_NO2'] = result_test['target']
submission

Unnamed: 0,ID_Zindi,GT_NO2
0,ID_003WOR,25.881186
1,ID_009Y2B,34.035146
2,ID_01C05M,32.008615
3,ID_0216G3,30.897995
4,ID_023C6G,31.903755
...,...,...
6571,ID_ZVEQ52,36.314812
6572,ID_ZWZ1IA,44.422341
6573,ID_ZX7B4A,25.150565
6574,ID_ZYZLPS,36.182595


In [58]:
submission.to_csv('Submission/Submission_LGBM.csv', index=False)
testing = pd.read_csv('Submission/Submission_LGBM.csv')
testing.head()

Unnamed: 0,ID_Zindi,GT_NO2
0,ID_003WOR,25.881186
1,ID_009Y2B,34.035146
2,ID_01C05M,32.008615
3,ID_0216G3,30.897995
4,ID_023C6G,31.903755
