In [4]:
pip install --upgrade pandas "dask[complete]"

Collecting pandas
  Using cached pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting dask[complete]
  Downloading dask-2024.10.0-py3-none-any.whl.metadata (3.7 kB)
Collecting cloudpickle>=3.0.0 (from dask[complete])
  Downloading cloudpickle-3.1.0-py3-none-any.whl.metadata (7.0 kB)
Collecting partd>=1.4.0 (from dask[complete])
  Downloading partd-1.4.2-py3-none-any.whl.metadata (4.6 kB)
Collecting pyarrow>=14.0.1 (from dask[complete])
  Downloading pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl.metadata (3.3 kB)
Collecting lz4>=4.3.2 (from dask[complete])
  Downloading lz4-4.3.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (3.7 kB)
Collecting distributed==2024.10.0 (from dask[complete])
  Downloading distributed-2024.10.0-py3-none-any.whl.metadata (3.3 kB)
Collecting dask-expr<1.2,>=1.1 (from dask[complete])
  Downloading dask_expr-1.1.16-py3-none-any.whl.metadata (2.5 kB)
Collecting bokeh>=3.1.0 (from dask[complete])
  Downloading bokeh-3.6.0-py3-none-any.whl.me

    Uninstalling pandas-2.1.1:
      Successfully uninstalled pandas-2.1.1
  Attempting uninstall: dask
    Found existing installation: dask 2022.7.1
    Uninstalling dask-2022.7.1:
      Successfully uninstalled dask-2022.7.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
db-dtypes 1.0.4 requires pandas<2.0dev,>=0.24.2, but you have pandas 2.2.3 which is incompatible.
db-dtypes 1.0.4 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 18.0.0 which is incompatible.
mlflow 2.7.1 requires cloudpickle<3, but you have cloudpickle 3.1.0 which is incompatible.
mlflow 2.7.1 requires pyarrow<14,>=4.0.0, but you have pyarrow 18.0.0 which is incompatible.
pandas-gbq 0.17.9 requires pyarrow<10.0dev,>=3.0.0, but you have pyarrow 18.0.0 which is incompatible.
streamlit 1.38.0 requires pillow<11,>=7.1.0, but you have pillow 11.0.0 which is incompatible.
wrds 3.2.0 

In [5]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error


In [8]:
# cleaned datasets with weather data

train_data = pd.read_csv('df_train.csv')
test_data = pd.read_csv('df_test.csv')

train_data.set_index('date', inplace=True)
test_data.set_index('date', inplace=True)

In [9]:
# Step 2: Define target variables and features
target_variables = ['valeur_NO2', 'valeur_CO', 'valeur_O3', 'valeur_PM10', 'valeur_PM25']

# Features available in both train and test sets
common_features = ['temperature', 'humidite', 'vent_moyen', 'vent_direction', 'pluie_1h']

# Time features (assuming they are derived from 'date' and can be computed for the test set)
time_features = ['hour_sin', 'hour_cos', 'day_of_week_sin', 'day_of_week_cos', 'month_sin', 'month_cos']

# Lag features and rolling statistics (not available in test set, we'll handle them separately)
lag_features = [col for col in train_data.columns if 'lag' in col]
rolling_features = [col for col in train_data.columns if 'rolling' in col]

# Interaction features (we can recompute them)
interaction_features = ['NO2_CO_interaction', 'PM10_PM25_interaction']

# All features for training
feature_columns = common_features + time_features + lag_features + rolling_features + interaction_features


In [10]:
# Step 3: Prepare the data for modeling

# Ensure the date index is in datetime format
train_data.index = pd.to_datetime(train_data.index)
test_data.index = pd.to_datetime(test_data.index)

# Generate time features for test_data
test_data['hour'] = test_data.index.hour
test_data['day_of_week'] = test_data.index.dayofweek
test_data['month'] = test_data.index.month

# Create cyclic features
test_data['hour_sin'] = np.sin(2 * np.pi * test_data['hour'] / 24)
test_data['hour_cos'] = np.cos(2 * np.pi * test_data['hour'] / 24)
test_data['day_of_week_sin'] = np.sin(2 * np.pi * test_data['day_of_week'] / 7)
test_data['day_of_week_cos'] = np.cos(2 * np.pi * test_data['day_of_week'] / 7)
test_data['month_sin'] = np.sin(2 * np.pi * test_data['month'] / 12)
test_data['month_cos'] = np.cos(2 * np.pi * test_data['month'] / 12)

# Drop intermediate columns
test_data.drop(['hour', 'day_of_week', 'month'], axis=1, inplace=True)

In [11]:
# Select the final features for modeling
final_features = common_features + time_features + interaction_features


In [12]:
# Handle missing values in train_data
train_data = train_data.dropna()


In [13]:
# Step 4: Implement time series cross-validation
tscv = TimeSeriesSplit(n_splits=5)

In [15]:
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
from sklearn.metrics import make_scorer, mean_absolute_error

# Define parameter grid for LightGBM
param_grid = {
    'n_estimators': [500, 1000],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [6, 8, 10],
}

# Define the MAE scoring function
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# Dictionary to store the best parameters for each pollutant
best_params = {}

# Iterate through each pollutant
for pollutant in target_variables:
    print(f"Performing grid search for {pollutant}...")
    
    # Set up LightGBM regressor
    lgbm_reg = lgb.LGBMRegressor(
        objective='regression_l1',
        subsample=0.8,
        colsample_bytree=0.8,
        random_state=42
    )
    
    # Set up GridSearchCV
    grid_search = GridSearchCV(estimator=lgbm_reg, param_grid=param_grid, scoring=mae_scorer, cv=3, n_jobs=-1)
    
    # Fit grid search to find the best parameters
    grid_search.fit(X, train_data[pollutant])
    
    # Store best parameters for the target pollutant
    best_params[pollutant] = grid_search.best_params_
    print(f"Best params for {pollutant}: {grid_search.best_params_}")

# Output the best parameters for all pollutants
print("Best parameters for each pollutant:", best_params)

Performing grid search for valeur_NO2...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006500 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003191 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005626 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006493 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.040844 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003947 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007760 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.017327 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001153 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002984 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003364 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start t

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004576 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006335 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001832 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001274 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002921 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013786 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002328 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train s



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000477 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1664
[LightGBM] [Info] Number of data points in the train set: 40823, number of used features: 13
[LightGBM] [Info] Start training from score 18.200001
Best params for valeur_NO2: {'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 500}
Performing grid search for valeur_CO...


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014574 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.059020 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004276 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013570 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003230 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000444 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001894 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.030949 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.047569 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001494 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001912 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.194000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011983 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016461 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001322 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001742 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000






[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013286 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001919 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001292 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tr

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002946 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006293 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001229 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001396 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005736 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004781 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001306 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tr

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010575 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.033256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tra



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.038366 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.020104 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000901 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.194000




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000511 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1664
[LightGBM] [Info] Number of data points in the train set: 40823, number of used features: 13
[LightGBM] [Info] Start training from score 0.199944
Best params for valeur_CO: {'learning_rate': 0.05, 'max_depth': 10, 'n_estimators': 500}
Performing grid search for valeur_O3...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001894 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010482 seconds.
You can set `force_





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040495 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 16.900000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018120 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.069747 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.016847 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.194000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010045 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start tra



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.012122 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001504 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001928 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tr

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000678 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.039056 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.194000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022542 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006920 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005788 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tra



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.023532 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.035877 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001882 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005064 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004625 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 18.000000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004612 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train s





[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006164 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008634 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.194000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002097 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001178 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002897 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.025070 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001539 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 53.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003813 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 53.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.006328 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.013295 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.093113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001663 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 53.099998


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001805 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.050555 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045149 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998






[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000507 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1664
[LightGBM] [Info] Number of data points in the train set: 40823, number of used features: 13
[LightGBM] [Info] Start training from score 51.000000


Best params for valeur_O3: {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 500}
Performing grid search for valeur_PM10...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.060222 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003195 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 53.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004708 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Tota

[LightGBM] [Info] Start training from score 50.400002
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001931 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 53.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001935 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998




[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002353 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004178 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 53.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.045348 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.011791 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.190000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001256 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001173 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.024602 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.064745 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000446 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003113 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.037354 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total 

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007289 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001






[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002316 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.088916 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003380 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001042 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 53.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002276 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002473 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001073 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 0.194000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010928 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 0.218000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011284 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001833 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003257 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.011888 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046594 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004412 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003392 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.022719 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000485 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1664
[LightGBM] [Info] Number of data points in the train set: 40823, number of used features: 13
[LightGBM] [Info] Start training from score 16.200001
Best params for valeur_PM10: {'learning_rate': 0.01, 'max_depth': 6, 'n_estimators': 1000}
Performing grid search for valeur_PM25...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002423 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.096250 seconds.
You can set `



[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001307 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001622 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.032403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001462 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.021678 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001207 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.021957 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002310 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train s

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010852 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002474 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002556 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001915 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004654 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.028282 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set

[LightGBM] [Info] Start training from score 9.300000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001399 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002758 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.013023 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001931 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002312 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003260 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002336 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 50.400002
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002343 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start t







[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006601 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002237 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007917 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004864 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001808 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.023400 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total B

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.069077 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002645 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002255 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total 



[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004899 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.033736 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003403 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.050971 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001129 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002177 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001216 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total B

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000529 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1664
[LightGBM] [Info] Number of data points in the train set: 40823, number of used features: 13
[LightGBM] [Info] Start training from score 8.700000
Best params for valeur_PM25: {'learning_rate': 0.05, 'max_depth': 10, 'n_estimators': 500}
Best parameters for each pollutant: {'valeur_NO2': {'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 500}, 'valeur_CO': {'learning_rate': 0.05, 'max_depth': 10, 'n_estimators': 500}, 'valeur_O3': {'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 500}, 'valeur_PM10': {'learning_rate': 0.01, 'max_depth': 6, 'n_estimators': 1000}, 'valeur_PM25': {'learning_rate': 0.05, 'max_depth': 10, 'n_estimators': 500}}


In [17]:
target_best_params = {
    'valeur_NO2':{'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 500},
     'valeur_CO':{'learning_rate': 0.05, 'max_depth': 10, 'n_estimators': 500},
    
     'valeur_O3':{'learning_rate': 0.05, 'max_depth': 6, 'n_estimators': 500},
     'valeur_PM10':{'learning_rate': 0.01, 'max_depth': 6, 'n_estimators': 1000},
     'valeur_PM25':{'learning_rate': 0.05, 'max_depth': 10, 'n_estimators': 500},
 
}

In [20]:
# Step 5 (Adjusted): Train separate models for each pollutant

# Prepare the training data
X = train_data[final_features]

# Initialize a dictionary to store models for each pollutant
models = {}
mae_scores = []

# Iterate through each split
for fold, (train_index, val_index) in enumerate(tscv.split(X)):
    print(f"Training fold {fold + 1}...")
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]

    fold_mae = {}
    fold_models = {}

    for pollutant in target_variables:
        print(f"  Training model for {pollutant}...")
        y_train = train_data.iloc[train_index][pollutant]
        y_val = train_data.iloc[val_index][pollutant]

        # Initialize the LightGBM regressor
        lgbm_reg = lgb.LGBMRegressor(
            objective='regression_l1',
            max_depth = target_best_params[pollutant]['max_depth'],
            n_estimators=target_best_params[pollutant]['n_estimators'],
            learning_rate=target_best_params[pollutant]['learning_rate'],
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42
        )

        # Train the model
        lgbm_reg.fit(
            X_train,
            y_train,
            eval_set=[(X_val, y_val)],
            eval_metric='mae',
        )

        # Predict on validation set
        y_pred = lgbm_reg.predict(X_val)

        # Compute MAE
        mae = mean_absolute_error(y_val, y_pred)
        fold_mae[pollutant] = mae
        fold_models[pollutant] = lgbm_reg

    mae_scores.append(fold_mae)

    # After last fold, save the models
    if fold == tscv.get_n_splits() - 1:
        models = fold_models  # Use models from the last fold



Training fold 1...
  Training model for valeur_NO2...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000633 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 6808, number of used features: 13
[LightGBM] [Info] Start training from score 16.200001
  Training model for valeur_CO...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000332 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 6808, number of used features: 13
[LightGBM] [Info] Start training from score 0.175000
  Training model for valeur_O3...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000359 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636



  Training model for valeur_PM10...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000355 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 6808, number of used features: 13
[LightGBM] [Info] Start training from score 17.600000








  Training model for valeur_PM25...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000361 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 6808, number of used features: 13
[LightGBM] [Info] Start training from score 8.700000
Training fold 2...
  Training model for valeur_NO2...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000450 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1647
[LightGBM] [Info] Number of data points in the train set: 13611, number of used features: 13
[LightGBM] [Info] Start training from score 18.700001
  Training model for valeur_CO...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000512 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1



  Training model for valeur_PM10...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000465 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1647
[LightGBM] [Info] Number of data points in the train set: 13611, number of used features: 13
[LightGBM] [Info] Start training from score 17.100000


  Training model for valeur_PM25...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000481 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1647
[LightGBM] [Info] Number of data points in the train set: 13611, number of used features: 13
[LightGBM] [Info] Start training from score 9.100000
Training fold 3...
  Training model for valeur_NO2...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000603 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1658
[LightGBM] [Info] Number of data points in the train set: 20414, number of used features: 13
[LightGBM] [Info] Start training from score 20.299999
  Training model for valeur_CO...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000569 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 

  Training model for valeur_PM10...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000325 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1658
[LightGBM] [Info] Number of data points in the train set: 20414, number of used features: 13
[LightGBM] [Info] Start training from score 17.299999


  Training model for valeur_PM25...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000302 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1658
[LightGBM] [Info] Number of data points in the train set: 20414, number of used features: 13
[LightGBM] [Info] Start training from score 9.500000
Training fold 4...
  Training model for valeur_NO2...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000368 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27217, number of used features: 13
[LightGBM] [Info] Start training from score 19.799999
  Training model for valeur_CO...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the o

  Training model for valeur_O3...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000382 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27217, number of used features: 13
[LightGBM] [Info] Start training from score 49.099998


  Training model for valeur_PM10...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000386 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27217, number of used features: 13
[LightGBM] [Info] Start training from score 17.200001
  Training model for valeur_PM25...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000689 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27217, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000
Training fold 5...
  Training model for valeur_NO2...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000427 seconds.
You can set `force_ro

  Training model for valeur_PM10...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000402 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1664
[LightGBM] [Info] Number of data points in the train set: 34020, number of used features: 13
[LightGBM] [Info] Start training from score 16.700001


  Training model for valeur_PM25...
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000413 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1664
[LightGBM] [Info] Number of data points in the train set: 34020, number of used features: 13
[LightGBM] [Info] Start training from score 9.000000


In [21]:
# Analyze MAE scores
mae_df = pd.DataFrame(mae_scores)
print("MAE scores across folds:")
print(mae_df)
print("\nAverage MAE:")
print(mae_df.mean())

MAE scores across folds:
   valeur_NO2  valeur_CO  valeur_O3  valeur_PM10  valeur_PM25
0    7.047454   0.083592  11.047870     2.615984     1.518367
1    5.779305   0.053169   8.046138     1.682431     1.078483
2    4.221672   0.052725   8.808841     1.658392     0.895112
3    2.603997   0.028785   7.652242     1.606758     0.893285
4    2.344535   0.026093   7.560321     1.213752     0.669432

Average MAE:
valeur_NO2     4.399393
valeur_CO      0.048873
valeur_O3      8.623083
valeur_PM10    1.755464
valeur_PM25    1.010936
dtype: float64


In [22]:
# Step 6: Implement recursive forecasting for the test set

# Initialize test_data with necessary features
test_data_pred = test_data.copy()
test_data_pred[interaction_features] = 0  # Initialize interaction features

# We need to generate lag features and rolling statistics recursively
# We'll start by getting the last known values from the training data

# Get the last known values for lag features
last_known_values = train_data.iloc[-168:]  # Get last 168 hours for lag168

# Initialize an empty DataFrame to store predictions
test_predictions = pd.DataFrame(index=test_data_pred.index, columns=target_variables)

# Iterate over each time step in the test set
for i, timestamp in enumerate(test_data_pred.index):
    # Prepare the input features
    X_test = test_data_pred.loc[[timestamp], common_features + time_features + interaction_features]

    # For the first few steps, we can use the last known lag values from the training set
    if i == 0:
        # Lag 1
        for pollutant in target_variables:
            lag1_value = train_data.iloc[-1][pollutant]
            X_test[f'{pollutant}_lag1'] = lag1_value
        # Lag 24
        if len(train_data) >= 24:
            for pollutant in target_variables:
                lag24_value = train_data.iloc[-24][pollutant]
                X_test[f'{pollutant}_lag24'] = lag24_value
        else:
            # Not enough data for lag24, use last known value
            for pollutant in target_variables:
                X_test[f'{pollutant}_lag24'] = train_data.iloc[-1][pollutant]
        # Lag 168
        if len(train_data) >= 168:
            for pollutant in target_variables:
                lag168_value = train_data.iloc[-168][pollutant]
                X_test[f'{pollutant}_lag168'] = lag168_value
        else:
            # Not enough data for lag168, use last known value
            for pollutant in target_variables:
                X_test[f'{pollutant}_lag168'] = train_data.iloc[-1][pollutant]
        # Rolling means and stds
        for pollutant in target_variables:
            rolling_mean24 = train_data[pollutant].rolling(window=24).mean().iloc[-1]
            rolling_std24 = train_data[pollutant].rolling(window=24).std().iloc[-1]
            rolling_mean168 = train_data[pollutant].rolling(window=168).mean().iloc[-1]
            rolling_std168 = train_data[pollutant].rolling(window=168).std().iloc[-1]
            X_test[f'{pollutant}_rolling_mean24'] = rolling_mean24
            X_test[f'{pollutant}_rolling_std24'] = rolling_std24
            X_test[f'{pollutant}_rolling_mean168'] = rolling_mean168
            X_test[f'{pollutant}_rolling_std168'] = rolling_std168
    else:
        # Use previous predictions for lag features
        for pollutant in target_variables:
            # Lag 1
            X_test[f'{pollutant}_lag1'] = test_predictions.iloc[i - 1][pollutant]
            # Lag 24
            if i >= 24:
                X_test[f'{pollutant}_lag24'] = test_predictions.iloc[i - 24][pollutant]
            else:
                # Use last known value from training data
                X_test[f'{pollutant}_lag24'] = train_data.iloc[-24 + i][pollutant]
            # Lag 168
            if i >= 168:
                X_test[f'{pollutant}_lag168'] = test_predictions.iloc[i - 168][pollutant]
            else:
                # Use last known value from training data
                X_test[f'{pollutant}_lag168'] = train_data.iloc[-168 + i][pollutant]
            # Rolling means and stds
            # For rolling calculations, we'll need to collect previous predictions
            if i < 24:
                recent_values = pd.concat([train_data[pollutant].iloc[-(24 - i):], test_predictions[pollutant].iloc[:i]])
            else:
                recent_values = test_predictions[pollutant].iloc[i - 24:i]
            rolling_mean24 = recent_values.mean()
            rolling_std24 = recent_values.std()
            if i < 168:
                recent_values_168 = pd.concat([train_data[pollutant].iloc[-(168 - i):], test_predictions[pollutant].iloc[:i]])
            else:
                recent_values_168 = test_predictions[pollutant].iloc[i - 168:i]
            rolling_mean168 = recent_values_168.mean()
            rolling_std168 = recent_values_168.std()
            X_test[f'{pollutant}_rolling_mean24'] = rolling_mean24
            X_test[f'{pollutant}_rolling_std24'] = rolling_std24
            X_test[f'{pollutant}_rolling_mean168'] = rolling_mean168
            X_test[f'{pollutant}_rolling_std168'] = rolling_std168

    # Update interaction features
    X_test['NO2_CO_interaction'] = X_test['valeur_NO2_lag1'] * X_test['valeur_CO_lag1']
    X_test['PM10_PM25_interaction'] = X_test['valeur_PM10_lag1'] * X_test['valeur_PM25_lag1']

    # Ensure all required features are present
    for col in feature_columns:
        if col not in X_test.columns:
            X_test[col] = 0  # or appropriate default value

    # Predict for each pollutant
    for pollutant in target_variables:
        model = models[pollutant]
        prediction = model.predict(X_test[final_features])[0]
        test_predictions.loc[timestamp, pollutant] = prediction

    # Update test_data_pred with the new interaction features (if necessary)
    # test_data_pred.loc[timestamp, 'NO2_CO_interaction'] = X_test['NO2_CO_interaction']
    # test_data_pred.loc[timestamp, 'PM10_PM25_interaction'] = X_test['PM10_PM25_interaction']






























ve gain, best gain: -inf
ve gain, best gain: -inf
urther splits with positive gain, best gain: -inf
m_leaves OR 2^max_depth > num_leaves. (num_leaves=31).


















[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.008845 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.600000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.082870 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003312 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.046531 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.600000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007012 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.600000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007822 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004184 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.014224 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.600000


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044332 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002076 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.003175 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.007414 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 15.800000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001460 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.600000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005346 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train se

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001013 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.010441 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009906 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.600000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.005226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1652
[LightGBM] [Info] Number of data points in the train set: 27215, number of used features: 13
[LightGBM] [Info] Start training from score 8.400000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.018944 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start tra

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.025090 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1662
[LightGBM] [Info] Number of data points in the train set: 27216, number of used features: 13
[LightGBM] [Info] Start training from score 9.300000


In [24]:
test_predictions = test_predictions.reset_index().rename(columns={'date': 'id'})


In [34]:
test_predictions['id'] = pd.to_datetime(test_predictions['id']).dt.strftime('%Y-%m-%d %H')

In [36]:
test_predictions.to_csv('test_predictions.csv', index=False)