In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightgbm as lgb
from statsmodels.tsa.stattools import adfuller
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.transformations.series.summarize import WindowSummarizer
from sktime.forecasting.compose import make_reduction
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [5]:
def lightgbm_forecast_lags(series, district_name, 
                          max_lags=6, 
                          window_features=None,
                          differencing=True,
                          use_cv=True,
                          custom_params=None):
    """
    LightGBM time series forecasting with proper temporal validation
    """
    # Create output directory
    os.makedirs('LightGBM_Forecasts', exist_ok=True)
    
    # 1. Stationarity handling
    original_series = series.copy()
    d = 0
    if differencing:
        adf_result = adfuller(series.dropna())
        if adf_result[1] > 0.05:
            d = 1
            series = series.diff().dropna()

    # 2. Temporal split using sktime's proper method
    y_train, y_test = temporal_train_test_split(series, test_size=0.2)

    # 3. Create feature engineering pipeline
    features = []
    if max_lags > 0:
        features.append(("lags", WindowSummarizer(
            lag_feature={"lag": list(range(1, max_lags+1))},
            truncate="bfill"
        )))
    
    if window_features:
        features.append(("window", WindowSummarizer(
            lag_feature={
                "mean": [[1, window_features['mean_window']]],
                "std": [[1, window_features['std_window']]]
            },
            truncate="bfill"
        )))

    # 4. LightGBM regressor setup
    regressor = lgb.LGBMRegressor(
        **custom_params or {
            'objective': 'regression',
            'metric': 'rmse',
            'num_leaves': 31,
            'learning_rate': 0.05,
            'n_estimators': 500,
            'random_state': 42
        }
    )
    
    # 5. Create forecasting pipeline
    forecaster = make_reduction(
        estimator=Pipeline([
            ("features", Pipeline(features)),
            ("regressor", regressor)
        ]),
        window_length=max_lags,
        strategy="recursive"
    )

    # 6. Hyperparameter tuning with temporal CV
    if use_cv:
        param_grid = {
            'estimator__regressor__num_leaves': [15, 31, 63],
            'estimator__regressor__learning_rate': [0.01, 0.05, 0.1],
            'estimator__regressor__min_child_samples': [10, 20, 50]
        }
        
        forecaster = GridSearchCV(
            forecaster,
            param_grid,
            cv=TimeSeriesSplit(n_splits=5),
            scoring='neg_mean_squared_error',
            n_jobs=-1
        )

    # 7. Training
    forecaster.fit(y_train)
    
    # 8. Forecasting
    fh = np.arange(len(y_test)) + 1  # Forecast horizon
    y_pred = forecaster.predict(fh)
    
    # 9. Inverse differencing
    if d == 1:
        last_train_value = original_series.iloc[-len(y_test)-1]
        y_pred = pd.Series(np.cumsum(y_pred) + last_train_value, index=y_test.index)
        y_test = original_series.iloc[-len(y_test):]

    # 10. Create results dataframe
    forecast_df = pd.DataFrame({
        'district': district_name,
        'date': y_test.index,
        'actual': y_test.values,
        'forecast': y_pred.values
    })

    # 11. Save results
    forecast_df.to_csv(
        f'LightGBM_Forecasts/{district_name}_forecast.csv',
        index=False
    )

    # 12. Calculate metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    metrics = {
        'district': district_name,
        'rmse': rmse,
        'best_params': getattr(forecaster, 'best_params_', None),
        'differencing': d
    }

    # 13. Visualization
    plt.figure(figsize=(14, 7))
    plt.plot(original_series.index, original_series, label='Original')
    plt.plot(forecast_df['date'], forecast_df['forecast'], 
            label='LightGBM Forecast', alpha=0.7)
    plt.title(f'{district_name} Forecast\nRMSE: {rmse:.2f}')
    plt.legend()
    plt.savefig(f'LightGBM_Forecasts/{district_name}_forecast_plot.png')
    plt.close()

    return metrics, forecast_df

In [6]:
districts = data['district'].unique()
rmse_values = []

# Usage Example


def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I1"].asfreq('MS')
        
        results, forecast = lightgbm_forecast_lags(
        series=ts,
        district_name=district,
        max_lags=6,
        window_features={'mean_window': 3, 'std_window': 6},
        custom_params={
            'num_leaves': 63,
            'learning_rate': 0.1,
            'n_estimators': 1000
        }
    )

        
    
        # Show results
        print("=== Metrics ===")
        print(results)
        print("\n=== Forecast Data ===")
        print(forecast)
    
    return results
run_for_each_district()

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 137
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 6858.406250
=== Metrics ===
{'district': 'AHMEDNAGAR', 'rmse': 626.1086564173092, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
     district       date  actual     forecast
0  AHMEDNAGAR 2020-06-01    6875  6626.946644
1  AHMEDNAGAR 2020-07-01    6478  6565.778252
2  AHMEDNAGAR 2020-08-01    5975  6560.345285
3  AHMEDNAGAR 2020-09-01    6550  7090.714364
4  AHMEDNAGAR 2020-10-01    6471  7059.701199
5  AHMEDNAGAR 2020-11-01    6107  6893.510113
6  AHMEDNAGAR 2020-12-01    7460  6424.814822
7  AHMEDNAGAR 2021-01-01    66

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 138
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 2684.531250
=== Metrics ===
{'district': 'AKOLA', 'rmse': 289.6626258502864, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0    AKOLA 2020-06-01    2999  2749.619517
1    AKOLA 2020-07-01    3121  2853.716739
2    AKOLA 2020-08-01    3029  2737.900894
3    AKOLA 2020-09-01    2763  2497.774749
4    AKOLA 2020-10-01    2737  2673.861628
5    AKOLA 2020-11-01    2700  2752.857362
6    AKOLA 2020-12-01    2933  2742.850168
7    AKOLA 2021-01-01    3111  2708.168665
8    AKOLA 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000163 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 135
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3841.687500
=== Metrics ===
{'district': 'AMRAVATI', 'rmse': 509.9665843542814, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
   district       date  actual     forecast
0  AMRAVATI 2020-06-01    3883  3752.567599
1  AMRAVATI 2020-07-01    3459  3549.336165
2  AMRAVATI 2020-08-01    3049  3637.991354
3  AMRAVATI 2020-09-01    3325  3559.622165
4  AMRAVATI 2020-10-01    3191  3911.530465
5  AMRAVATI 2020-11-01    3502  4159.812462
6  AMRAVATI 2020-12-01    4172  3777.170519
7  AMRAVATI 2021-01-01    4458  3801.871957
8  A

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000201 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 135
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 6534.843750
=== Metrics ===
{'district': 'AURANGABAD', 'rmse': 448.2010414283846, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
     district       date  actual     forecast
0  AURANGABAD 2020-06-01    7193  6414.787633
1  AURANGABAD 2020-07-01    6028  6503.431628
2  AURANGABAD 2020-08-01    6173  6414.787633
3  AURANGABAD 2020-09-01    6644  6503.431628
4  AURANGABAD 2020-10-01    6246  6425.694205
5  AURANGABAD 2020-11-01    5765  6503.431628
6  AURANGABAD 2020-12-01    6760  6514.338200
7  AURANGABAD 2021-01-01    66

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 135
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3939.187500
=== Metrics ===
{'district': 'BEED', 'rmse': 455.32772353310315, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0     BEED 2020-06-01    3954  3670.314002
1     BEED 2020-07-01    3821  3523.996908
2     BEED 2020-08-01    3261  3911.613461
3     BEED 2020-09-01    3819  3407.401177
4     BEED 2020-10-01    3751  3819.035446
5     BEED 2020-11-01    3332  4322.389273
6     BEED 2020-12-01    4156  3986.26

27 fits failed out of a total of 135.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
27 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 893, in _fit_and_score
    estimator.fit(X_train, **fit_params)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\base\_base.py", line 396, in fit
    self._fit(y=y_inner, X=X_inner, fh=fh)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\compose\_reduce.py", line 972, in _fit
    self.estimator_.fit(Xt, yt)
  File "c:\Users\nauti\AppData\Local\Programs\P

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 137
[LightGBM] [Info] Number of data points in the train set: 31, number of used features: 12
[LightGBM] [Info] Start training from score 9.258065
=== Metrics ===
{'district': 'BHANDARA', 'rmse': 353.5273879835007, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
   district       date  actual     forecast
0  BHANDARA 2020-06-01    1341  1613.675895
1  BHANDARA 2020-07-01    1262  1549.184033
2  BHANDARA 2020-08-01    1185  1543.269836
3  BHANDARA 2020-09-01    1200  1628.514999
4  BHANDARA 2020-10-01    1233  1620.903982
5  BHANDARA 2020-11-01    1093  1678.558703
6  BHANDARA 2020-12-01    1395  1726.510626
7  BHANDARA 2021-01-01    1416  1757.738260
8  BHAN

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000254 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 138
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 4129.781250
=== Metrics ===
{'district': 'BULDHANA', 'rmse': 406.76780554576396, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
   district       date  actual     forecast
0  BULDHANA 2020-06-01    4320  4088.890666
1  BULDHANA 2020-07-01    3900  4062.379643
2  BULDHANA 2020-08-01    3612  4338.190966
3  BULDHANA 2020-09-01    4073  4361.491472
4  BULDHANA 2020-10-01    3614  3994.238929
5  BULDHANA 2020-11-01    3484  4258.870483
6  BULDHANA 2020-12-01    4289  4246.891168
7  BULDHANA 2021-01-01    4304  3937.240994
8  

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 139
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 2668.000000
=== Metrics ===
{'district': 'CHANDRAPUR', 'rmse': 322.4757225639032, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
     district       date  actual     forecast
0  CHANDRAPUR 2020-06-01    2500  2728.620437
1  CHANDRAPUR 2020-07-01    2316  2542.222768
2  CHANDRAPUR 2020-08-01    2209  2473.983329
3  CHANDRAPUR 2020-09-01    2247  2394.398527
4  CHANDRAPUR 2020-10-01    2187  2517.101036
5  CHANDRAPUR 2020-11-01    2220  2617.304090
6  CHANDRAPUR 2020-12-01    2940  2658.776363
7  CHANDRAPUR 2021-01-01    30

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000232 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 138
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3710.843750
=== Metrics ===
{'district': 'DHULE', 'rmse': 452.6613671416718, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0    DHULE 2020-06-01    3995  4037.878524
1    DHULE 2020-07-01    3857  3936.125240
2    DHULE 2020-08-01    3076  3836.884324
3    DHULE 2020-09-01    3376  3945.113571
4    DHULE 2020-10-01    3157  3896.478092
5    DHULE 2020-11-01    3146  3794.676955
6    DHULE 2020-12-01    3665  3742.563140
7    DHULE 2021-01-01    3484  3698.179848
8    DHULE 2021-

27 fits failed out of a total of 135.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
27 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 893, in _fit_and_score
    estimator.fit(X_train, **fit_params)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\base\_base.py", line 396, in fit
    self._fit(y=y_inner, X=X_inner, fh=fh)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\compose\_reduce.py", line 972, in _fit
    self.estimator_.fit(Xt, yt)
  File "c:\Users\nauti\AppData\Local\Programs\P

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 137
[LightGBM] [Info] Number of data points in the train set: 31, number of used features: 12
[LightGBM] [Info] Start training from score 20.709677
=== Metrics ===
{'district': 'GADCHIROLI', 'rmse': 325.34872284379463, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
     district       date  actual     forecast
0  GADCHIROLI 2020-06-01    1678  1925.890346
1  GADCHIROLI 2020-07-01    1431  1873.631814
2  GADCHIROLI 2020-08-01    1224  1754.305723
3  GADCHIROLI 2020-09-01    1361  1643.228485
4  GADCHIROLI 2020-10-01    1251  1577.924235
5  GADCHIROLI 2020-11-01    1256  1627.328278
6  GADCHIROLI 2020-12-01    1632  1603.945609
7  GADCHIROLI 2021-01-01    168

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 134
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 1671.875000
=== Metrics ===
{'district': 'GONDIA', 'rmse': 184.9206932233656, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   GONDIA 2020-06-01    1680  1715.692463
1   GONDIA 2020-07-01    1703  1680.275424
2   GONDIA 2020-08-01    1383  1545.622523
3   GONDIA 2020-09-01    1445  1566.314455
4   GONDIA 2020-10-01    1472  1530.451849
5   GONDIA 2020-11-01    1519  1576.165995
6   GONDIA 2020-12-01    1528  1805.299577
7   GONDIA 2021-01-01    1531  1718.422547
8   GONDIA 2021

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000226 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 134
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 1953.218750
=== Metrics ===
{'district': 'HINGOLI', 'rmse': 171.56659120694826, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0  HINGOLI 2020-06-01    1988  1903.054678
1  HINGOLI 2020-07-01    1886  1968.865717
2  HINGOLI 2020-08-01    1692  1993.644172
3  HINGOLI 2020-09-01    2019  1835.627104
4  HINGOLI 2020-10-01    1976  1899.704820
5  HINGOLI 2020-11-01    1722  1981.130529
6  HINGOLI 2020-12-01    1949  1885.972651
7  HINGOLI 2021-01-01    1975  2115.463112
8  HINGOLI 20

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000345 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 139
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 6811.937500
=== Metrics ===
{'district': 'JALGAON', 'rmse': 882.0669387779428, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0  JALGAON 2020-06-01    6888  6708.501708
1  JALGAON 2020-07-01    6176  7029.216352
2  JALGAON 2020-08-01    5950  7194.216592
3  JALGAON 2020-09-01    6369  7099.130543
4  JALGAON 2020-10-01    6370  7755.350293
5  JALGAON 2020-11-01    6429  7520.796494
6  JALGAON 2020-12-01    7344  6822.519657
7  JALGAON 2021-01-01    7376  7155.932116
8  JALGAON 202

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000312 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 137
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3557.718750
=== Metrics ===
{'district': 'JALNA', 'rmse': 313.3244367616019, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0    JALNA 2020-06-01    3865  3493.711533
1    JALNA 2020-07-01    3234  3526.893432
2    JALNA 2020-08-01    3247  3449.601890
3    JALNA 2020-09-01    3401  3578.979104
4    JALNA 2020-10-01    3244  3691.682480
5    JALNA 2020-11-01    3164  3693.705837
6    JALNA 2020-12-01    3529  3596.076650
7    JALNA 2021-01-01    3637  3553.921248
8    JALNA 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 133
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 5159.593750
=== Metrics ===
{'district': 'KOLHAPUR', 'rmse': 382.281685274919, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
   district       date  actual     forecast
0  KOLHAPUR 2020-06-01    4861  5211.459116
1  KOLHAPUR 2020-07-01    4670  5045.524849
2  KOLHAPUR 2020-08-01    4437  5114.395186
3  KOLHAPUR 2020-09-01    5026  5208.644091
4  KOLHAPUR 2020-10-01    4973  4986.337637
5  KOLHAPUR 2020-11-01    4789  5282.653099
6  KOLHAPUR 2020-12-01    5795  5414.004489
7  KOLHAPUR 2021-01-01    5111  5346.394790
8  KO

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000365 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 137
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3893.937500
=== Metrics ===
{'district': 'LATUR', 'rmse': 482.35064640361253, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0    LATUR 2020-06-01    3989  3488.918873
1    LATUR 2020-07-01    3694  3753.475221
2    LATUR 2020-08-01    3498  3887.018598
3    LATUR 2020-09-01    3654  3666.213702
4    LATUR 2020-10-01    3751  3881.130389
5    LATUR 2020-11-01    3673  4104.367604
6    LATUR 2020-12-01    4091  3643.185571
7    LATUR 2021-01-01    4179  3479.512748
8    LATUR 2021

27 fits failed out of a total of 135.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
27 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 893, in _fit_and_score
    estimator.fit(X_train, **fit_params)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\base\_base.py", line 396, in fit
    self._fit(y=y_inner, X=X_inner, fh=fh)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\compose\_reduce.py", line 972, in _fit
    self.estimator_.fit(Xt, yt)
  File "c:\Users\nauti\AppData\Local\Programs\P

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 138
[LightGBM] [Info] Number of data points in the train set: 31, number of used features: 12
[LightGBM] [Info] Start training from score -658.225806
=== Metrics ===
{'district': 'MUMBAI', 'rmse': 4362.654061448858, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   MUMBAI 2020-06-01    2613   759.171798
1   MUMBAI 2020-07-01    2916  1668.986919
2   MUMBAI 2020-08-01    2676  2717.302814
3   MUMBAI 2020-09-01    3035  5212.210244
4   MUMBAI 2020-10-01    2924  4684.839069
5   MUMBAI 2020-11-01    3608  2525.039802
6   MUMBAI 2020-12-01    4419  2312.7

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 136
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 7025.875000
=== Metrics ===
{'district': 'NAGPUR', 'rmse': 1260.8329541613934, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   NAGPUR 2020-06-01    7368  7219.858911
1   NAGPUR 2020-07-01    7018  7159.831784
2   NAGPUR 2020-08-01    5585  7264.904099
3   NAGPUR 2020-09-01    5078  7586.603825
4   NAGPUR 2020-10-01    5618  7159.507885
5   NAGPUR 2020-11-01    6429  7237.867249
6   NAGPUR 2020-12-01    7872  7548.824709
7   NAGPUR 2021-01-01    7618  7500.279934
8   NAGPUR 202

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000263 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 136
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 5575.250000
=== Metrics ===
{'district': 'NANDED', 'rmse': 468.2190025699603, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   NANDED 2020-06-01    5423  5539.650847
1   NANDED 2020-07-01    5261  5740.972863
2   NANDED 2020-08-01    4984  5843.561123
3   NANDED 2020-09-01    5871  5863.211119
4   NANDED 2020-10-01    5214  5590.426213
5   NANDED 2020-11-01    5237  5407.148087
6   NANDED 2020-12-01    6097  5445.903274
7   NANDED 2021-01-01    5865  5383.031964
8   NANDED 2021

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000125 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 138
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3146.250000
=== Metrics ===
{'district': 'NANDURBAR', 'rmse': 545.5536780216936, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
    district       date  actual     forecast
0  NANDURBAR 2020-06-01    4043  3048.171507
1  NANDURBAR 2020-07-01    3577  2988.357370
2  NANDURBAR 2020-08-01    2901  3371.957972
3  NANDURBAR 2020-09-01    3040  3296.654337
4  NANDURBAR 2020-10-01    2588  3286.701801
5  NANDURBAR 2020-11-01    2581  2978.068154
6  NANDURBAR 2020-12-01    2989  3595.828240
7  NANDURBAR 2021-01-01    2803  3350.4

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 139
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 10788.656250
=== Metrics ===
{'district': 'NASHIK', 'rmse': 591.6867965581116, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual      forecast
0   NASHIK 2020-06-01   10644  10603.126465
1   NASHIK 2020-07-01   10668  10290.199245
2   NASHIK 2020-08-01    9838  10508.616057
3   NASHIK 2020-09-01   10420   9980.918152
4   NASHIK 2020-10-01   10365  10168.278920
5   NASHIK 2020-11-01    9966   9980.918152
6   NASHIK 2020-12-01   11339  10525.663438
7   NASHIK 2021-01-01   11056  10290.199245
8   N

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000152 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 135
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 2449.406250
=== Metrics ===
{'district': 'OSMANABAD', 'rmse': 319.9143373127787, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
    district       date  actual     forecast
0  OSMANABAD 2020-06-01    2582  2383.016046
1  OSMANABAD 2020-07-01    2273  2501.149227
2  OSMANABAD 2020-08-01    1935  2694.565497
3  OSMANABAD 2020-09-01    2241  2437.149259
4  OSMANABAD 2020-10-01    2207  2204.867224
5  OSMANABAD 2020-11-01    2274  2318.690157
6  OSMANABAD 2020-12-01    2540  2512.576654
7  OSMANABAD 2021-01-01    2519  2520.9

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 139
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 5201.281250
=== Metrics ===
{'district': 'PALGHAR', 'rmse': 723.3869131440182, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0  PALGHAR 2020-06-01    5473  5156.809227
1  PALGHAR 2020-07-01    4867  5356.666275
2  PALGHAR 2020-08-01    4606  5396.966341
3  PALGHAR 2020-09-01    4753  5485.826172
4  PALGHAR 2020-10-01    4616  5597.252293
5  PALGHAR 2020-11-01    4554  5368.452781
6  PALGHAR 2020-12-01    5005  5816.701580
7  PALGHAR 2021-01-01    5509  5466.666825
8  PALGHAR 202

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000238 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 138
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3062.312500
=== Metrics ===
{'district': 'PARBHANI', 'rmse': 439.51762690835056, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
   district       date  actual     forecast
0  PARBHANI 2020-06-01    3561  2909.144089
1  PARBHANI 2020-07-01    3186  3175.560962
2  PARBHANI 2020-08-01    2743  2821.189940
3  PARBHANI 2020-09-01    3099  2909.144089
4  PARBHANI 2020-10-01    2920  3076.831624
5  PARBHANI 2020-11-01    2538  3262.176405
6  PARBHANI 2020-12-01    3472  2909.144089
7  PARBHANI 2021-01-01    3279  3136.530392
8  

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000196 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 137
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 16314.250000
=== Metrics ===
{'district': 'PUNE', 'rmse': 1312.901814885667, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual      forecast
0     PUNE 2020-06-01   16981  16410.005595
1     PUNE 2020-07-01   14939  16415.769065
2     PUNE 2020-08-01   14322  16222.638722
3     PUNE 2020-09-01   16328  16093.720180
4     PUNE 2020-10-01   15750  16093.720180
5     PUNE 2020-11-01   15551  15816.910426
6     PUNE 2020-12-01   18113  16415.769065
7     PUNE 2021-01-01   17234  16415.769065
8     P

27 fits failed out of a total of 135.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
27 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 893, in _fit_and_score
    estimator.fit(X_train, **fit_params)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\base\_base.py", line 396, in fit
    self._fit(y=y_inner, X=X_inner, fh=fh)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\compose\_reduce.py", line 972, in _fit
    self.estimator_.fit(Xt, yt)
  File "c:\Users\nauti\AppData\Local\Programs\P

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000287 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 129
[LightGBM] [Info] Number of data points in the train set: 31, number of used features: 12
[LightGBM] [Info] Start training from score 30.709677
=== Metrics ===
{'district': 'RAIGAD', 'rmse': 784.1557463081392, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   RAIGAD 2020-06-01    3940  3969.818967
1   RAIGAD 2020-07-01    3465  4449.067669
2   RAIGAD 2020-08-01    3013  4306.246911
3   RAIGAD 2020-09-01    3412  4783.895238
4   RAIGAD 2020-10-01    3704  4573.796497
5   RAIGAD 2020-11-01    3998  4783.077170
6   RAIGAD 2020-12-01    5251  4768.695562
7   RAIGAD 2021-01-01    4693  4669.718146
8   RAIGAD 2021-0

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000124 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 136
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 1478.437500
=== Metrics ===
{'district': 'RATNAGIRI', 'rmse': 258.90603151961045, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
    district       date  actual     forecast
0  RATNAGIRI 2020-06-01    1939  1525.136747
1  RATNAGIRI 2020-07-01    1742  1316.197769
2  RATNAGIRI 2020-08-01    1206  1313.491348
3  RATNAGIRI 2020-09-01    1279  1460.608707
4  RATNAGIRI 2020-10-01    1186  1425.411219
5  RATNAGIRI 2020-11-01    1074  1424.006591
6  RATNAGIRI 2020-12-01    1108  1292.840924
7  RATNAGIRI 2021-01-01    1177  1288.

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000250 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 136
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 4170.968750
=== Metrics ===
{'district': 'SANGLI', 'rmse': 271.95219071157806, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   SANGLI 2020-06-01    4359  4118.128974
1   SANGLI 2020-07-01    4264  4220.759889
2   SANGLI 2020-08-01    3874  4166.414270
3   SANGLI 2020-09-01    3891  4381.622850
4   SANGLI 2020-10-01    3891  4158.882522
5   SANGLI 2020-11-01    3821  4138.447248
6   SANGLI 2020-12-01    4539  4354.936109
7   SANGLI 2021-01-01    4333  4118.662860
8   SANGLI 202

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000255 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 132
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 4054.937500
=== Metrics ===
{'district': 'SATARA', 'rmse': 503.11305213933053, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   SATARA 2020-06-01    4063  4100.427894
1   SATARA 2020-07-01    3915  3937.716029
2   SATARA 2020-08-01    3233  4008.670405
3   SATARA 2020-09-01    3103  4203.729114
4   SATARA 2020-10-01    3760  4118.183041
5   SATARA 2020-11-01    3582  3863.170649
6   SATARA 2020-12-01    4595  3894.296270
7   SATARA 2021-01-01    4175  4175.870872
8   SATARA 202

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000268 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 135
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 668.406250
=== Metrics ===
{'district': 'SINDHUDURG', 'rmse': 106.78197635294067, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
     district       date  actual    forecast
0  SINDHUDURG 2020-06-01     792  683.520908
1  SINDHUDURG 2020-07-01     721  707.528025
2  SINDHUDURG 2020-08-01     549  667.568492
3  SINDHUDURG 2020-09-01     579  701.228104
4  SINDHUDURG 2020-10-01     530  676.900632
5  SINDHUDURG 2020-11-01     476  638.239105
6  SINDHUDURG 2020-12-01     592  708.647399
7  SINDHUDURG 2021-01-01     618  706.

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 136
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 7066.625000
=== Metrics ===
{'district': 'SOLAPUR', 'rmse': 488.0965012443051, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0  SOLAPUR 2020-06-01    6951  6910.500694
1  SOLAPUR 2020-07-01    6858  6973.115522
2  SOLAPUR 2020-08-01    6356  7044.131027
3  SOLAPUR 2020-09-01    6758  6910.500694
4  SOLAPUR 2020-10-01    6321  6811.169567
5  SOLAPUR 2020-11-01    6058  7172.859043
6  SOLAPUR 2020-12-01    7648  7367.212365
7  SOLAPUR 2021-01-01    7550  7279.395133
8  SOLAPUR 202

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 135
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 13838.343750
=== Metrics ===
{'district': 'THANE', 'rmse': 1592.825811830195, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual      forecast
0    THANE 2020-06-01   13519  14410.098571
1    THANE 2020-07-01   12050  13902.458267
2    THANE 2020-08-01   12215  15022.412233
3    THANE 2020-09-01   13770  13712.342979
4    THANE 2020-10-01   13402  13652.357404
5    THANE 2020-11-01   13995  13388.640348
6    THANE 2020-12-01   14285  13605.619152
7    THANE 2021-01-01   15181  14360.889815
8    T

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000186 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 136
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 1587.656250
=== Metrics ===
{'district': 'WARDHA', 'rmse': 241.71009781248944, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   WARDHA 2020-06-01    1435  1523.560723
1   WARDHA 2020-07-01    1452  1546.836291
2   WARDHA 2020-08-01    1358  1583.327999
3   WARDHA 2020-09-01    1360  1667.454443
4   WARDHA 2020-10-01    1306  1604.004646
5   WARDHA 2020-11-01    1196  1749.054095
6   WARDHA 2020-12-01    1561  1599.

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000244 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 137
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 1663.500000
=== Metrics ===
{'district': 'WASHIM', 'rmse': 192.3923022587395, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   WASHIM 2020-06-01    1583  1743.238876
1   WASHIM 2020-07-01    1629  1620.706516
2   WASHIM 2020-08-01    1502  1726.904404
3   WASHIM 2020-09-01    1599  1814.043668
4   WASHIM 2020-10-01    1501  1681.239079
5   WASHIM 2020-11-01    1453  1847.371243
6   WASHIM 2020-12-01    1839  1672.669891
7   WASHIM 2021-01-01    1658  1676.452225
8   WASHIM 2021

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000160 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 134
[LightGBM] [Info] Number of data points in the train set: 32, number of used features: 12
[LightGBM] [Info] Start training from score 3851.750000
=== Metrics ===
{'district': 'YAVATMAL', 'rmse': 252.1196570399129, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
   district       date  actual     forecast
0  YAVATMAL 2020-06-01    3616  4038.108428
1  YAVATMAL 2020-07-01    3673  3889.812259
2  YAVATMAL 2020-08-01    3425  3838.748087
3  YAVATMAL 2020-09-01    3507  3631.643553
4  YAVATMAL 2020-10-01    3562  3485.401754
5  YAVATMAL 2020-11-01    3501  3353.137635
6  YAVATMAL 2020-12-01    3918  3505.759121
7  YAVATMAL 2021-01-01    3851  3810.221313
8  Y

108 fits failed out of a total of 135.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
81 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 893, in _fit_and_score
    estimator.fit(X_train, **fit_params)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\base\_base.py", line 396, in fit
    self._fit(y=y_inner, X=X_inner, fh=fh)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\compose\_reduce.py", line 963, in _fit
    yt, Xt = self._transform(y, X)
             ^^^^^^^^^^^^^^^^^^^^^
  File "

[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 3, number of used features: 0
[LightGBM] [Info] Start training from score 12741.666667
=== Metrics ===
{'district': 'MUMBAI SUBURBAN', 'rmse': 591.2353357354607, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
          district       date  actual      forecast
0  MUMBAI SUBURBAN 2021-01-01   13242  12741.666667
1  MUMBAI SUBURBAN 2021-02-01   13444  12741.666667
2  MUMBAI SUBURBAN 2021-03-01   13294  12741.666667


{'district': 'MUMBAI SUBURBAN',
 'rmse': 591.2353357354607,
 'best_params': {'estimator__regressor__learning_rate': 0.01,
  'estimator__regressor__min_child_samples': 10,
  'estimator__regressor__num_leaves': 15},
 'differencing': 0}

In [None]:
# !pip install "dask<2025.1"
