In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import lightgbm as lgb
from statsmodels.tsa.stattools import adfuller
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.transformations.series.summarize import WindowSummarizer
from sktime.forecasting.compose import make_reduction
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [3]:
def lightgbm_forecast_lags(series, district_name, 
                          max_lags=6, 
                          window_features=None,
                          differencing=True,
                          use_cv=True,
                          custom_params=None):
    """
    LightGBM time series forecasting with proper temporal validation
    """
    # Create output directory
    os.makedirs('LightGBM_Forecasts', exist_ok=True)
    
    # 1. Stationarity handling
    original_series = series.copy()
    d = 0
    if differencing:
        adf_result = adfuller(series.dropna())
        if adf_result[1] > 0.05:
            d = 1
            series = series.diff().dropna()

    # 2. Temporal split using sktime's proper method
    y_train, y_test = temporal_train_test_split(series, test_size=0.2)

    # 3. Create feature engineering pipeline
    features = []
    if max_lags > 0:
        features.append(("lags", WindowSummarizer(
            lag_feature={"lag": list(range(1, max_lags+1))},
            truncate="bfill"
        )))
    
    if window_features:
        features.append(("window", WindowSummarizer(
            lag_feature={
                "mean": [[1, window_features['mean_window']]],
                "std": [[1, window_features['std_window']]]
            },
            truncate="bfill"
        )))

    # 4. LightGBM regressor setup
    regressor = lgb.LGBMRegressor(
        **custom_params or {
            'objective': 'regression',
            'metric': 'rmse',
            'num_leaves': 31,
            'learning_rate': 0.05,
            'n_estimators': 500,
            'random_state': 42
        }
    )
    
    # 5. Create forecasting pipeline
    forecaster = make_reduction(
        estimator=Pipeline([
            ("features", Pipeline(features)),
            ("regressor", regressor)
        ]),
        window_length=max_lags,
        strategy="recursive"
    )

    # 6. Hyperparameter tuning with temporal CV
    if use_cv:
        param_grid = {
            'estimator__regressor__num_leaves': [15, 31, 63],
            'estimator__regressor__learning_rate': [0.01, 0.05, 0.1],
            'estimator__regressor__min_child_samples': [10, 20, 50]
        }
        
        forecaster = GridSearchCV(
            forecaster,
            param_grid,
            cv=TimeSeriesSplit(n_splits=5),
            scoring='neg_mean_squared_error',
            n_jobs=-1
        )

    # 7. Training
    forecaster.fit(y_train)
    
    # 8. Forecasting
    fh = np.arange(len(y_test)) + 1  # Forecast horizon
    y_pred = forecaster.predict(fh)
    
    # 9. Inverse differencing
    if d == 1:
        last_train_value = original_series.iloc[-len(y_test)-1]
        y_pred = pd.Series(np.cumsum(y_pred) + last_train_value, index=y_test.index)
        y_test = original_series.iloc[-len(y_test):]

    # 10. Create results dataframe
    forecast_df = pd.DataFrame({
        'district': district_name,
        'date': y_test.index,
        'actual': y_test.values,
        'forecast': y_pred.values
    })

    # 11. Save results
    forecast_df.to_csv(
        f'LightGBM_Forecasts/{district_name}_forecast.csv',
        index=False
    )

    # 12. Calculate metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    metrics = {
        'district': district_name,
        'rmse': rmse,
        'best_params': getattr(forecaster, 'best_params_', None),
        'differencing': d
    }

    # 13. Visualization
    plt.figure(figsize=(14, 7))
    plt.plot(original_series.index, original_series, label='Original')
    plt.plot(forecast_df['date'], forecast_df['forecast'], 
            label='LightGBM Forecast', alpha=0.7)
    plt.title(f'{district_name} Forecast\nRMSE: {rmse:.2f}')
    plt.legend()
    plt.savefig(f'LightGBM_Forecasts/{district_name}_forecast_plot.png')
    plt.close()

    return metrics, forecast_df

In [4]:
districts = data['district'].unique()
rmse_values = []

# Usage Example


def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I55"].asfreq('MS')
        
        results, forecast = lightgbm_forecast_lags(
        series=ts,
        district_name=district,
        max_lags=3,
        window_features={'mean_window': 3, 'std_window': 6},
        custom_params={
            'num_leaves': 63,
            'learning_rate': 0.1,
            'n_estimators': 1000
        }
    )

        
    
        # Show results
        print("=== Metrics ===")
        print(results)
        print("\n=== Forecast Data ===")
        print(forecast)
    
    return results
run_for_each_district()

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000071 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 5635.942857
=== Metrics ===
{'district': 'AHMEDNAGAR', 'rmse': 1181.2060974019232, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
     district       date  actual     forecast
0  AHMEDNAGAR 2020-06-01    5761  6581.872315
1  AHMEDNAGAR 2020-07-01    6225  5830.747154
2  AHMEDNAGAR 2020-08-01    6609  5984.308493
3  AHMEDNAGAR 2020-09-01    7268  6256.978277
4  AHMEDNAGAR 2020-10-01    6799  6103.416939
5  AHMEDNAGAR 2020-11-01    5248  5830.747154
6  AHMEDNAGAR

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000049 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 43.500000
=== Metrics ===
{'district': 'AKOLA', 'rmse': 903.8880260568568, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0    AKOLA 2020-06-01    1889  2190.103004
1    AKOLA 2020-07-01    1999  2519.950822
2    AKOLA 2020-08-01    1927  2447.428187
3    AKOLA 2020-09-01    2604  2433.340576
4    AKOLA 2020-10-01    2342  2910.790436
5    AKOLA 2020-11-01    2550  3223.358195
6    AKOLA 2020-12-01    2513  3150.835560
7    AKOLA 2021-01-01    2215  3136.747948
8    AKOLA 2021-02-0

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000053 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 16.088235
=== Metrics ===
{'district': 'AMRAVATI', 'rmse': 1063.7175484798927, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
   district       date  actual     forecast
0  AMRAVATI 2020-06-01    3183  2264.703954
1  AMRAVATI 2020-07-01    2772  2268.396804
2  AMRAVATI 2020-08-01    3081  2267.953092
3  AMRAVATI 2020-09-01    3483  2081.028140
4  AMRAVATI 2020-10-01    3505  2035.895676
5  AMRAVATI 2020-11-01    3484  2169.297917
6  AMRAVATI 2020-12-01    3227  1823.765991
7  AMRAVATI 2021-01-01    2783  1738.542083
8  AMRA

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000259 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score -31.735294
=== Metrics ===
{'district': 'AURANGABAD', 'rmse': 714.0470864319601, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
     district       date  actual     forecast
0  AURANGABAD 2020-06-01    5574  5730.036846
1  AURANGABAD 2020-07-01    5621  5560.265790
2  AURANGABAD 2020-08-01    5821  5571.277273
3  AURANGABAD 2020-09-01    6050  5401.506217
4  AURANGABAD 2020-10-01    6041  5426.031088
5  AURANGABAD 2020-11-01    6010  5256.260031
6  AURANGABAD 2020-12-01    6373  5280.784903
7  AURANGABAD 2021-01-01    6136 

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000128 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 2185.171429
=== Metrics ===
{'district': 'BEED', 'rmse': 443.22112146514706, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0     BEED 2020-06-01    2392  2289.228181
1     BEED 2020-07-01    2942  2289.228181
2     BEED 2020-08-01    2934  2393.337852
3     BEED 2020-09-01    2943  2393.337852
4     BEED 2020-10-01    3112  2393.337852
5     BEED 2020-11-01    2812  2393.337852
6     BEED 2020-12-01    2752  2393.337852
7     BEED 2021-01-01    2275  2393.337852
8     BEED 2021-02

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000140 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 13.882353
=== Metrics ===
{'district': 'BHANDARA', 'rmse': 395.1522856777317, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
   district       date  actual    forecast
0  BHANDARA 2020-06-01    1098  960.015968
1  BHANDARA 2020-07-01    1119  985.628246
2  BHANDARA 2020-08-01    1256  937.501506
3  BHANDARA 2020-09-01    1297  882.407261
4  BHANDARA 2020-10-01    1524  903.576567
5  BHANDARA 2020-11-01    1320  855.449827
6  BHANDARA 2020-12-01    1448  802.280822
7  BHANDARA 2021-01-01    1100  823.450128
8  BHANDARA 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000056 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 71
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 6.117647
=== Metrics ===
{'district': 'BULDHANA', 'rmse': 217.49995241555163, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
   district       date  actual     forecast
0  BULDHANA 2020-06-01    1131  1116.333858
1  BULDHANA 2020-07-01    1172  1172.944311
2  BULDHANA 2020-08-01    1332  1273.274429
3  BULDHANA 2020-09-01    1442  1212.104132
4  BULDHANA 2020-10-01    1664  1222.060024
5  BULDHANA 2020-11-01    1582  1249.913336
6  BULDHANA 2020-12-01    1464  1336.084804
7  BULDHANA 2021-01-01    1211  1321.661716
8  BULDH

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 1686.000000
=== Metrics ===
{'district': 'CHANDRAPUR', 'rmse': 390.0172332932461, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
     district       date  actual     forecast
0  CHANDRAPUR 2020-06-01    1843  1800.684662
1  CHANDRAPUR 2020-07-01    1983  2009.922421
2  CHANDRAPUR 2020-08-01    2295  2297.480287
3  CHANDRAPUR 2020-09-01    2647  2015.362541
4  CHANDRAPUR 2020-10-01    2760  2020.502651
5  CHANDRAPUR 2020-11-01    2186  1915.916878
6  CHANDRAPUR 2020-12-01    1946  2020.502651
7  CHANDRAPUR 2021-01-01    1652

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000166 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 1597.200000
=== Metrics ===
{'district': 'DHULE', 'rmse': 281.6729968318723, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0    DHULE 2020-06-01    1791  1728.335993
1    DHULE 2020-07-01    1662  1474.011062
2    DHULE 2020-08-01    2372  1667.301418
3    DHULE 2020-09-01    2023  1626.753059
4    DHULE 2020-10-01    2028  1824.148089
5    DHULE 2020-11-01    1809  1659.089299
6    DHULE 2020-12-01    1803  1652.976269
7    DHULE 2021-01-01    1700  1583.464132
8    DHULE 2021-02

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000143 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 1375.485714
=== Metrics ===
{'district': 'GADCHIROLI', 'rmse': 351.98179182371905, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
     district       date  actual     forecast
0  GADCHIROLI 2020-06-01    1153  1315.963046
1  GADCHIROLI 2020-07-01    1238  1324.531155
2  GADCHIROLI 2020-08-01    1510  1221.831973
3  GADCHIROLI 2020-09-01    1648  1243.570681
4  GADCHIROLI 2020-10-01    1751  1275.960510
5  GADCHIROLI 2020-11-01    1807  1260.891649
6  GADCHIROLI 2020-12-01    1797  1260.891649
7  GADCHIROLI 2021-01-01    155

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 779.742857
=== Metrics ===
{'district': 'GONDIA', 'rmse': 187.9637316114614, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual    forecast
0   GONDIA 2020-06-01     905  790.851389
1   GONDIA 2020-07-01     973  778.860253
2   GONDIA 2020-08-01     997  848.771497
3   GONDIA 2020-09-01     986  868.079921
4   GONDIA 2020-10-01    1065  798.168677
5   GONDIA 2020-11-01     934  778.860253
6   GONDIA 2020-12-01     990  848.771497
7   GONDIA 2021-01-01     953  868.079921
8   GONDIA 2021-02-01    10

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000208 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 942.000000
=== Metrics ===
{'district': 'HINGOLI', 'rmse': 219.01922097250468, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0  HINGOLI 2020-06-01     891  1096.874224
1  HINGOLI 2020-07-01    1043  1202.004690
2  HINGOLI 2020-08-01    1101  1171.249424
3  HINGOLI 2020-09-01    1343  1172.898683
4  HINGOLI 2020-10-01    1229  1172.898683
5  HINGOLI 2020-11-01    1073  1172.898683
6  HINGOLI 2020-12-01     996  1172.898683
7  HINGOLI 2021-01-01     848  1172.898683
8  HINGOLI 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 76.117647
=== Metrics ===
{'district': 'JALGAON', 'rmse': 3837.4261572980736, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0  JALGAON 2020-06-01    4311  4287.971010
1  JALGAON 2020-07-01    3714  4264.443449
2  JALGAON 2020-08-01   15827  4543.668548
3  JALGAON 2020-09-01    5302  4890.165168
4  JALGAON 2020-10-01    4900  5028.136038
5  JALGAON 2020-11-01    4163  5004.190288
6  JALGAON 2020-12-01    6939  5120.021436
7  JALGAON 2021-01-01    3425  5316.165952
8  JALGAON 2021-0

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000136 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 31.058824
=== Metrics ===
{'district': 'JALNA', 'rmse': 488.7097621122054, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0    JALNA 2020-06-01    1794  1623.247016
1    JALNA 2020-07-01    1894  1796.726722
2    JALNA 2020-08-01    1719  1725.296811
3    JALNA 2020-09-01    1730  1754.547844
4    JALNA 2020-10-01    2115  1769.710235
5    JALNA 2020-11-01    1968  1585.339074
6    JALNA 2020-12-01    1879  1772.459705
7    JALNA 2021-01-01     936  1804.700146
8    JALNA 2021-02-0

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000150 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 176.088235
=== Metrics ===
{'district': 'KOLHAPUR', 'rmse': 2334.622820154885, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
   district       date  actual     forecast
0  KOLHAPUR 2020-06-01    6692  6924.194979
1  KOLHAPUR 2020-07-01    6281  7186.794040
2  KOLHAPUR 2020-08-01    6879  7434.069572
3  KOLHAPUR 2020-09-01    7432  7545.530203
4  KOLHAPUR 2020-10-01    6246  7803.529289
5  KOLHAPUR 2020-11-01    5778  8061.528374
6  KOLHAPUR 2020-12-01    5687  8273.569241
7  KOLHAPUR 2021-01-01    5502  8531.568326
8  KOLH

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 48.852941
=== Metrics ===
{'district': 'LATUR', 'rmse': 564.7046470782929, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0    LATUR 2020-06-01    3154  2882.895704
1    LATUR 2020-07-01    2947  2882.054552
2    LATUR 2020-08-01    3092  3043.583014
3    LATUR 2020-09-01    3775  2749.123293
4    LATUR 2020-10-01    3978  3043.971982
5    LATUR 2020-11-01    3652  2764.828166
6    LATUR 2020-12-01    3509  3165.626947
7    LATUR 2021-01-01    3108  2886.483131
8    LATUR 2021-02-0

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000200 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score -107.676471
=== Metrics ===
{'district': 'MUMBAI', 'rmse': 1557.3235050301319, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   MUMBAI 2020-06-01    2618  4118.860554
1   MUMBAI 2020-07-01    2667  3998.639625
2   MUMBAI 2020-08-01    2879  3151.129789
3   MUMBAI 2020-09-01    3817  2135.303753
4   MUMBAI 2020-10-01    3638  1832.941172
5   MUMBAI 2020-11-01    4551  2482.767214
6   MUMBAI 2020-12-01    3083  3628.043480
7   MUMBAI 2021-01-01    2294  4015.315540
8   MUMBAI 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000185 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score -33.235294
=== Metrics ===
{'district': 'NAGPUR', 'rmse': 607.2641854881305, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   NAGPUR 2020-06-01    2379  1999.949353
1   NAGPUR 2020-07-01    2306  2187.922418
2   NAGPUR 2020-08-01    2264  2218.397178
3   NAGPUR 2020-09-01    2244  1699.246668
4   NAGPUR 2020-10-01    2712  1984.827659
5   NAGPUR 2020-11-01    2889  2235.126972
6   NAGPUR 2020-12-01    2822  1632.319036
7   NAGPUR 2021-01-01    2046  1820.292101
8   NAGPUR 2021-02

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000040 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 3642.771429
=== Metrics ===
{'district': 'NANDED', 'rmse': 1037.4634410464976, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   NANDED 2020-06-01    2096  3685.651890
1   NANDED 2020-07-01    3902  3086.508646
2   NANDED 2020-08-01    4032  3729.129114
3   NANDED 2020-09-01    4237  3086.508646
4   NANDED 2020-10-01    4756  3685.651890
5   NANDED 2020-11-01    4347  3086.508646
6   NANDED 2020-12-01    4200  3685.651890
7   NANDED 2021-01-01    2250  3086.508646
8   NANDED 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 5.117647
=== Metrics ===
{'district': 'NANDURBAR', 'rmse': 905.9120642766901, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
    district       date  actual     forecast
0  NANDURBAR 2020-06-01    2152  1745.918055
1  NANDURBAR 2020-07-01    2523  1784.059607
2  NANDURBAR 2020-08-01    2577  1679.591607
3  NANDURBAR 2020-09-01    2654  1465.408587
4  NANDURBAR 2020-10-01    2593  1451.910036
5  NANDURBAR 2020-11-01    2566  1237.727016
6  NANDURBAR 2020-12-01    2217  1283.087244
7  NANDURBAR 2021-01-01    1855  1270.973947

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 135.147059
=== Metrics ===
{'district': 'NASHIK', 'rmse': 1108.9070788901747, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   NASHIK 2020-06-01    6559  6649.973126
1   NASHIK 2020-07-01    7484  6801.468902
2   NASHIK 2020-08-01    7551  7186.761211
3   NASHIK 2020-09-01    8376  7273.703988
4   NASHIK 2020-10-01    7227  7249.147638
5   NASHIK 2020-11-01    6907  7477.909225
6   NASHIK 2020-12-01    6679  7858.968925
7   NASHIK 2021-01-01    6820  7960.679875
8   NASHIK 2021-0

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000162 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 9.529412
=== Metrics ===
{'district': 'OSMANABAD', 'rmse': 435.80917669050547, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
    district       date  actual     forecast
0  OSMANABAD 2020-06-01    1465  1610.771622
1  OSMANABAD 2020-07-01    1794  1584.439406
2  OSMANABAD 2020-08-01    1952  1433.196937
3  OSMANABAD 2020-09-01    2171  1551.520196
4  OSMANABAD 2020-10-01    2081  1623.379274
5  OSMANABAD 2020-11-01    1981  1413.025072
6  OSMANABAD 2020-12-01    1981  1407.983157
7  OSMANABAD 2021-01-01    1830  1445.13089

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000151 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score -10.235294
=== Metrics ===
{'district': 'PALGHAR', 'rmse': 1629.0324216539966, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0  PALGHAR 2020-06-01    4417  3615.374565
1  PALGHAR 2020-07-01    4269  3510.411515
2  PALGHAR 2020-08-01    4229  3229.522764
3  PALGHAR 2020-09-01    4522  3470.738948
4  PALGHAR 2020-10-01    5385  3131.452176
5  PALGHAR 2020-11-01    5076  3135.495808
6  PALGHAR 2020-12-01    4708  2711.593310
7  PALGHAR 2021-01-01    4568  2715.636942
8  PALGHAR 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000046 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 1211.114286
=== Metrics ===
{'district': 'PARBHANI', 'rmse': 473.55662433352467, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
   district       date  actual     forecast
0  PARBHANI 2020-06-01    1298  1183.184061
1  PARBHANI 2020-07-01    1573  1325.817273
2  PARBHANI 2020-08-01    1630  1233.423398
3  PARBHANI 2020-09-01    1966  1250.906608
4  PARBHANI 2020-10-01    1984  1247.744200
5  PARBHANI 2020-11-01     868  1283.102238
6  PARBHANI 2020-12-01    1740  1195.063124
7  PARBHANI 2021-01-01    1755  1152.942845
8  PA

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000258 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 10352.057143
=== Metrics ===
{'district': 'PUNE', 'rmse': 4391.20019475051, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0     PUNE 2020-06-01   13305  9535.651320
1     PUNE 2020-07-01   11790  9179.781911
2     PUNE 2020-08-01   12579  8662.332248
3     PUNE 2020-09-01   14094  8835.352962
4     PUNE 2020-10-01   14381  8752.062775
5     PUNE 2020-11-01   14329  8752.062775
6     PUNE 2020-12-01   13462  8752.062775
7     PUNE 2021-01-01   13048  8752.062775
8     PUNE 2021-02-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000215 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 74
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 30.294118
=== Metrics ===
{'district': 'RAIGAD', 'rmse': 953.0134440907533, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   RAIGAD 2020-06-01    1924  3061.834945
1   RAIGAD 2020-07-01    3532  2519.878043
2   RAIGAD 2020-08-01    3912  2504.915143
3   RAIGAD 2020-09-01    3990  2861.296021
4   RAIGAD 2020-10-01    3615  2571.856021
5   RAIGAD 2020-11-01    3367  2781.040239
6   RAIGAD 2020-12-01    3787  3006.306146
7   RAIGAD 2021-01-01    3511  2464.349244
8   RAIGAD 2021-02-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000194 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 1075.485714
=== Metrics ===
{'district': 'RATNAGIRI', 'rmse': 378.9281853902767, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
    district       date  actual     forecast
0  RATNAGIRI 2020-06-01    1240  1269.344257
1  RATNAGIRI 2020-07-01    1387  1340.490214
2  RATNAGIRI 2020-08-01    2246  1340.490214
3  RATNAGIRI 2020-09-01    1755  1340.490214
4  RATNAGIRI 2020-10-01    1658  1340.490214
5  RATNAGIRI 2020-11-01    1155  1340.490214
6  RATNAGIRI 2020-12-01    1683  1340.490214
7  RATNAGIRI 2021-01-01    1239  1340.490

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000207 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 76
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 2285.371429
=== Metrics ===
{'district': 'SANGLI', 'rmse': 1023.4238444299883, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual     forecast
0   SANGLI 2020-06-01    2942  2209.864436
1   SANGLI 2020-07-01    3899  2105.975747
2   SANGLI 2020-08-01    4244  2209.042719
3   SANGLI 2020-09-01    2465  2086.262150
4   SANGLI 2020-10-01    2330  1914.362576
5   SANGLI 2020-11-01    3203  2157.943375
6   SANGLI 2020-12-01    2586  1867.969559
7   SANGLI 2021-01-01    1822  1914.362576
8   SANGLI 2021-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000198 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 1.382353
=== Metrics ===
{'district': 'SATARA', 'rmse': 1577.183293729222, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   SATARA 2020-06-01    1957  1508.732863
1   SATARA 2020-07-01    1934  1117.217458
2   SATARA 2020-08-01    2767  1002.950321
3   SATARA 2020-09-01    2016   888.683184
4   SATARA 2020-10-01    2412   774.416046
5   SATARA 2020-11-01    2455   660.148909
6   SATARA 2020-12-01    2576   545.881771
7   SATARA 2021-01-01    2272   431.614634
8   SATARA 2021-02-0

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000221 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score -1.676471
=== Metrics ===
{'district': 'SINDHUDURG', 'rmse': 57.14422378497496, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
     district       date  actual    forecast
0  SINDHUDURG 2020-06-01     423  360.396713
1  SINDHUDURG 2020-07-01     479  429.324129
2  SINDHUDURG 2020-08-01     516  434.296346
3  SINDHUDURG 2020-09-01     446  391.065434
4  SINDHUDURG 2020-10-01     435  459.992850
5  SINDHUDURG 2020-11-01     458  409.766354
6  SINDHUDURG 2020-12-01     456  397.989622
7  SINDHUDURG 2021-01-01     456  414.9292

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000184 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 64.823529
=== Metrics ===
{'district': 'SOLAPUR', 'rmse': 1226.868373618937, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0  SOLAPUR 2020-06-01    2283  3168.425396
1  SOLAPUR 2020-07-01    2470  2923.695246
2  SOLAPUR 2020-08-01    1918  2884.663344
3  SOLAPUR 2020-09-01    2558  2995.573222
4  SOLAPUR 2020-10-01    2463  3148.327612
5  SOLAPUR 2020-11-01    3694  3347.227701
6  SOLAPUR 2020-12-01    6427  3630.661678
7  SOLAPUR 2021-01-01    4111  3914.095655
8  SOLAPUR 2021-02

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000142 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 49.205882
=== Metrics ===
{'district': 'THANE', 'rmse': 3331.3271673865934, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual      forecast
0    THANE 2020-06-01    7942  11446.872848
1    THANE 2020-07-01    5153  10987.534302
2    THANE 2020-08-01   11494  10608.023557
3    THANE 2020-09-01   12157  10122.682092
4    THANE 2020-10-01   12242   9779.378401
5    THANE 2020-11-01    7730  12021.237874
6    THANE 2020-12-01   12598  11208.858819
7    THANE 2021-01-01    8749  13450.142726
8    THANE

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000132 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 34, number of used features: 6
[LightGBM] [Info] Start training from score 17.029412
=== Metrics ===
{'district': 'WARDHA', 'rmse': 300.3383344712912, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
  district       date  actual     forecast
0   WARDHA 2020-06-01    1078  1267.671304
1   WARDHA 2020-07-01    1131  1324.377444
2   WARDHA 2020-08-01    1326  1386.042543
3   WARDHA 2020-09-01    1431  1411.149185
4   WARDHA 2020-10-01    1425  1284.012618
5   WARDHA 2020-11-01    1342  1465.957361
6   WARDHA 2020-12-01    1209  1390.384204
7   WARDHA 2021-01-01    1027  1545.951475
8   WARDHA 2021-02-

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000234 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 75
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 491.600000
=== Metrics ===
{'district': 'WASHIM', 'rmse': 202.85507392819548, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
  district       date  actual    forecast
0   WASHIM 2020-06-01     579  445.898225
1   WASHIM 2020-07-01     622  478.415605
2   WASHIM 2020-08-01     696  409.448246
3   WASHIM 2020-09-01     678  414.879150
4   WASHIM 2020-10-01     696  402.213317
5   WASHIM 2020-11-01     627  396.782413
6   WASHIM 2020-12-01     616  396.782413
7   WASHIM 2021-01-01     509  396.782413
8   WASHIM 2021-02-01     

 nan nan nan nan nan nan nan nan nan]


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000203 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 73
[LightGBM] [Info] Number of data points in the train set: 35, number of used features: 6
[LightGBM] [Info] Start training from score 3137.542857
=== Metrics ===
{'district': 'YAVATMAL', 'rmse': 680.0991436283696, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 0}

=== Forecast Data ===
   district       date  actual     forecast
0  YAVATMAL 2020-06-01    2781  2162.268199
1  YAVATMAL 2020-07-01    4215  3177.294030
2  YAVATMAL 2020-08-01    3800  3041.810485
3  YAVATMAL 2020-09-01    3624  2697.772419
4  YAVATMAL 2020-10-01    3708  2673.920146
5  YAVATMAL 2020-11-01    3219  3177.294030
6  YAVATMAL 2020-12-01    3613  3041.810485
7  YAVATMAL 2021-01-01    3199  2697.772419
8  YAV

54 fits failed out of a total of 135.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
27 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 893, in _fit_and_score
    estimator.fit(X_train, **fit_params)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\base\_base.py", line 396, in fit
    self._fit(y=y_inner, X=X_inner, fh=fh)
  File "c:\Users\nauti\AppData\Local\Programs\Python\Python311\Lib\site-packages\sktime\forecasting\compose\_reduce.py", line 963, in _fit
    yt, Xt = self._transform(y, X)
             ^^^^^^^^^^^^^^^^^^^^^
  File "c

[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 5, number of used features: 0
[LightGBM] [Info] Start training from score 260.200000
=== Metrics ===
{'district': 'MUMBAI SUBURBAN', 'rmse': 1754.3324618403053, 'best_params': {'estimator__regressor__learning_rate': 0.01, 'estimator__regressor__min_child_samples': 10, 'estimator__regressor__num_leaves': 15}, 'differencing': 1}

=== Forecast Data ===
          district       date  actual  forecast
0  MUMBAI SUBURBAN 2021-01-01    5269    6055.2
1  MUMBAI SUBURBAN 2021-02-01    3958    6315.4
2  MUMBAI SUBURBAN 2021-03-01    4827    6575.6


{'district': 'MUMBAI SUBURBAN',
 'rmse': 1754.3324618403053,
 'best_params': {'estimator__regressor__learning_rate': 0.01,
  'estimator__regressor__min_child_samples': 10,
  'estimator__regressor__num_leaves': 15},
 'differencing': 1}

In [5]:
# !pip install "dask<2025.1"
