In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [3]:
def prophet_forecast(
    series,
    district_name,
    forecast_periods=12,
    yearly_seasonality='auto',
    weekly_seasonality=False,
    daily_seasonality=False,
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10.0,
    holidays=None,
    plot_forecast=True
):
    """
    Prophet time series forecasting with error handling and RMSE calculation.
    Returns (metrics, forecast_df) or (None, None) on error.
    """
    try:
        # 0. Directory setup
        os.makedirs('Prophet_Forecasts', exist_ok=True)

        # 1. Prepare DataFrame for Prophet
        df = pd.DataFrame({'ds': series.index, 'y': series.values})
        df = df.dropna()
        if len(df) < forecast_periods + 12:
            raise ValueError(f"Series too short ({len(df)} points) for {forecast_periods}-step forecast.")

        # 2. Train/test split
        train = df.iloc[:-forecast_periods]
        test = df.iloc[-forecast_periods:]

        # 3. Model setup
        model = Prophet(
            yearly_seasonality=yearly_seasonality,
            weekly_seasonality=weekly_seasonality,
            daily_seasonality=daily_seasonality,
            changepoint_prior_scale=changepoint_prior_scale,
            seasonality_prior_scale=seasonality_prior_scale,
            holidays=holidays
        )
        model.fit(train)

        # 4. Forecast
        future = model.make_future_dataframe(periods=forecast_periods, freq='MS')
        forecast = model.predict(future)

        # 5. Extract forecasted values for test period
        forecast_test = forecast.set_index('ds').loc[test['ds']]
        y_pred = forecast_test['yhat'].values
        y_test = test['y'].values

        # 6. Metrics
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        metrics = {
            'district': district_name,
            'rmse': rmse,
            'forecast_periods': forecast_periods
        }

        # 7. Save forecast DataFrame
        forecast_df = pd.DataFrame({
            'district': district_name,
            'date': test['ds'].values,
            'actual': y_test,
            'forecast': y_pred
        })
        forecast_df.to_csv(
            os.path.join('Prophet_Forecasts', f'{district_name}_forecast.csv'),
            index=False
        )

        # 8. Plotting
        if plot_forecast:
            plt.figure(figsize=(12, 6))
            plt.plot(df['ds'], df['y'], label='Original')
            plt.plot(forecast_df['date'], forecast_df['forecast'], label='Prophet Forecast', color='crimson')
            plt.scatter(forecast_df['date'], forecast_df['actual'], label='Actual (Test)', color='black')
            plt.title(f'{district_name} Prophet Forecast (RMSE: {rmse:.2f})')
            plt.legend()
            plt.savefig(os.path.join('Prophet_Forecasts', f'{district_name}_forecast_plot.png'))
            plt.close()

        return metrics, forecast_df

    except Exception as e:
        print(f"⚠️ Skipping {district_name}: {str(e)}")
        return None, None

In [4]:
districts = data['district'].unique()
rmse_values = []

# Usage Example


def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I55"].asfreq('MS')
        
        metrics, forecast = prophet_forecast(
            series=ts,
            district_name=district,
            forecast_periods=6,  # Forecast the last year
            yearly_seasonality='auto',
            weekly_seasonality=False,
            daily_seasonality=False
        )

        
    
        # Show results
        print("=== Metrics ===")
        if metrics:
            print(metrics)
        print("\n=== Forecast Data ===")
        print(forecast)
    
    return results
run_for_each_district()

16:00:43 - cmdstanpy - INFO - Chain [1] start processing
16:00:44 - cmdstanpy - INFO - Chain [1] done processing
16:00:44 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AHMEDNAGAR', 'rmse': 2307.7065807141907, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual     forecast
0  AHMEDNAGAR 2020-10-01    6799  5849.173853
1  AHMEDNAGAR 2020-11-01    5248  5782.043775
2  AHMEDNAGAR 2020-12-01    4923  5542.589475
3  AHMEDNAGAR 2021-01-01    4542  9218.391484
4  AHMEDNAGAR 2021-02-01    4301  6484.628788
5  AHMEDNAGAR 2021-03-01    3941  5876.180198


16:00:44 - cmdstanpy - INFO - Chain [1] done processing
16:00:44 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AKOLA', 'rmse': 1134.5717133680312, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0    AKOLA 2020-10-01    2342  3473.345130
1    AKOLA 2020-11-01    2550  2534.169979
2    AKOLA 2020-12-01    2513  1952.443898
3    AKOLA 2021-01-01    2215  2928.316723
4    AKOLA 2021-02-01    1967  1083.963959
5    AKOLA 2021-03-01    2265  4465.120202


16:00:44 - cmdstanpy - INFO - Chain [1] done processing
16:00:45 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AMRAVATI', 'rmse': 590.3419421336738, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual     forecast
0  AMRAVATI 2020-10-01    3505  2810.074427
1  AMRAVATI 2020-11-01    3484  2630.879623
2  AMRAVATI 2020-12-01    3227  3045.584230
3  AMRAVATI 2021-01-01    2783  2876.941082
4  AMRAVATI 2021-02-01    2216  3122.960630
5  AMRAVATI 2021-03-01    2626  2752.378514


16:00:45 - cmdstanpy - INFO - Chain [1] done processing
16:00:45 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AURANGABAD', 'rmse': 476.6071065309574, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual     forecast
0  AURANGABAD 2020-10-01    6041  6324.680736
1  AURANGABAD 2020-11-01    6010  5706.937164
2  AURANGABAD 2020-12-01    6373  5466.643143
3  AURANGABAD 2021-01-01    6136  6350.310166
4  AURANGABAD 2021-02-01    5864  5304.221144
5  AURANGABAD 2021-03-01    5897  5797.802246


16:00:45 - cmdstanpy - INFO - Chain [1] done processing
16:00:45 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'BEED', 'rmse': 658.8394118615939, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0     BEED 2020-10-01    3112  3895.692927
1     BEED 2020-11-01    2812  1931.603660
2     BEED 2020-12-01    2752  2161.341979
3     BEED 2021-01-01    2275  2657.497741
4     BEED 2021-02-01    2079  2842.337143
5     BEED 2021-03-01    2382  2752.511660


16:00:46 - cmdstanpy - INFO - Chain [1] done processing
16:00:46 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'BHANDARA', 'rmse': 240.7377330147034, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual     forecast
0  BHANDARA 2020-10-01    1524  1628.302969
1  BHANDARA 2020-11-01    1320  1291.989948
2  BHANDARA 2020-12-01    1448  1414.183535
3  BHANDARA 2021-01-01    1100  1290.330956
4  BHANDARA 2021-02-01     954  1451.308548
5  BHANDARA 2021-03-01    1076  1302.669464


16:00:46 - cmdstanpy - INFO - Chain [1] done processing
16:00:46 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'BULDHANA', 'rmse': 302.0281604572532, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual     forecast
0  BULDHANA 2020-10-01    1664  1099.181377
1  BULDHANA 2020-11-01    1582  1407.254443
2  BULDHANA 2020-12-01    1464  1455.410502
3  BULDHANA 2021-01-01    1211  1503.710518
4  BULDHANA 2021-02-01    1338  1354.705302
5  BULDHANA 2021-03-01    1567  1232.728137


16:00:46 - cmdstanpy - INFO - Chain [1] done processing
16:00:47 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'CHANDRAPUR', 'rmse': 300.77530373100427, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual     forecast
0  CHANDRAPUR 2020-10-01    2760  2449.032870
1  CHANDRAPUR 2020-11-01    2186  2328.128878
2  CHANDRAPUR 2020-12-01    1946  1967.788763
3  CHANDRAPUR 2021-01-01    1652  1870.239687
4  CHANDRAPUR 2021-02-01    1528  2101.703545
5  CHANDRAPUR 2021-03-01    1680  1900.577553


16:00:47 - cmdstanpy - INFO - Chain [1] done processing
16:00:47 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'DHULE', 'rmse': 330.6828197973424, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0    DHULE 2020-10-01    2028  1350.420055
1    DHULE 2020-11-01    1809  1801.233661
2    DHULE 2020-12-01    1803  2005.690633
3    DHULE 2021-01-01    1700  1327.784234
4    DHULE 2021-02-01    1662  1585.828968
5    DHULE 2021-03-01    1633  1525.753693


16:00:47 - cmdstanpy - INFO - Chain [1] done processing
16:00:48 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'GADCHIROLI', 'rmse': 200.33044875967016, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual     forecast
0  GADCHIROLI 2020-10-01    1751  2013.911861
1  GADCHIROLI 2020-11-01    1807  1756.291703
2  GADCHIROLI 2020-12-01    1797  1435.332691
3  GADCHIROLI 2021-01-01    1553  1389.867627
4  GADCHIROLI 2021-02-01    1320  1382.352925
5  GADCHIROLI 2021-03-01    1502  1413.702473


16:00:48 - cmdstanpy - INFO - Chain [1] done processing
16:00:48 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'GONDIA', 'rmse': 186.65200368968897, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   GONDIA 2020-10-01    1065  1112.024365
1   GONDIA 2020-11-01     934  1033.995367
2   GONDIA 2020-12-01     990   931.284748
3   GONDIA 2021-01-01     953   764.503139
4   GONDIA 2021-02-01    1090   767.516086
5   GONDIA 2021-03-01    1022   789.946048


16:00:48 - cmdstanpy - INFO - Chain [1] done processing
16:00:49 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'HINGOLI', 'rmse': 113.30213148229582, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0  HINGOLI 2020-10-01    1229  1200.106060
1  HINGOLI 2020-11-01    1073  1168.508693
2  HINGOLI 2020-12-01     996  1008.026365
3  HINGOLI 2021-01-01     848   836.607088
4  HINGOLI 2021-02-01     788  1044.395175
5  HINGOLI 2021-03-01     890   922.473841


16:00:49 - cmdstanpy - INFO - Chain [1] done processing
16:00:49 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'JALGAON', 'rmse': 2113.665873006927, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0  JALGAON 2020-10-01    4900  4357.005141
1  JALGAON 2020-11-01    4163  4715.731638
2  JALGAON 2020-12-01    6939  5343.111902
3  JALGAON 2021-01-01    3425  6088.981014
4  JALGAON 2021-02-01    3322  6522.879214
5  JALGAON 2021-03-01    2919  5432.138167


16:00:49 - cmdstanpy - INFO - Chain [1] done processing
16:00:49 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'JALNA', 'rmse': 641.4862936531551, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0    JALNA 2020-10-01    2115   675.442131
1    JALNA 2020-11-01    1968  1524.091523
2    JALNA 2020-12-01    1879  1715.105734
3    JALNA 2021-01-01     936  1267.722760
4    JALNA 2021-02-01     997  1111.307294
5    JALNA 2021-03-01     830  1052.887679


16:00:49 - cmdstanpy - INFO - Chain [1] done processing
16:00:50 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'KOLHAPUR', 'rmse': 1073.2958431660186, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual     forecast
0  KOLHAPUR 2020-10-01    6246  3879.330874
1  KOLHAPUR 2020-11-01    5778  4835.189581
2  KOLHAPUR 2020-12-01    5687  5244.409049
3  KOLHAPUR 2021-01-01    5502  5640.161040
4  KOLHAPUR 2021-02-01    5141  5030.059080
5  KOLHAPUR 2021-03-01    4987  4545.993816


16:00:50 - cmdstanpy - INFO - Chain [1] done processing
16:00:50 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'LATUR', 'rmse': 258.78915022183287, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0    LATUR 2020-10-01    3978  4199.755791
1    LATUR 2020-11-01    3652  3855.462064
2    LATUR 2020-12-01    3509  3258.380950
3    LATUR 2021-01-01    3108  3452.042915
4    LATUR 2021-02-01    3476  3689.157675
5    LATUR 2021-03-01    3449  3158.058643


16:00:50 - cmdstanpy - INFO - Chain [1] done processing
16:00:50 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'MUMBAI', 'rmse': 3980.1329223911657, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   MUMBAI 2020-10-01    3638  9707.188016
1   MUMBAI 2020-11-01    4551  7743.444283
2   MUMBAI 2020-12-01    3083  6494.257832
3   MUMBAI 2021-01-01    2294  5562.311939
4   MUMBAI 2021-02-01    2517  6139.007715
5   MUMBAI 2021-03-01    2688  6235.466981


16:00:51 - cmdstanpy - INFO - Chain [1] done processing
16:00:51 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NAGPUR', 'rmse': 270.8910919430042, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   NAGPUR 2020-10-01    2712  2896.860761
1   NAGPUR 2020-11-01    2889  2321.449351
2   NAGPUR 2020-12-01    2822  2617.160405
3   NAGPUR 2021-01-01    2046  1927.982080
4   NAGPUR 2021-02-01    2416  2286.233137
5   NAGPUR 2021-03-01    2031  2137.196674


16:00:51 - cmdstanpy - INFO - Chain [1] done processing
16:00:51 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NANDED', 'rmse': 1069.801148161748, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   NANDED 2020-10-01    4756  3710.866780
1   NANDED 2020-11-01    4347  2616.859092
2   NANDED 2020-12-01    4200  3053.870515
3   NANDED 2021-01-01    2250  2925.090535
4   NANDED 2021-02-01    2131  2985.279649
5   NANDED 2021-03-01    3287  2755.961186


16:00:51 - cmdstanpy - INFO - Chain [1] done processing
16:00:52 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NANDURBAR', 'rmse': 274.06373259910816, 'forecast_periods': 6}

=== Forecast Data ===
    district       date  actual     forecast
0  NANDURBAR 2020-10-01    2593  2755.906020
1  NANDURBAR 2020-11-01    2566  2176.890641
2  NANDURBAR 2020-12-01    2217  2340.605339
3  NANDURBAR 2021-01-01    1855  1987.769790
4  NANDURBAR 2021-02-01    1625  2037.251132
5  NANDURBAR 2021-03-01    1865  2129.318028


16:00:52 - cmdstanpy - INFO - Chain [1] done processing
16:00:52 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NASHIK', 'rmse': 1277.39695864873, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   NASHIK 2020-10-01    7227  7129.495845
1   NASHIK 2020-11-01    6907  6867.077403
2   NASHIK 2020-12-01    6679  7993.430855
3   NASHIK 2021-01-01    6820  8136.323656
4   NASHIK 2021-02-01    6552  8347.678349
5   NASHIK 2021-03-01    5827  7586.107698


16:00:52 - cmdstanpy - INFO - Chain [1] done processing
16:00:52 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'OSMANABAD', 'rmse': 396.60696314913787, 'forecast_periods': 6}

=== Forecast Data ===
    district       date  actual     forecast
0  OSMANABAD 2020-10-01    2081  1272.022525
1  OSMANABAD 2020-11-01    1981  1703.493835
2  OSMANABAD 2020-12-01    1981  1798.499582
3  OSMANABAD 2021-01-01    1830  1676.491708
4  OSMANABAD 2021-02-01    1598  1567.737253
5  OSMANABAD 2021-03-01    1712  2105.117338


16:00:53 - cmdstanpy - INFO - Chain [1] done processing
16:00:53 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'PALGHAR', 'rmse': 704.5474703196696, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0  PALGHAR 2020-10-01    5385  4615.918624
1  PALGHAR 2020-11-01    5076  4671.450156
2  PALGHAR 2020-12-01    4708  5405.872651
3  PALGHAR 2021-01-01    4568  4543.292418
4  PALGHAR 2021-02-01    3787  5033.908665
5  PALGHAR 2021-03-01    4420  4845.156637


16:00:53 - cmdstanpy - INFO - Chain [1] done processing
16:00:53 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'PARBHANI', 'rmse': 691.053529180717, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual     forecast
0  PARBHANI 2020-10-01    1984   773.084890
1  PARBHANI 2020-11-01     868  1899.752792
2  PARBHANI 2020-12-01    1740  1611.759592
3  PARBHANI 2021-01-01    1755  1358.037368
4  PARBHANI 2021-02-01    1299   994.890258
5  PARBHANI 2021-03-01    1630  1369.245357


16:00:53 - cmdstanpy - INFO - Chain [1] done processing
16:00:54 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'PUNE', 'rmse': 1062.3913281900966, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual      forecast
0     PUNE 2020-10-01   14381  15318.817897
1     PUNE 2020-11-01   14329  13296.505528
2     PUNE 2020-12-01   13462  13724.660562
3     PUNE 2021-01-01   13048  14169.362069
4     PUNE 2021-02-01   11303  11821.163742
5     PUNE 2021-03-01   13097  11299.343849


16:00:54 - cmdstanpy - INFO - Chain [1] done processing
16:00:54 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'RAIGAD', 'rmse': 675.0664261351492, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   RAIGAD 2020-10-01    3615  2825.165168
1   RAIGAD 2020-11-01    3367  3270.784423
2   RAIGAD 2020-12-01    3787  2557.018622
3   RAIGAD 2021-01-01    3511  3281.058339
4   RAIGAD 2021-02-01    3272  3916.351586
5   RAIGAD 2021-03-01    3167  3513.807229


16:00:54 - cmdstanpy - INFO - Chain [1] done processing
16:00:54 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'RATNAGIRI', 'rmse': 327.1887173597828, 'forecast_periods': 6}

=== Forecast Data ===
    district       date  actual     forecast
0  RATNAGIRI 2020-10-01    1658  1550.734879
1  RATNAGIRI 2020-11-01    1155  1195.366389
2  RATNAGIRI 2020-12-01    1683  1597.331549
3  RATNAGIRI 2021-01-01    1239  1616.438218
4  RATNAGIRI 2021-02-01    1047  1618.191755
5  RATNAGIRI 2021-03-01    1037  1428.306499


16:00:55 - cmdstanpy - INFO - Chain [1] done processing
16:00:55 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SANGLI', 'rmse': 1086.0874985690762, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   SANGLI 2020-10-01    2330  2829.999115
1   SANGLI 2020-11-01    3203  3641.317430
2   SANGLI 2020-12-01    2586  2881.836416
3   SANGLI 2021-01-01    1822  3616.244776
4   SANGLI 2021-02-01    1718  3536.153665
5   SANGLI 2021-03-01    2696  2847.257364


16:00:55 - cmdstanpy - INFO - Chain [1] done processing
16:00:56 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SATARA', 'rmse': 1213.7556730394363, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   SATARA 2020-10-01    2412  4014.426353
1   SATARA 2020-11-01    2455  3433.028860
2   SATARA 2020-12-01    2576  3109.226187
3   SATARA 2021-01-01    2272  3413.205669
4   SATARA 2021-02-01    1962  3165.649993
5   SATARA 2021-03-01    2076  3585.785576


16:00:56 - cmdstanpy - INFO - Chain [1] done processing
16:00:56 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SINDHUDURG', 'rmse': 215.55903021393866, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual    forecast
0  SINDHUDURG 2020-10-01     435  239.968766
1  SINDHUDURG 2020-11-01     458  368.756663
2  SINDHUDURG 2020-12-01     456  319.920673
3  SINDHUDURG 2021-01-01     456   61.268229
4  SINDHUDURG 2021-02-01     391  213.215925
5  SINDHUDURG 2021-03-01     339  175.126005


16:00:56 - cmdstanpy - INFO - Chain [1] done processing
16:00:57 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SOLAPUR', 'rmse': 1792.9395662735674, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0  SOLAPUR 2020-10-01    2463  3728.236373
1  SOLAPUR 2020-11-01    3694  2941.800279
2  SOLAPUR 2020-12-01    6427  2770.451015
3  SOLAPUR 2021-01-01    4111  2290.186878
4  SOLAPUR 2021-02-01    2035  2503.025280
5  SOLAPUR 2021-03-01    2190  2655.196368


16:00:57 - cmdstanpy - INFO - Chain [1] done processing
16:00:57 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'THANE', 'rmse': 3897.438478926347, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual      forecast
0    THANE 2020-10-01   12242   5127.806748
1    THANE 2020-11-01    7730  13096.255812
2    THANE 2020-12-01   12598  14243.859739
3    THANE 2021-01-01    8749   9819.198063
4    THANE 2021-02-01    9308   7003.319840
5    THANE 2021-03-01   12833  11231.130997


16:00:58 - cmdstanpy - INFO - Chain [1] done processing
16:00:58 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'WARDHA', 'rmse': 421.8613809511082, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   WARDHA 2020-10-01    1425  1478.522556
1   WARDHA 2020-11-01    1342  1772.158964
2   WARDHA 2020-12-01    1209  1641.619800
3   WARDHA 2021-01-01    1027  1362.285869
4   WARDHA 2021-02-01    1006  1585.365786
5   WARDHA 2021-03-01    1105  1599.630701


16:00:58 - cmdstanpy - INFO - Chain [1] done processing
16:00:59 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'WASHIM', 'rmse': 223.77965709169757, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   WASHIM 2020-10-01     696  752.543315
1   WASHIM 2020-11-01     627  593.676085
2   WASHIM 2020-12-01     616  803.549319
3   WASHIM 2021-01-01     509  720.131293
4   WASHIM 2021-02-01     477  861.270341
5   WASHIM 2021-03-01     521  783.186030


16:00:59 - cmdstanpy - INFO - Chain [1] done processing


=== Metrics ===
{'district': 'YAVATMAL', 'rmse': 1251.0779732487774, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual     forecast
0  YAVATMAL 2020-10-01    3708  4538.350681
1  YAVATMAL 2020-11-01    3219  4007.932485
2  YAVATMAL 2020-12-01    3613  1762.432797
3  YAVATMAL 2021-01-01    3199  3752.219947
4  YAVATMAL 2021-02-01    2808  4889.926163
5  YAVATMAL 2021-03-01    3435  3554.214173
⚠️ Skipping MUMBAI SUBURBAN: Series too short (12 points) for 6-step forecast.
=== Metrics ===

=== Forecast Data ===
None


{}

In [5]:
districts

array(['AHMEDNAGAR', 'AKOLA', 'AMRAVATI', 'AURANGABAD', 'BEED',
       'BHANDARA', 'BULDHANA', 'CHANDRAPUR', 'DHULE', 'GADCHIROLI',
       'GONDIA', 'HINGOLI', 'JALGAON', 'JALNA', 'KOLHAPUR', 'LATUR',
       'MUMBAI', 'NAGPUR', 'NANDED', 'NANDURBAR', 'NASHIK', 'OSMANABAD',
       'PALGHAR', 'PARBHANI', 'PUNE', 'RAIGAD', 'RATNAGIRI', 'SANGLI',
       'SATARA', 'SINDHUDURG', 'SOLAPUR', 'THANE', 'WARDHA', 'WASHIM',
       'YAVATMAL', 'MUMBAI SUBURBAN'], dtype=object)