In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [3]:
def prophet_forecast(
    series,
    district_name,
    forecast_periods=12,
    yearly_seasonality='auto',
    weekly_seasonality=False,
    daily_seasonality=False,
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10.0,
    holidays=None,
    plot_forecast=True
):
    """
    Prophet time series forecasting with error handling and RMSE calculation.
    Returns (metrics, forecast_df) or (None, None) on error.
    """
    try:
        # 0. Directory setup
        os.makedirs('Prophet_Forecasts', exist_ok=True)

        # 1. Prepare DataFrame for Prophet
        df = pd.DataFrame({'ds': series.index, 'y': series.values})
        df = df.dropna()
        if len(df) < forecast_periods + 12:
            raise ValueError(f"Series too short ({len(df)} points) for {forecast_periods}-step forecast.")

        # 2. Train/test split
        train = df.iloc[:-forecast_periods]
        test = df.iloc[-forecast_periods:]

        # 3. Model setup
        model = Prophet(
            yearly_seasonality=yearly_seasonality,
            weekly_seasonality=weekly_seasonality,
            daily_seasonality=daily_seasonality,
            changepoint_prior_scale=changepoint_prior_scale,
            seasonality_prior_scale=seasonality_prior_scale,
            holidays=holidays
        )
        model.fit(train)

        # 4. Forecast
        future = model.make_future_dataframe(periods=forecast_periods, freq='MS')
        forecast = model.predict(future)

        # 5. Extract forecasted values for test period
        forecast_test = forecast.set_index('ds').loc[test['ds']]
        y_pred = forecast_test['yhat'].values
        y_test = test['y'].values

        # 6. Metrics
        rmse = np.sqrt(mean_squared_error(y_test, y_pred))
        metrics = {
            'district': district_name,
            'rmse': rmse,
            'forecast_periods': forecast_periods
        }

        # 7. Save forecast DataFrame
        forecast_df = pd.DataFrame({
            'district': district_name,
            'date': test['ds'].values,
            'actual': y_test,
            'forecast': y_pred
        })
        forecast_df.to_csv(
            os.path.join('Prophet_Forecasts', f'{district_name}_forecast.csv'),
            index=False
        )

        # 8. Plotting
        if plot_forecast:
            plt.figure(figsize=(12, 6))
            plt.plot(df['ds'], df['y'], label='Original')
            plt.plot(forecast_df['date'], forecast_df['forecast'], label='Prophet Forecast', color='crimson')
            plt.scatter(forecast_df['date'], forecast_df['actual'], label='Actual (Test)', color='black')
            plt.title(f'{district_name} Prophet Forecast (RMSE: {rmse:.2f})')
            plt.legend()
            plt.savefig(os.path.join('Prophet_Forecasts', f'{district_name}_forecast_plot.png'))
            plt.close()

        return metrics, forecast_df

    except Exception as e:
        print(f"⚠️ Skipping {district_name}: {str(e)}")
        return None, None

In [4]:
districts = data['district'].unique()
rmse_values = []

# Usage Example


def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I48"].asfreq('MS')
        
        metrics, forecast = prophet_forecast(
            series=ts,
            district_name=district,
            forecast_periods=6,  # Forecast the last year
            yearly_seasonality='auto',
            weekly_seasonality=False,
            daily_seasonality=False
        )

        
    
        # Show results
        print("=== Metrics ===")
        if metrics:
            print(metrics)
        print("\n=== Forecast Data ===")
        print(forecast)
    
    return results
run_for_each_district()

12:14:09 - cmdstanpy - INFO - Chain [1] start processing
12:14:09 - cmdstanpy - INFO - Chain [1] done processing
12:14:10 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AHMEDNAGAR', 'rmse': 106.6501375384711, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual    forecast
0  AHMEDNAGAR 2020-10-01     343  482.805534
1  AHMEDNAGAR 2020-11-01     468  423.752666
2  AHMEDNAGAR 2020-12-01     305  452.589230
3  AHMEDNAGAR 2021-01-01     372  449.668904
4  AHMEDNAGAR 2021-02-01     301  218.303959
5  AHMEDNAGAR 2021-03-01     316  425.947361


12:14:10 - cmdstanpy - INFO - Chain [1] done processing
12:14:11 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AKOLA', 'rmse': 156.37399207386676, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0    AKOLA 2020-10-01     493  636.332823
1    AKOLA 2020-11-01     514  758.265970
2    AKOLA 2020-12-01     450  302.838632
3    AKOLA 2021-01-01     481  288.561529
4    AKOLA 2021-02-01     416  497.974632
5    AKOLA 2021-03-01     363  396.134821


12:14:11 - cmdstanpy - INFO - Chain [1] done processing
12:14:12 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AMRAVATI', 'rmse': 400.23160036395007, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual     forecast
0  AMRAVATI 2020-10-01     819  1678.957293
1  AMRAVATI 2020-11-01     609   437.222143
2  AMRAVATI 2020-12-01     634   451.702797
3  AMRAVATI 2021-01-01     472   339.191007
4  AMRAVATI 2021-02-01     753   377.651510
5  AMRAVATI 2021-03-01     457   474.911935


12:14:12 - cmdstanpy - INFO - Chain [1] done processing
12:14:12 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'AURANGABAD', 'rmse': 194.37668875397003, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual    forecast
0  AURANGABAD 2020-10-01     889  817.846579
1  AURANGABAD 2020-11-01     837  391.295946
2  AURANGABAD 2020-12-01     522  498.600970
3  AURANGABAD 2021-01-01     685  547.167567
4  AURANGABAD 2021-02-01     477  530.545170
5  AURANGABAD 2021-03-01     489  512.801059


12:14:13 - cmdstanpy - INFO - Chain [1] done processing
12:14:13 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'BEED', 'rmse': 310.44790842604453, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0     BEED 2020-10-01     495  1125.884462
1     BEED 2020-11-01     472   321.383994
2     BEED 2020-12-01     561   252.198285
3     BEED 2021-01-01     439   631.731763
4     BEED 2021-02-01     414   310.047699
5     BEED 2021-03-01     439   558.402368


12:14:14 - cmdstanpy - INFO - Chain [1] done processing
12:14:14 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'BHANDARA', 'rmse': 80.70222803738089, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual    forecast
0  BHANDARA 2020-10-01     397  336.622188
1  BHANDARA 2020-11-01     285  415.853638
2  BHANDARA 2020-12-01     316  193.108517
3  BHANDARA 2021-01-01     194  239.967515
4  BHANDARA 2021-02-01     173  204.091965
5  BHANDARA 2021-03-01     225  236.265153


12:14:14 - cmdstanpy - INFO - Chain [1] done processing
12:14:15 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'BULDHANA', 'rmse': 62.14800743773779, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual    forecast
0  BULDHANA 2020-10-01     280  274.712185
1  BULDHANA 2020-11-01     328  265.423828
2  BULDHANA 2020-12-01     268  176.564683
3  BULDHANA 2021-01-01     168  250.087176
4  BULDHANA 2021-02-01     139  198.299828
5  BULDHANA 2021-03-01     195  219.805633


12:14:15 - cmdstanpy - INFO - Chain [1] done processing
12:14:15 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'CHANDRAPUR', 'rmse': 76.723385470598, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual    forecast
0  CHANDRAPUR 2020-10-01     628  750.447835
1  CHANDRAPUR 2020-11-01     562  490.850288
2  CHANDRAPUR 2020-12-01     467  452.554548
3  CHANDRAPUR 2021-01-01     385  271.752516
4  CHANDRAPUR 2021-02-01     422  374.799842
5  CHANDRAPUR 2021-03-01     385  386.262868


12:14:16 - cmdstanpy - INFO - Chain [1] done processing
12:14:16 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'DHULE', 'rmse': 173.2620541688867, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0    DHULE 2020-10-01     167  387.123922
1    DHULE 2020-11-01     356  149.701990
2    DHULE 2020-12-01     294  211.899727
3    DHULE 2021-01-01     398  160.109222
4    DHULE 2021-02-01     333  173.279384
5    DHULE 2021-03-01     368  351.817459


12:14:17 - cmdstanpy - INFO - Chain [1] done processing
12:14:17 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'GADCHIROLI', 'rmse': 62.61081819021534, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual    forecast
0  GADCHIROLI 2020-10-01     431  486.663194
1  GADCHIROLI 2020-11-01     434  386.019970
2  GADCHIROLI 2020-12-01     399  351.753207
3  GADCHIROLI 2021-01-01     360  381.865012
4  GADCHIROLI 2021-02-01     235  359.079846
5  GADCHIROLI 2021-03-01     272  275.750513


12:14:18 - cmdstanpy - INFO - Chain [1] done processing
12:14:18 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'GONDIA', 'rmse': 40.64717646206007, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   GONDIA 2020-10-01     335  396.151382
1   GONDIA 2020-11-01     425  424.247847
2   GONDIA 2020-12-01     365  305.197159
3   GONDIA 2021-01-01     314  364.010362
4   GONDIA 2021-02-01     254  257.396281
5   GONDIA 2021-03-01     336  326.826691


12:14:18 - cmdstanpy - INFO - Chain [1] done processing
12:14:19 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'HINGOLI', 'rmse': 93.82287440465291, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0  HINGOLI 2020-10-01     164  156.801792
1  HINGOLI 2020-11-01     125   62.856558
2  HINGOLI 2020-12-01      79  242.789449
3  HINGOLI 2021-01-01     296  171.443876
4  HINGOLI 2021-02-01     102   24.647635
5  HINGOLI 2021-03-01     126  150.045143


12:14:19 - cmdstanpy - INFO - Chain [1] done processing
12:14:19 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'JALGAON', 'rmse': 164.97288818919108, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0  JALGAON 2020-10-01     415  551.332991
1  JALGAON 2020-11-01     469  270.460909
2  JALGAON 2020-12-01     593  331.592190
3  JALGAON 2021-01-01     340  309.230072
4  JALGAON 2021-02-01     239  286.238372
5  JALGAON 2021-03-01     243  426.792199


12:14:19 - cmdstanpy - INFO - Chain [1] done processing
12:14:20 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'JALNA', 'rmse': 197.39093678704927, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0    JALNA 2020-10-01     226  173.314507
1    JALNA 2020-11-01     211  532.681316
2    JALNA 2020-12-01     163  502.842852
3    JALNA 2021-01-01     184  188.882673
4    JALNA 2021-02-01     166  210.717265
5    JALNA 2021-03-01     149  249.039108


12:14:20 - cmdstanpy - INFO - Chain [1] done processing
12:14:20 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'KOLHAPUR', 'rmse': 139.77128242749967, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual    forecast
0  KOLHAPUR 2020-10-01     332  478.963591
1  KOLHAPUR 2020-11-01     381  372.816476
2  KOLHAPUR 2020-12-01     365  483.579270
3  KOLHAPUR 2021-01-01     395  437.919784
4  KOLHAPUR 2021-02-01     621  339.793175
5  KOLHAPUR 2021-03-01     408  431.882460


12:14:20 - cmdstanpy - INFO - Chain [1] done processing
12:14:21 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'LATUR', 'rmse': 129.37282362103704, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0    LATUR 2020-10-01     448  341.904598
1    LATUR 2020-11-01     462  328.383674
2    LATUR 2020-12-01     367  340.625667
3    LATUR 2021-01-01     570  352.370142
4    LATUR 2021-02-01     322  431.512312
5    LATUR 2021-03-01     401  294.872243


12:14:21 - cmdstanpy - INFO - Chain [1] done processing
12:14:22 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'MUMBAI', 'rmse': 839.7326216228543, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   MUMBAI 2020-10-01     763  2100.714937
1   MUMBAI 2020-11-01     583  1667.102454
2   MUMBAI 2020-12-01     934  1165.036955
3   MUMBAI 2021-01-01     474  1032.632331
4   MUMBAI 2021-02-01     543  1059.347687
5   MUMBAI 2021-03-01     484  1280.293150


12:14:22 - cmdstanpy - INFO - Chain [1] done processing
12:14:22 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NAGPUR', 'rmse': 290.4775844874021, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   NAGPUR 2020-10-01     603  1146.599296
1   NAGPUR 2020-11-01     560   801.481782
2   NAGPUR 2020-12-01     819   857.843563
3   NAGPUR 2021-01-01     599   849.256696
4   NAGPUR 2021-02-01     374   657.373589
5   NAGPUR 2021-03-01     416   505.509158


12:14:22 - cmdstanpy - INFO - Chain [1] done processing
12:14:23 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NANDED', 'rmse': 135.8404733144762, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   NANDED 2020-10-01     391  601.665380
1   NANDED 2020-11-01     412  264.018355
2   NANDED 2020-12-01     334  310.339710
3   NANDED 2021-01-01     287  341.759331
4   NANDED 2021-02-01     220  412.780424
5   NANDED 2021-03-01     285  345.947928


12:14:23 - cmdstanpy - INFO - Chain [1] done processing
12:14:24 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NANDURBAR', 'rmse': 83.58735033075683, 'forecast_periods': 6}

=== Forecast Data ===
    district       date  actual    forecast
0  NANDURBAR 2020-10-01     568  696.688143
1  NANDURBAR 2020-11-01     511  461.874774
2  NANDURBAR 2020-12-01     505  489.406202
3  NANDURBAR 2021-01-01     342  386.311508
4  NANDURBAR 2021-02-01     363  254.256482
5  NANDURBAR 2021-03-01     354  448.420950


12:14:24 - cmdstanpy - INFO - Chain [1] done processing
12:14:24 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'NASHIK', 'rmse': 79.7734131875089, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0   NASHIK 2020-10-01    1264  1332.631543
1   NASHIK 2020-11-01    1037  1000.017896
2   NASHIK 2020-12-01     976   833.880527
3   NASHIK 2021-01-01     882   882.804580
4   NASHIK 2021-02-01     909  1006.577146
5   NASHIK 2021-03-01    1000   951.164268


12:14:24 - cmdstanpy - INFO - Chain [1] done processing
12:14:25 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'OSMANABAD', 'rmse': 23.155860064610145, 'forecast_periods': 6}

=== Forecast Data ===
    district       date  actual    forecast
0  OSMANABAD 2020-10-01     248  227.337847
1  OSMANABAD 2020-11-01     189  183.888931
2  OSMANABAD 2020-12-01     200  203.424106
3  OSMANABAD 2021-01-01     138  183.213244
4  OSMANABAD 2021-02-01     133  159.355124
5  OSMANABAD 2021-03-01     169  172.682541


12:14:25 - cmdstanpy - INFO - Chain [1] done processing
12:14:25 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'PALGHAR', 'rmse': 170.5264888553059, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0  PALGHAR 2020-10-01    1020  907.426234
1  PALGHAR 2020-11-01    1006  721.636738
2  PALGHAR 2020-12-01     948  687.648799
3  PALGHAR 2021-01-01     784  876.049307
4  PALGHAR 2021-02-01     589  615.490658
5  PALGHAR 2021-03-01     678  614.890528


12:14:26 - cmdstanpy - INFO - Chain [1] done processing
12:14:26 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'PARBHANI', 'rmse': 42.66379000686876, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual    forecast
0  PARBHANI 2020-10-01     315  295.079389
1  PARBHANI 2020-11-01     205  216.498113
2  PARBHANI 2020-12-01     179  221.448415
3  PARBHANI 2021-01-01     287  196.737805
4  PARBHANI 2021-02-01     196  197.285234
5  PARBHANI 2021-03-01     212  190.991123


12:14:26 - cmdstanpy - INFO - Chain [1] done processing
12:14:27 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'PUNE', 'rmse': 535.2524760248847, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0     PUNE 2020-10-01    1349  2197.360604
1     PUNE 2020-11-01    1578  1690.678253
2     PUNE 2020-12-01    1364  2035.420307
3     PUNE 2021-01-01    1384  1440.486184
4     PUNE 2021-02-01    1070  1632.666378
5     PUNE 2021-03-01    1211  1675.725516


12:14:27 - cmdstanpy - INFO - Chain [1] done processing
12:14:27 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'RAIGAD', 'rmse': 54.941081612640595, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   RAIGAD 2020-10-01     491  422.293383
1   RAIGAD 2020-11-01     465  410.936011
2   RAIGAD 2020-12-01     444  374.947924
3   RAIGAD 2021-01-01     377  387.789012
4   RAIGAD 2021-02-01     298  305.895542
5   RAIGAD 2021-03-01     261  335.301338


12:14:27 - cmdstanpy - INFO - Chain [1] done processing
12:14:28 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'RATNAGIRI', 'rmse': 68.05068958705064, 'forecast_periods': 6}

=== Forecast Data ===
    district       date  actual    forecast
0  RATNAGIRI 2020-10-01     253  271.058515
1  RATNAGIRI 2020-11-01     257  254.380960
2  RATNAGIRI 2020-12-01     209  246.841081
3  RATNAGIRI 2021-01-01     184  305.160177
4  RATNAGIRI 2021-02-01     197  202.605735
5  RATNAGIRI 2021-03-01     151  257.344951


12:14:28 - cmdstanpy - INFO - Chain [1] done processing
12:14:28 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SANGLI', 'rmse': 300.6581405666083, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   SANGLI 2020-10-01     373  821.608054
1   SANGLI 2020-11-01     410  645.105860
2   SANGLI 2020-12-01     432  506.080903
3   SANGLI 2021-01-01     300  614.696290
4   SANGLI 2021-02-01     294  643.484338
5   SANGLI 2021-03-01     339  582.283611


12:14:29 - cmdstanpy - INFO - Chain [1] done processing
12:14:29 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SATARA', 'rmse': 165.68781791479967, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   SATARA 2020-10-01     494  615.978643
1   SATARA 2020-11-01     518  482.222099
2   SATARA 2020-12-01     814  466.398884
3   SATARA 2021-01-01     577  440.632257
4   SATARA 2021-02-01     423  365.956737
5   SATARA 2021-03-01     401  477.676200


12:14:29 - cmdstanpy - INFO - Chain [1] done processing
12:14:30 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SINDHUDURG', 'rmse': 21.942750176560896, 'forecast_periods': 6}

=== Forecast Data ===
     district       date  actual    forecast
0  SINDHUDURG 2020-10-01     144  125.074494
1  SINDHUDURG 2020-11-01     117  136.137492
2  SINDHUDURG 2020-12-01     117  140.060542
3  SINDHUDURG 2021-01-01      86   97.853478
4  SINDHUDURG 2021-02-01      90   83.103601
5  SINDHUDURG 2021-03-01      89  127.008334


12:14:30 - cmdstanpy - INFO - Chain [1] done processing
12:14:30 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'SOLAPUR', 'rmse': 174.77163854639127, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0  SOLAPUR 2020-10-01     535  820.273124
1  SOLAPUR 2020-11-01     622  634.937863
2  SOLAPUR 2020-12-01     481  470.250844
3  SOLAPUR 2021-01-01     416  547.784259
4  SOLAPUR 2021-02-01     453  732.775261
5  SOLAPUR 2021-03-01     595  672.238449


12:14:31 - cmdstanpy - INFO - Chain [1] done processing
12:14:31 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'THANE', 'rmse': 315.6065334415665, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual     forecast
0    THANE 2020-10-01    1156  1387.450202
1    THANE 2020-11-01     848  1524.620795
2    THANE 2020-12-01    1031   971.619402
3    THANE 2021-01-01     949   765.997246
4    THANE 2021-02-01     620   665.984198
5    THANE 2021-03-01     686   903.093061


12:14:31 - cmdstanpy - INFO - Chain [1] done processing
12:14:32 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'WARDHA', 'rmse': 56.47026437550319, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   WARDHA 2020-10-01     241  256.379481
1   WARDHA 2020-11-01     231  206.683436
2   WARDHA 2020-12-01     310  189.110606
3   WARDHA 2021-01-01     166  216.197013
4   WARDHA 2021-02-01     150  146.574811
5   WARDHA 2021-03-01     125  159.055883


12:14:32 - cmdstanpy - INFO - Chain [1] done processing
12:14:32 - cmdstanpy - INFO - Chain [1] start processing


=== Metrics ===
{'district': 'WASHIM', 'rmse': 34.83117651237075, 'forecast_periods': 6}

=== Forecast Data ===
  district       date  actual    forecast
0   WASHIM 2020-10-01     113  125.591575
1   WASHIM 2020-11-01     154  100.959112
2   WASHIM 2020-12-01     123  166.693059
3   WASHIM 2021-01-01     105  131.821740
4   WASHIM 2021-02-01     100   61.441754
5   WASHIM 2021-03-01      99   85.138033


12:14:32 - cmdstanpy - INFO - Chain [1] done processing


=== Metrics ===
{'district': 'YAVATMAL', 'rmse': 61.44487250223455, 'forecast_periods': 6}

=== Forecast Data ===
   district       date  actual    forecast
0  YAVATMAL 2020-10-01     182  252.315408
1  YAVATMAL 2020-11-01     174  233.365683
2  YAVATMAL 2020-12-01     163  191.676978
3  YAVATMAL 2021-01-01     146  196.666520
4  YAVATMAL 2021-02-01     140  179.917871
5  YAVATMAL 2021-03-01     161  256.923884
⚠️ Skipping MUMBAI SUBURBAN: Series too short (12 points) for 6-step forecast.
=== Metrics ===

=== Forecast Data ===
None


{}

In [5]:
districts

array(['AHMEDNAGAR', 'AKOLA', 'AMRAVATI', 'AURANGABAD', 'BEED',
       'BHANDARA', 'BULDHANA', 'CHANDRAPUR', 'DHULE', 'GADCHIROLI',
       'GONDIA', 'HINGOLI', 'JALGAON', 'JALNA', 'KOLHAPUR', 'LATUR',
       'MUMBAI', 'NAGPUR', 'NANDED', 'NANDURBAR', 'NASHIK', 'OSMANABAD',
       'PALGHAR', 'PARBHANI', 'PUNE', 'RAIGAD', 'RATNAGIRI', 'SANGLI',
       'SATARA', 'SINDHUDURG', 'SOLAPUR', 'THANE', 'WARDHA', 'WASHIM',
       'YAVATMAL', 'MUMBAI SUBURBAN'], dtype=object)