In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit


In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [3]:
def bayesian_ridge_regression_lags_only(
    series,
    district_name,
    max_lags=3,
    differencing=True,
    n_iter=300,
    alpha_1=1e-6,
    alpha_2=1e-6,
    lambda_1=1e-6,
    lambda_2=1e-6,
    compute_scores=True,
    random_state=None
):
    """
    Bayesian Ridge Regression for time series forecasting with:
    - Probabilistic uncertainty estimates
    - Automatic complexity control
    - Lag features only
    """
    # Create directory structure
    os.makedirs('BayesianRidgeRegression', exist_ok=True)
    
    # 1. Stationarity and Differencing
    original_series = series.copy()
    d = 0
    if differencing:
        adf_result = adfuller(series.dropna())
        if adf_result[1] > 0.05:
            d = 1
            series = series.diff().dropna()

    # 2. Feature Engineering - Lag features
    df = pd.DataFrame({'y': series.astype(float)})
    for lag in range(1, max_lags + 1):
        df[f'lag_{lag}'] = df['y'].shift(lag).astype(float)
    df = df.dropna()

    # 3. Train-Test Split
    train_size = int(len(df) * 0.8)
    train = df.iloc[:train_size]
    test = df.iloc[train_size:]

    X_train = train.drop(columns=['y'])
    y_train = train['y']
    X_test = test.drop(columns=['y'])
    y_test = test['y']

    # 4. Model Training with Bayesian Ridge
    model = BayesianRidge(
        n_iter=n_iter,
        alpha_1=alpha_1,
        alpha_2=alpha_2,
        lambda_1=lambda_1,
        lambda_2=lambda_2,
        compute_score=compute_scores,
    )
    model.fit(X_train, y_train)

    # 5. Forecasting with Uncertainty
    pred_test, pred_test_std = model.predict(X_test, return_std=True)

    # 6. Inverse Differencing
    if d == 1:
        last_train_value = original_series.iloc[len(original_series) - len(test) - 1]
        pred_test = np.cumsum(pred_test) + last_train_value
        y_test = original_series.iloc[-len(test):]

    # 7. Create forecast DataFrame with uncertainty
    forecast_df = pd.DataFrame({
        'district': district_name,
        'date': y_test.index,
        'actual': y_test.values,
        'forecast_mean': pred_test,
        'forecast_std': pred_test_std
    })

    # 8. Save forecasts
    forecast_csv_path = os.path.join('BayesianRidgeRegression', 'bayesian_forecasts.csv')
    write_header = not os.path.exists(forecast_csv_path)
    forecast_df.to_csv(forecast_csv_path, mode='a', header=write_header, index=False)

    # 9. Calculate metrics and uncertainty statistics
    rmse = np.sqrt(mean_squared_error(forecast_df['actual'], forecast_df['forecast_mean']))
    log_marginal_likelihood = model.scores_[-1] if compute_scores else np.nan
    
    # Calculate coefficient statistics
    coef_means = model.coef_
    coef_stds = np.sqrt(np.diag(model.sigma_))
    
    metrics_df = pd.DataFrame([{
        'district': district_name,
        'rmse': rmse,
        'log_marginal_likelihood': log_marginal_likelihood,
        'coefficient_means': coef_means.tolist(),
        'coefficient_stds': coef_stds.tolist(),
        'intercept_mean': model.intercept_,
        'intercept_std': np.nan,
        'effective_params': model.lambda_ / model.alpha_,
        'differencing': d,
        'n_iter': n_iter
    }])

    metrics_csv_path = os.path.join('BayesianRidgeRegression', 'bayesian_metrics.csv')
    write_header_metrics = not os.path.exists(metrics_csv_path)
    metrics_df.to_csv(metrics_csv_path, mode='a', header=write_header_metrics, index=False)

    # 10. Plotting with uncertainty bands
    plt.figure(figsize=(12, 6))
    plt.plot(original_series.index, original_series.values, label='Original Series')
    plt.plot(forecast_df['date'], forecast_df['forecast_mean'], label='Mean Forecast', color='darkorange')
    plt.fill_between(
        forecast_df['date'],
        forecast_df['forecast_mean'] - 1.96*forecast_df['forecast_std'],
        forecast_df['forecast_mean'] + 1.96*forecast_df['forecast_std'],
        color='orange',
        alpha=0.2,
        label='95% Confidence'
    )
    plt.title(
        f'Bayesian Ridge Forecast for {district_name}\n'
        f'RMSE: {rmse:.2f} | Log ML: {log_marginal_likelihood:.2f}'
    )
    plt.legend()
    plt.savefig(os.path.join('BayesianRidgeRegression', f'bayesian_forecast_{district_name}.png'))
    plt.close()

    return {
        'district': district_name,
        'forecast_df': forecast_df,
        'metrics_df': metrics_df,
        'model': model,
        'coef_means': coef_means,
        'coef_stds': coef_stds,
        'intercept_mean': model.intercept_,
        'intercept_std': np.nan
    }

In [4]:
districts = data['district'].unique()
rmse_values = []

def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I40"].asfreq('MS')
        
        results = bayesian_ridge_regression_lags_only(
            ts,
            district,
            max_lags=4,
            n_iter=500,
            alpha_1=1e-5,
            lambda_1=1e-3
        )
        
    
        # Show results
        print("=== Metrics ===")
        print(results['metrics_df'])
        print("\n=== Forecast Data ===")
        print(results['forecast_df'].head())
    
    return results
run_for_each_district()



=== Metrics ===
     district       rmse  log_marginal_likelihood  \
0  AHMEDNAGAR  15.812603              -133.291919   

                                   coefficient_means  \
0  [0.0034438916536789622, -0.0001901844947154823...   

                                    coefficient_stds  intercept_mean  \
0  [0.018740734927776857, 0.018737301824296237, 0...       43.700926   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     333996.938669             0     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  AHMEDNAGAR 2020-07-01    40.0      43.936148     10.987037
2020-08-01  AHMEDNAGAR 2020-08-01    55.0      43.933439     10.987042
2020-09-01  AHMEDNAGAR 2020-09-01    36.0      43.993783     11.017748
2020-10-01  AHMEDNAGAR 2020-10-01    82.0      43.931347     11.017671
2020-11-01  AHMEDNAGAR 2020-11-01    49.0      44.11



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0    AKOLA  11.008634              -132.707331   

                                   coefficient_means  \
0  [0.14113531980791238, 0.13091907469375633, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.1121157745601413, 0.11301857421922183, 0.11...       33.628315   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       4861.023467             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    AKOLA 2020-07-01    34.0      45.406104     12.679692
2020-08-01    AKOLA 2020-08-01    45.0      44.619256     12.687100
2020-09-01    AKOLA 2020-09-01    54.0      45.696574     12.781596
2020-10-01    AKOLA 2020-10-01    48.0      48.289136     13.205992
2020-11-01    AKOLA 2020-11-01    39.0      48.656983     13.443394




=== Metrics ===
   district      rmse  log_marginal_likelihood  \
0  AMRAVATI  17.32472              -132.099504   

                                   coefficient_means  \
0  [0.37920177011948636, 0.26511015658752474, -0....   

                                    coefficient_stds  intercept_mean  \
0  [0.14356654845522648, 0.14814385806708463, 0.1...       39.229023   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       1029.281666             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  AMRAVATI 2020-07-01    62.0      57.714249     13.370725
2020-08-01  AMRAVATI 2020-08-01    70.0      60.721323     14.081303
2020-09-01  AMRAVATI 2020-09-01    98.0      66.272829     15.020391
2020-10-01  AMRAVATI 2020-10-01    55.0      76.450239     17.353481
2020-11-01  AMRAVATI 2020-11-01    45.0      64.828711     16.613416




=== Metrics ===
     district       rmse  log_marginal_likelihood  \
0  AURANGABAD  15.362882              -139.449569   

                                   coefficient_means  \
0  [-0.0013505983532587106, 0.0004629224497920743...   

                                    coefficient_stds  intercept_mean  \
0  [0.013377767864347738, 0.013377766803545986, 0...        0.617857   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      1.187070e+06             1     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  AURANGABAD 2020-07-01      53      63.600349     14.629612
2020-08-01  AURANGABAD 2020-08-01      74      64.252453     14.629729
2020-09-01  AURANGABAD 2020-09-01      76      64.836982     14.623581
2020-10-01  AURANGABAD 2020-10-01      56      65.466248     14.623601
2020-11-01  AURANGABAD 2020-11-01      57      66.10



=== Metrics ===
  district      rmse  log_marginal_likelihood  \
0     BEED  8.098236              -132.700277   

                                   coefficient_means  \
0  [0.3387262010446299, 0.0595252116911144, -0.11...   

                                    coefficient_stds  intercept_mean  \
0  [0.13594657896485018, 0.140652817416538, 0.140...       18.704715   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       1947.362481             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01     BEED 2020-07-01    30.0      24.976030     11.628806
2020-08-01     BEED 2020-08-01    30.0      26.934707     11.253439
2020-09-01     BEED 2020-09-01    33.0      28.319367     11.441747
2020-10-01     BEED 2020-10-01    16.0      28.241218     11.446592
2020-11-01     BEED 2020-11-01    18.0      22.706220     11.316344




=== Metrics ===
   district      rmse  log_marginal_likelihood  \
0  BHANDARA  4.649879              -112.174989   

                                   coefficient_means  \
0  [0.10651220504964201, 0.05056035883180311, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.08847487145858714, 0.08863124453956302, 0.0...       16.004326   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        2952.42459             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  BHANDARA 2020-07-01    19.0      18.541506      6.300106
2020-08-01  BHANDARA 2020-08-01    22.0      18.729666      6.439450
2020-09-01  BHANDARA 2020-09-01    28.0      19.200884      6.555192
2020-10-01  BHANDARA 2020-10-01    22.0      20.037197      6.816962
2020-11-01  BHANDARA 2020-11-01    25.0      19.681356      6.912928




=== Metrics ===
   district      rmse  log_marginal_likelihood  \
0  BULDHANA  8.461511              -118.341265   

                                   coefficient_means  \
0  [-0.3451426174635145, 0.07426871902212909, -0....   

                                    coefficient_stds  intercept_mean  \
0  [0.13580478981861357, 0.14195016250009934, 0.1...        0.136243   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        788.554205             1     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  BULDHANA 2020-07-01      31      26.422366      7.312392
2020-08-01  BULDHANA 2020-08-01      25      23.817429      7.305825
2020-09-01  BULDHANA 2020-09-01      17      27.062669      7.308357
2020-10-01  BULDHANA 2020-10-01      19      28.436527      7.451945
2020-11-01  BULDHANA 2020-11-01      24      28.044809      7.446397




=== Metrics ===
     district       rmse  log_marginal_likelihood  \
0  CHANDRAPUR  14.586615              -125.398391   

                                   coefficient_means  \
0  [0.0010362513095185676, 0.0003596417179671284,...   

                                    coefficient_stds  intercept_mean  \
0  [0.012328292590819983, 0.012325701773783384, 0...       42.869654   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     495788.127691             0     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  CHANDRAPUR 2020-07-01    41.0      42.896016      8.750572
2020-08-01  CHANDRAPUR 2020-08-01    37.0      42.893948      8.755687
2020-09-01  CHANDRAPUR 2020-09-01    44.0      42.880530      8.760251
2020-10-01  CHANDRAPUR 2020-10-01    67.0      42.884988      8.763171
2020-11-01  CHANDRAPUR 2020-11-01    47.0      42.91



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0    DHULE  12.637071              -143.641936   

                                   coefficient_means  \
0  [0.2619950443173953, -0.030704254611393172, 0....   

                                    coefficient_stds  intercept_mean  \
0  [0.13648900009133458, 0.1366657736695784, 0.13...        7.052818   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       2990.304983             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    DHULE 2020-07-01    24.0      16.024493     13.864484
2020-08-01    DHULE 2020-08-01    31.0      19.460676     14.008295
2020-09-01    DHULE 2020-09-01    27.0      23.274372     14.199148
2020-10-01    DHULE 2020-10-01    50.0      23.201144     14.269799
2020-11-01    DHULE 2020-11-01    25.0      31.895965     15.002754




=== Metrics ===
     district     rmse  log_marginal_likelihood  \
0  GADCHIROLI  9.22512              -123.893131   

                                   coefficient_means  \
0  [0.15727949863090246, 0.08692067858567977, -0....   

                                    coefficient_stds  intercept_mean  \
0  [0.10700702865675726, 0.10769127547166038, 0.1...       26.429435   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       3078.789058             0     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  GADCHIROLI 2020-07-01    46.0      29.921771      9.434340
2020-08-01  GADCHIROLI 2020-08-01    44.0      33.281063     10.187424
2020-09-01  GADCHIROLI 2020-09-01    44.0      34.730202     10.575093
2020-10-01  GADCHIROLI 2020-10-01    41.0      33.589709     11.054033
2020-11-01  GADCHIROLI 2020-11-01    40.0      31.867088



=== Metrics ===
  district      rmse  log_marginal_likelihood  \
0   GONDIA  7.087232              -111.585707   

                                   coefficient_means  \
0  [0.14509699400953616, 0.05175306801620433, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.10425725239869542, 0.10547458669756635, 0.1...       16.614842   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        1844.61089             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   GONDIA 2020-07-01    23.0      20.661400      6.491944
2020-08-01   GONDIA 2020-08-01    21.0      21.349318      6.620575
2020-09-01   GONDIA 2020-09-01    28.0      21.560280      6.716273
2020-10-01   GONDIA 2020-10-01    39.0      22.494707      7.071527
2020-11-01   GONDIA 2020-11-01    30.0      24.287514      7.724299




=== Metrics ===
  district      rmse  log_marginal_likelihood  \
0  HINGOLI  3.721333              -104.825905   

                                   coefficient_means  \
0  [0.2878290032214991, 0.055117705520150974, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.13458151258697384, 0.1400382379711064, 0.13...        9.804733   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        461.405622             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01  HINGOLI 2020-07-01    12.0      11.444302      5.061984
2020-08-01  HINGOLI 2020-08-01    19.0      12.480544      5.198848
2020-09-01  HINGOLI 2020-09-01    14.0      14.454379      5.456525
2020-10-01  HINGOLI 2020-10-01    20.0      14.849739      5.362177
2020-11-01  HINGOLI 2020-11-01    12.0      15.733734      5.731312




=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0  JALGAON  10.481543               -141.19524   

                                   coefficient_means  \
0  [0.32393749034030817, 0.07840786685681195, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.1335236463033284, 0.1398384092691553, 0.139...       18.953918   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        3445.45622             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01  JALGAON 2020-07-01    21.0      32.464236     14.519679
2020-08-01  JALGAON 2020-08-01    25.0      29.467131     14.136946
2020-09-01  JALGAON 2020-09-01    41.0      29.323893     13.988657
2020-10-01  JALGAON 2020-10-01    37.0      34.618840     14.102425
2020-11-01  JALGAON 2020-11-01    39.0      35.180186     14.163238




=== Metrics ===
  district      rmse  log_marginal_likelihood  \
0    JALNA  6.326275              -124.581907   

                                   coefficient_means  \
0  [0.0653980741387751, 0.012573098875019865, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.0723120278047876, 0.07229522347370346, 0.07...       26.528856   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      10980.947119             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    JALNA 2020-07-01    38.0      29.344179      9.091517
2020-08-01    JALNA 2020-08-01    26.0      30.133182      9.310681
2020-09-01    JALNA 2020-09-01    25.0      29.539154      9.348614
2020-10-01    JALNA 2020-10-01    29.0      29.467279      9.279064
2020-11-01    JALNA 2020-11-01    20.0      29.657571      9.318846




=== Metrics ===
   district      rmse  log_marginal_likelihood  \
0  KOLHAPUR  7.748103              -129.446511   

                                   coefficient_means  \
0  [0.26940173849170357, 0.08297388379240608, 0.1...   

                                    coefficient_stds  intercept_mean  \
0  [0.13292490326879547, 0.1359769094458528, 0.13...       12.840428   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       1815.853752             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  KOLHAPUR 2020-07-01    23.0      23.527194     10.440550
2020-08-01  KOLHAPUR 2020-08-01    20.0      23.566958     10.187382
2020-09-01  KOLHAPUR 2020-09-01    16.0      23.298561     10.149599
2020-10-01  KOLHAPUR 2020-10-01    14.0      20.923419     10.080107
2020-11-01  KOLHAPUR 2020-11-01    15.0      19.810698      9.847635




=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0    LATUR  19.379418              -114.949564   

                                   coefficient_means  \
0  [0.3970633908470405, -0.270046560288058, 0.183...   

                                    coefficient_stds  intercept_mean  \
0  [0.15060929282143437, 0.1560099888053007, 0.15...       20.817782   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        349.226863             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    LATUR 2020-07-01    16.0      28.810881      8.509726
2020-08-01    LATUR 2020-08-01    58.0      17.771155      8.156455
2020-09-01    LATUR 2020-09-01    35.0      42.389336     11.805695
2020-10-01    LATUR 2020-10-01    38.0      14.334917     11.615260
2020-11-01    LATUR 2020-11-01    14.0      33.793737     11.695879




=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0   MUMBAI  14.421965              -159.860675   

                                   coefficient_means  \
0  [-0.3871752151153364, -0.03250206944848746, 0....   

                                    coefficient_stds  intercept_mean  \
0  [0.14122776854847338, 0.1525913920076086, 0.15...       -4.315317   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       8666.834581             1     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   MUMBAI 2020-07-01      74      58.366777     26.777763
2020-08-01   MUMBAI 2020-08-01      66      67.354464     26.645202
2020-09-01   MUMBAI 2020-09-01      76      67.269287     24.762149
2020-10-01   MUMBAI 2020-10-01      90      58.375715     24.777942
2020-11-01   MUMBAI 2020-11-01      69      48.335517     24.912592
=== Metr



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0   NANDED  16.823654              -126.822595   

                                   coefficient_means  \
0  [-0.3419536003887913, -0.1482172542206433, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.13085549138325858, 0.12712293127975624, 0.1...       -1.852721   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       1415.357537             1     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   NANDED 2020-07-01      26      21.597135      9.454016
2020-08-01   NANDED 2020-08-01      32      22.125180      9.439634
2020-09-01   NANDED 2020-09-01      50      16.134096      9.406423
2020-10-01   NANDED 2020-10-01      21       7.295951      9.674369
2020-11-01   NANDED 2020-11-01      33      12.852403     10.131550




=== Metrics ===
    district      rmse  log_marginal_likelihood  \
0  NANDURBAR  9.280757              -121.501254   

                                   coefficient_means  \
0  [0.4381663191328681, 0.12749251004229764, 0.12...   

                                    coefficient_stds  intercept_mean  \
0  [0.12868654347232059, 0.14315033017331305, 0.1...       24.698258   

   intercept_std  effective_params  differencing  n_iter  
0            NaN          479.4926             0     500  

=== Forecast Data ===
             district       date  actual  forecast_mean  forecast_std
date                                                                 
2020-07-01  NANDURBAR 2020-07-01    42.0      32.012692      9.017355
2020-08-01  NANDURBAR 2020-08-01    48.0      42.764741      8.897922
2020-09-01  NANDURBAR 2020-09-01    52.0      44.227300      9.480054
2020-10-01  NANDURBAR 2020-10-01    44.0      48.833815      9.881574
2020-11-01  NANDURBAR 2020-11-01    56.0      42.989569     10



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0   NASHIK  15.865218              -144.065429   

                                   coefficient_means  \
0  [0.48364950592274264, 0.043136340190653516, -0...   

                                    coefficient_stds  intercept_mean  \
0  [0.1362809562724647, 0.15042405008818677, 0.15...       54.250084   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        2174.32252             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   NASHIK 2020-07-01    68.0      93.214081     20.059470
2020-08-01   NASHIK 2020-08-01    94.0      78.509680     20.081105
2020-09-01   NASHIK 2020-09-01   111.0      91.677296     22.172121
2020-10-01   NASHIK 2020-10-01    96.0     104.026528     22.571701
2020-11-01   NASHIK 2020-11-01    87.0      93.632004     21.303253
=== Metr



=== Metrics ===
   district    rmse  log_marginal_likelihood  \
0  PARBHANI  8.5955               -95.896569   

                                   coefficient_means  \
0  [0.6316286202945349, 0.053283039067816354, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.1527481891641121, 0.18320323114517362, 0.18...        4.683864   

   intercept_std  effective_params  differencing  n_iter  
0            NaN         84.930093             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  PARBHANI 2020-07-01    11.0      13.355330      3.901707
2020-08-01  PARBHANI 2020-08-01    33.0      11.119631      3.868494
2020-09-01  PARBHANI 2020-09-01    18.0      24.335759      5.591681
2020-10-01  PARBHANI 2020-10-01    12.0      16.375930      5.383942
2020-11-01  PARBHANI 2020-11-01    21.0      12.495435      5.386597
=== M



=== Metrics ===
  district      rmse  log_marginal_likelihood  \
0   RAIGAD  8.658585              -123.046386   

                                   coefficient_means  \
0  [0.4427715579243928, 0.3358863871217769, 0.014...   

                                    coefficient_stds  intercept_mean  \
0  [0.1444206289280269, 0.1578468889452393, 0.147...       10.219947   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        488.177788             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   RAIGAD 2020-07-01    21.0      28.977155      8.037925
2020-08-01   RAIGAD 2020-08-01    29.0      26.011993      8.029587
2020-09-01   RAIGAD 2020-09-01    35.0      25.496921      8.288889
2020-10-01   RAIGAD 2020-10-01    32.0      30.129230      8.566703
2020-11-01   RAIGAD 2020-11-01    11.0      32.181761      8.505817
=== Metric



=== Metrics ===
  district     rmse  log_marginal_likelihood  \
0   SANGLI  8.37888              -116.786012   

                                   coefficient_means  \
0  [0.0004152814211863075, -0.0009423365588148197...   

                                    coefficient_stds  intercept_mean  \
0  [0.011328550794957322, 0.01132887539654175, 0....        26.93626   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     359255.434894             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   SANGLI 2020-07-01    24.0      26.915191      6.821225
2020-08-01   SANGLI 2020-08-01    16.0      26.925951      6.821229
2020-09-01   SANGLI 2020-09-01    13.0      26.914686      6.819880
2020-10-01   SANGLI 2020-10-01    28.0      26.921037      6.816917
2020-11-01   SANGLI 2020-11-01    34.0      26.929571      6.822169
=== Metrics 



=== Metrics ===
     district      rmse  log_marginal_likelihood  \
0  SINDHUDURG  2.615488               -81.967406   

                                   coefficient_means  \
0  [0.0250464623192942, -0.053205164673277995, -0...   

                                    coefficient_stds  intercept_mean  \
0  [0.0924809080654782, 0.09243009612712637, 0.09...        6.962193   

   intercept_std  effective_params  differencing  n_iter  
0            NaN        500.349893             0     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  SINDHUDURG 2020-07-01     9.0       6.010306      2.794082
2020-08-01  SINDHUDURG 2020-08-01     3.0       6.037628      2.837813
2020-09-01  SINDHUDURG 2020-09-01    10.0       5.602504      2.752610
2020-10-01  SINDHUDURG 2020-10-01     4.0       5.744584      2.926900
2020-11-01  SINDHUDURG 2020-11-01     6.0       5.7416



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0    THANE  14.221095              -140.756733   

                                   coefficient_means  \
0  [-0.01405606507310405, 0.007412603816146889, -...   

                                    coefficient_stds  intercept_mean  \
0  [0.037460683207906194, 0.03742442205847069, 0....       -0.411633   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     155900.065241             1     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    THANE 2020-07-01      98      74.797416     15.165146
2020-08-01    THANE 2020-08-01      79      73.987692     15.186288
2020-09-01    THANE 2020-09-01      76      74.016729     15.199416
2020-10-01    THANE 2020-10-01      68      73.494681     15.197907
2020-11-01    THANE 2020-11-01      78      73.129611     15.194620
=== Metr



=== Metrics ===
  district     rmse  log_marginal_likelihood  \
0   WASHIM  4.20541               -95.022618   

                                   coefficient_means  \
0  [0.09002989152908955, -0.021455599763839917, -...   

                                    coefficient_stds  intercept_mean  \
0  [0.0898317634579432, 0.09200730096592073, 0.09...        8.789563   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       1145.134649             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   WASHIM 2020-07-01    13.0       9.033511      4.044284
2020-08-01   WASHIM 2020-08-01    18.0       8.901882      4.172299
2020-09-01   WASHIM 2020-09-01     7.0       9.053828      4.452133
2020-10-01   WASHIM 2020-10-01    10.0       8.120569      4.216252
2020-11-01   WASHIM 2020-11-01     5.0       8.363911      4.214847




=== Metrics ===
   district      rmse  log_marginal_likelihood  \
0  YAVATMAL  6.683218              -114.460736   

                                   coefficient_means  \
0  [0.41494245675227354, 0.22008702384187856, -0....   

                                    coefficient_stds  intercept_mean  \
0  [0.14706047505338046, 0.15784777884710585, 0.1...        4.836494   

   intercept_std  effective_params  differencing  n_iter  
0            NaN          439.1632             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  YAVATMAL 2020-07-01     3.0       6.308960      6.372975
2020-08-01  YAVATMAL 2020-08-01     7.0       6.023115      5.981038
2020-09-01  YAVATMAL 2020-09-01     8.0       8.382488      5.997124
2020-10-01  YAVATMAL 2020-10-01    23.0       9.543263      5.995167
2020-11-01  YAVATMAL 2020-11-01     6.0      15.786776      6.582617
=



{'district': 'MUMBAI SUBURBAN',
 'forecast_df':                    district       date  actual  forecast_mean  forecast_std
 date                                                                       
 2021-02-01  MUMBAI SUBURBAN 2021-02-01      28      29.585505      9.751985
 2021-03-01  MUMBAI SUBURBAN 2021-03-01      28      35.514140      8.665135,
 'metrics_df':           district      rmse  log_marginal_likelihood  \
 0  MUMBAI SUBURBAN  5.430291               -19.325982   
 
                                    coefficient_means  \
 0  [-0.6332501940717783, 0.20732265664917227, -0....   
 
                                     coefficient_stds  intercept_mean  \
 0  [0.3144703711635698, 0.3879400634302669, 0.431...       -5.889937   
 
    intercept_std  effective_params  differencing  n_iter  
 0            NaN        156.642563             1     500  ,
 'model': BayesianRidge(alpha_1=1e-05, compute_score=True, lambda_1=0.001, n_iter=500),
 'coef_means': array([-0.63325019,  0.2