In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import TimeSeriesSplit


In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [3]:
def bayesian_ridge_regression_lags_only(
    series,
    district_name,
    max_lags=3,
    differencing=True,
    n_iter=300,
    alpha_1=1e-6,
    alpha_2=1e-6,
    lambda_1=1e-6,
    lambda_2=1e-6,
    compute_scores=True,
    random_state=None
):
    """
    Bayesian Ridge Regression for time series forecasting with:
    - Probabilistic uncertainty estimates
    - Automatic complexity control
    - Lag features only
    """
    # Create directory structure
    os.makedirs('BayesianRidgeRegression', exist_ok=True)
    
    # 1. Stationarity and Differencing
    original_series = series.copy()
    d = 0
    if differencing:
        adf_result = adfuller(series.dropna())
        if adf_result[1] > 0.05:
            d = 1
            series = series.diff().dropna()

    # 2. Feature Engineering - Lag features
    df = pd.DataFrame({'y': series.astype(float)})
    for lag in range(1, max_lags + 1):
        df[f'lag_{lag}'] = df['y'].shift(lag).astype(float)
    df = df.dropna()

    # 3. Train-Test Split
    train_size = int(len(df) * 0.8)
    train = df.iloc[:train_size]
    test = df.iloc[train_size:]

    X_train = train.drop(columns=['y'])
    y_train = train['y']
    X_test = test.drop(columns=['y'])
    y_test = test['y']

    # 4. Model Training with Bayesian Ridge
    model = BayesianRidge(
        n_iter=n_iter,
        alpha_1=alpha_1,
        alpha_2=alpha_2,
        lambda_1=lambda_1,
        lambda_2=lambda_2,
        compute_score=compute_scores,
    )
    model.fit(X_train, y_train)

    # 5. Forecasting with Uncertainty
    pred_test, pred_test_std = model.predict(X_test, return_std=True)

    # 6. Inverse Differencing
    if d == 1:
        last_train_value = original_series.iloc[len(original_series) - len(test) - 1]
        pred_test = np.cumsum(pred_test) + last_train_value
        y_test = original_series.iloc[-len(test):]

    # 7. Create forecast DataFrame with uncertainty
    forecast_df = pd.DataFrame({
        'district': district_name,
        'date': y_test.index,
        'actual': y_test.values,
        'forecast_mean': pred_test,
        'forecast_std': pred_test_std
    })

    # 8. Save forecasts
    forecast_csv_path = os.path.join('BayesianRidgeRegression', 'bayesian_forecasts.csv')
    write_header = not os.path.exists(forecast_csv_path)
    forecast_df.to_csv(forecast_csv_path, mode='a', header=write_header, index=False)

    # 9. Calculate metrics and uncertainty statistics
    rmse = np.sqrt(mean_squared_error(forecast_df['actual'], forecast_df['forecast_mean']))
    log_marginal_likelihood = model.scores_[-1] if compute_scores else np.nan
    
    # Calculate coefficient statistics
    coef_means = model.coef_
    coef_stds = np.sqrt(np.diag(model.sigma_))
    
    metrics_df = pd.DataFrame([{
        'district': district_name,
        'rmse': rmse,
        'log_marginal_likelihood': log_marginal_likelihood,
        'coefficient_means': coef_means.tolist(),
        'coefficient_stds': coef_stds.tolist(),
        'intercept_mean': model.intercept_,
        'intercept_std': np.nan,
        'effective_params': model.lambda_ / model.alpha_,
        'differencing': d,
        'n_iter': n_iter
    }])

    metrics_csv_path = os.path.join('BayesianRidgeRegression', 'bayesian_metrics.csv')
    write_header_metrics = not os.path.exists(metrics_csv_path)
    metrics_df.to_csv(metrics_csv_path, mode='a', header=write_header_metrics, index=False)

    # 10. Plotting with uncertainty bands
    plt.figure(figsize=(12, 6))
    plt.plot(original_series.index, original_series.values, label='Original Series')
    plt.plot(forecast_df['date'], forecast_df['forecast_mean'], label='Mean Forecast', color='darkorange')
    plt.fill_between(
        forecast_df['date'],
        forecast_df['forecast_mean'] - 1.96*forecast_df['forecast_std'],
        forecast_df['forecast_mean'] + 1.96*forecast_df['forecast_std'],
        color='orange',
        alpha=0.2,
        label='95% Confidence'
    )
    plt.title(
        f'Bayesian Ridge Forecast for {district_name}\n'
        f'RMSE: {rmse:.2f} | Log ML: {log_marginal_likelihood:.2f}'
    )
    plt.legend()
    plt.savefig(os.path.join('BayesianRidgeRegression', f'bayesian_forecast_{district_name}.png'))
    plt.close()

    return {
        'district': district_name,
        'forecast_df': forecast_df,
        'metrics_df': metrics_df,
        'model': model,
        'coef_means': coef_means,
        'coef_stds': coef_stds,
        'intercept_mean': model.intercept_,
        'intercept_std': np.nan
    }

In [4]:
districts = data['district'].unique()
rmse_values = []

def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I48"].asfreq('MS')
        
        results = bayesian_ridge_regression_lags_only(
            ts,
            district,
            max_lags=4,
            n_iter=500,
            alpha_1=1e-5,
            lambda_1=1e-3
        )
        
    
        # Show results
        print("=== Metrics ===")
        print(results['metrics_df'])
        print("\n=== Forecast Data ===")
        print(results['forecast_df'].head())
    
    return results
run_for_each_district()



=== Metrics ===
     district       rmse  log_marginal_likelihood  \
0  AHMEDNAGAR  93.433481              -207.222332   

                                   coefficient_means  \
0  [0.30175998094197815, 0.0501714889804589, 0.01...   

                                    coefficient_stds  intercept_mean  \
0  [0.1302223713219305, 0.13812698552307828, 0.13...      323.131363   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       172216.1789             0     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  AHMEDNAGAR 2020-07-01   354.0     428.554607    117.226908
2020-08-01  AHMEDNAGAR 2020-08-01   391.0     433.840474    109.338554
2020-09-01  AHMEDNAGAR 2020-09-01   417.0     448.912628    110.537414
2020-10-01  AHMEDNAGAR 2020-10-01   343.0     453.343328    114.162341
2020-11-01  AHMEDNAGAR 2020-11-01   468.0     434.41



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0    AKOLA  67.458159              -206.961811   

                                   coefficient_means  \
0  [0.0007478335897655638, -0.0001493157008442984...   

                                    coefficient_stds  intercept_mean  \
0  [0.011971959405631854, 0.0119719396468928, 0.0...      449.138516   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      5.560266e+07             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    AKOLA 2020-07-01   407.0     449.347535     90.041171
2020-08-01    AKOLA 2020-08-01   306.0     449.332832     90.068204
2020-09-01    AKOLA 2020-09-01   504.0     449.461275     90.042538
2020-10-01    AKOLA 2020-10-01   493.0     449.470868     89.997805
2020-11-01    AKOLA 2020-11-01   514.0     449.517959     90.099666




=== Metrics ===
   district        rmse  log_marginal_likelihood  \
0  AMRAVATI  416.923628              -236.188706   

                                   coefficient_means  \
0  [-0.36104029455981196, 0.06506183237011268, -0...   

                                    coefficient_stds  intercept_mean  \
0  [0.13758739131903314, 0.14625993128165277, 0.1...      -22.593237   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     876110.238675             1     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  AMRAVATI 2020-07-01     470     358.608679    235.373367
2020-08-01  AMRAVATI 2020-08-01     482     329.754797    235.432229
2020-09-01  AMRAVATI 2020-09-01     586     318.749420    234.693554
2020-10-01  AMRAVATI 2020-10-01     819     248.644507    234.981607
2020-11-01  AMRAVATI 2020-11-01     609     132.074733    237.7209



=== Metrics ===
     district        rmse  log_marginal_likelihood  \
0  AURANGABAD  326.658386              -212.233222   

                                   coefficient_means  \
0  [-0.0005664012690764393, 9.264432448139203e-05...   

                                    coefficient_stds  intercept_mean  \
0  [0.01209601145588414, 0.012094775148694536, 0....      -10.146806   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      1.051622e+08             1     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  AURANGABAD 2020-07-01     378     415.996724    124.378009
2020-08-01  AURANGABAD 2020-08-01     729     406.016529    124.372871
2020-09-01  AURANGABAD 2020-09-01     857     395.634332    124.431731
2020-10-01  AURANGABAD 2020-10-01     889     385.498308    124.440566
2020-11-01  AURANGABAD 2020-11-01     837     375.



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0     BEED  111.55579              -225.534745   

                                   coefficient_means  \
0  [0.04170378605918042, 0.019628136807328268, 0....   

                                    coefficient_stds  intercept_mean  \
0  [0.0657873463744857, 0.06564987558859967, 0.06...      340.691419   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      4.473186e+06             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01     BEED 2020-07-01   465.0     382.412125    165.628271
2020-08-01     BEED 2020-08-01   554.0     376.226597    164.018256
2020-09-01     BEED 2020-09-01   485.0     383.892486    163.240071
2020-10-01     BEED 2020-10-01   495.0     379.278121    164.046553
2020-11-01     BEED 2020-11-01   472.0     381.523202    163.394676




=== Metrics ===
   district       rmse  log_marginal_likelihood  \
0  BHANDARA  53.240374              -193.753492   

                                   coefficient_means  \
0  [0.5444668402985322, -0.015536007401050184, 0....   

                                    coefficient_stds  intercept_mean  \
0  [0.14754304417563166, 0.16934413414508181, 0.1...       191.72144   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      28936.587684             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  BHANDARA 2020-07-01   267.0     278.012002     72.267007
2020-08-01  BHANDARA 2020-08-01   323.0     290.144296     72.262539
2020-09-01  BHANDARA 2020-09-01   344.0     324.664286     74.897934
2020-10-01  BHANDARA 2020-10-01   397.0     329.867772     78.346371
2020-11-01  BHANDARA 2020-11-01   285.0     355.559433     83.401461



=== Metrics ===
   district       rmse  log_marginal_likelihood  \
0  BULDHANA  44.563284              -174.058217   

                                   coefficient_means  \
0  [0.27776009812236896, 0.3469986383764197, -0.3...   

                                    coefficient_stds  intercept_mean  \
0  [0.14739033323472414, 0.14563310663066478, 0.1...      195.053001   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       9992.712368             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  BULDHANA 2020-07-01   186.0     214.165535     50.459199
2020-08-01  BULDHANA 2020-08-01   211.0     226.392956     49.589560
2020-09-01  BULDHANA 2020-09-01   266.0     228.739879     49.639410
2020-10-01  BULDHANA 2020-10-01   280.0     253.123090     55.095791
2020-11-01  BULDHANA 2020-11-01   328.0     270.240137     57.002992



=== Metrics ===
     district        rmse  log_marginal_likelihood  \
0  CHANDRAPUR  214.025344              -204.020756   

                                   coefficient_means  \
0  [0.000798111820509397, -0.001741837426902069, ...   

                                    coefficient_stds  intercept_mean  \
0  [0.013908328314421712, 0.013870654738165547, 0...       -7.942866   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      4.888292e+07             1     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  CHANDRAPUR 2020-07-01     445     319.133813     97.607208
2020-08-01  CHANDRAPUR 2020-08-01     502     311.346751     97.619548
2020-09-01  CHANDRAPUR 2020-09-01     592     303.114413     97.621997
2020-10-01  CHANDRAPUR 2020-10-01     628     294.967350     97.624912
2020-11-01  CHANDRAPUR 2020-11-01     562     287.



=== Metrics ===
  district        rmse  log_marginal_likelihood  \
0    DHULE  119.714471              -220.389564   

                                   coefficient_means  \
0  [0.26882757089359055, 0.005233304408511226, -0...   

                                    coefficient_stds  intercept_mean  \
0  [0.12611076598109938, 0.13155713494696913, 0.1...      287.978711   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     447202.813025             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    DHULE 2020-07-01   106.0     331.861688    138.922901
2020-08-01    DHULE 2020-08-01   142.0     306.197740    132.019558
2020-09-01    DHULE 2020-09-01   210.0     315.600818    130.937078
2020-10-01    DHULE 2020-10-01   167.0     335.087615    131.783241
2020-11-01    DHULE 2020-11-01   356.0     327.363546    130.023724




=== Metrics ===
     district        rmse  log_marginal_likelihood  \
0  GADCHIROLI  104.054289              -185.938343   

                                   coefficient_means  \
0  [0.0005793804862570832, 0.00023769253773332766...   

                                    coefficient_stds  intercept_mean  \
0  [0.012891227968899443, 0.012891131065459254, 0...       -3.143803   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      1.969058e+07             1     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  GADCHIROLI 2020-07-01     294     314.929929     57.383367
2020-08-01  GADCHIROLI 2020-08-01     439     311.772421     57.382111
2020-09-01  GADCHIROLI 2020-09-01     482     308.707970     57.412478
2020-10-01  GADCHIROLI 2020-10-01     431     305.572422     57.415106
2020-11-01  GADCHIROLI 2020-11-01     434     302.



=== Metrics ===
  district        rmse  log_marginal_likelihood  \
0   GONDIA  132.880879              -201.243964   

                                   coefficient_means  \
0  [-0.3205455485993083, -0.07493711151747354, 0....   

                                    coefficient_stds  intercept_mean  \
0  [0.1356731336558659, 0.1414348777578426, 0.144...       -1.734229   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     140847.467449             1     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   GONDIA 2020-07-01     335     238.639960     86.801050
2020-08-01   GONDIA 2020-08-01     304     195.938885     87.071799
2020-09-01   GONDIA 2020-09-01     282     200.779189     85.668409
2020-10-01   GONDIA 2020-10-01     335     215.901738     85.719769
2020-11-01   GONDIA 2020-11-01     425     206.847506     85.524965




=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0  HINGOLI  79.588306              -188.954361   

                                   coefficient_means  \
0  [0.43395419697539717, -0.19153756736259792, 0....   

                                    coefficient_stds  intercept_mean  \
0  [0.14771789138679417, 0.15549821391719998, 0.1...      109.203323   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       29491.68137             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01  HINGOLI 2020-07-01   138.0     142.913941     56.682238
2020-08-01  HINGOLI 2020-08-01    86.0     162.147596     55.940694
2020-09-01  HINGOLI 2020-09-01   165.0     129.749804     54.494327
2020-10-01  HINGOLI 2020-10-01   164.0     179.741968     57.660144
2020-11-01  HINGOLI 2020-11-01   125.0     157.374931     58.731238




=== Metrics ===
  district        rmse  log_marginal_likelihood  \
0  JALGAON  121.813514              -204.143374   

                                   coefficient_means  \
0  [0.18029810975594227, 0.2209014035096421, 0.04...   

                                    coefficient_stds  intercept_mean  \
0  [0.12974726768478778, 0.13097563524890446, 0.1...      136.507786   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      148313.70315             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01  JALGAON 2020-07-01   244.0     333.570448    103.459190
2020-08-01  JALGAON 2020-08-01   292.0     296.591699     95.562169
2020-09-01  JALGAON 2020-09-01   362.0     295.491278     94.753972
2020-10-01  JALGAON 2020-10-01   415.0     309.272206     94.910430
2020-11-01  JALGAON 2020-11-01   469.0     332.525050     98.839420




=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0    JALNA  63.913608               -217.32993   

                                   coefficient_means  \
0  [0.08003586064731931, 0.03252207070181774, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.0948751554477688, 0.09471040674163586, 0.09...      197.435583   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      1.053510e+06             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    JALNA 2020-07-01   260.0     243.713119    122.246286
2020-08-01    JALNA 2020-08-01   218.0     251.641304    123.235622
2020-09-01    JALNA 2020-09-01   164.0     248.656389    123.439975
2020-10-01    JALNA 2020-10-01   226.0     246.216704    122.867745
2020-11-01    JALNA 2020-11-01   211.0     253.323434    123.644080




=== Metrics ===
   district       rmse  log_marginal_likelihood  \
0  KOLHAPUR  88.206163              -192.406268   

                                   coefficient_means  \
0  [0.0007705882582171547, 0.000681624385132218, ...   

                                    coefficient_stds  intercept_mean  \
0  [0.012111949463133452, 0.012111107559612696, 0...        377.4242   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      2.364312e+07             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  KOLHAPUR 2020-07-01   383.0     378.212145     59.769366
2020-08-01  KOLHAPUR 2020-08-01   292.0     378.125801     59.667866
2020-09-01  KOLHAPUR 2020-09-01   390.0     378.108518     59.655287
2020-10-01  KOLHAPUR 2020-10-01   332.0     378.113795     59.674912
2020-11-01  KOLHAPUR 2020-11-01   381.0     378.154041     59.654682



=== Metrics ===
  district        rmse  log_marginal_likelihood  \
0    LATUR  137.225104              -200.872858   

                                   coefficient_means  \
0  [0.02728236376464464, 0.006085262362534311, 0....   

                                    coefficient_stds  intercept_mean  \
0  [0.05109289746667972, 0.05102864762717674, 0.0...      277.289096   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      1.949954e+06             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    LATUR 2020-07-01   345.0     294.272545     81.098867
2020-08-01    LATUR 2020-08-01   387.0     294.311101     82.140911
2020-09-01    LATUR 2020-09-01   426.0     295.262290     82.953799
2020-10-01    LATUR 2020-10-01   448.0     296.106632     83.686993
2020-11-01    LATUR 2020-11-01   462.0     297.752059     84.596670




=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0   MUMBAI  206.97427              -235.096562   

                                   coefficient_means  \
0  [-0.001402150853450911, 0.0038623115504848402,...   

                                    coefficient_stds  intercept_mean  \
0  [0.025285670039743833, 0.02528321421919596, 0....      -36.454628   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      9.058555e+07             1     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   MUMBAI 2020-07-01     513     625.571856    244.516833
2020-08-01   MUMBAI 2020-08-01     579     594.481656    244.545547
2020-09-01   MUMBAI 2020-09-01     683     557.388209    243.428009
2020-10-01   MUMBAI 2020-10-01     763     520.367172    243.442359
2020-11-01   MUMBAI 2020-11-01     583     485.037847    243.436939




=== Metrics ===
  district        rmse  log_marginal_likelihood  \
0   NAGPUR  247.198871              -239.054446   

                                   coefficient_means  \
0  [0.3720308716474792, 0.3024118485075481, -0.04...   

                                    coefficient_stds  intercept_mean  \
0  [0.13883143168420867, 0.14912606777054446, 0.1...       609.48578   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     354796.895476             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   NAGPUR 2020-07-01   617.0     778.045480    222.649479
2020-08-01   NAGPUR 2020-08-01   638.0     807.707191    223.590810
2020-09-01   NAGPUR 2020-09-01   488.0     805.649827    225.614673
2020-10-01   NAGPUR 2020-10-01   603.0     724.738269    224.461770
2020-11-01   NAGPUR 2020-11-01   560.0     718.494978    226.921912




=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0   NANDED  67.095649               -201.99002   

                                   coefficient_means  \
0  [0.4181720884992838, 0.036604037704522946, -0....   

                                    coefficient_stds  intercept_mean  \
0  [0.14192238274458033, 0.142600699666901, 0.143...      320.105417   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      68929.396246             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   NANDED 2020-07-01   261.0     376.164840     88.735894
2020-08-01   NANDED 2020-08-01   400.0     365.801742     86.490670
2020-09-01   NANDED 2020-09-01   435.0     415.451653     96.683674
2020-10-01   NANDED 2020-10-01   391.0     438.974256     99.908457
2020-11-01   NANDED 2020-11-01   412.0     409.138958     98.706246




=== Metrics ===
    district       rmse  log_marginal_likelihood  \
0  NANDURBAR  62.146825               -205.92451   

                                   coefficient_means  \
0  [0.5977362965150325, 0.07772107053730919, 0.05...   

                                    coefficient_stds  intercept_mean  \
0  [0.14718736263253712, 0.17117423821382796, 0.1...      254.001074   

   intercept_std  effective_params  differencing  n_iter  
0            NaN       43440.54641             0     500  

=== Forecast Data ===
             district       date  actual  forecast_mean  forecast_std
date                                                                 
2020-07-01  NANDURBAR 2020-07-01   480.0     407.134665    107.376832
2020-08-01  NANDURBAR 2020-08-01   533.0     493.115459    100.526956
2020-09-01  NANDURBAR 2020-09-01   497.0     534.401594    104.382298
2020-10-01  NANDURBAR 2020-10-01   568.0     490.409517    106.992158
2020-11-01  NANDURBAR 2020-11-01   511.0     519.340610    1



=== Metrics ===
    district       rmse  log_marginal_likelihood  \
0  OSMANABAD  37.883607              -165.920969   

                                   coefficient_means  \
0  [0.15712578654759632, 0.011514435337397648, -0...   

                                    coefficient_stds  intercept_mean  \
0  [0.10321253229120052, 0.10437224015352854, 0.1...      155.603878   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      40318.063062             0     500  

=== Forecast Data ===
             district       date  actual  forecast_mean  forecast_std
date                                                                 
2020-07-01  OSMANABAD 2020-07-01   177.0     172.803549     43.565267
2020-08-01  OSMANABAD 2020-08-01   234.0     169.518476     43.999998
2020-09-01  OSMANABAD 2020-09-01   190.0     176.839221     47.246738
2020-10-01  OSMANABAD 2020-10-01   248.0     172.725715     45.601187
2020-11-01  OSMANABAD 2020-11-01   189.0     179.670864     



=== Metrics ===
   district       rmse  log_marginal_likelihood  \
0  PARBHANI  61.197678               -187.68366   

                                   coefficient_means  \
0  [0.6671312139072128, -0.15192936365647963, 0.1...   

                                    coefficient_stds  intercept_mean  \
0  [0.15344038597349335, 0.18042112889707249, 0.1...        93.84186   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      12753.874902             0     500  

=== Forecast Data ===
            district       date  actual  forecast_mean  forecast_std
date                                                                
2020-07-01  PARBHANI 2020-07-01   257.0     158.152739     51.592547
2020-08-01  PARBHANI 2020-08-01   232.0     244.756507     56.689983
2020-09-01  PARBHANI 2020-09-01   291.0     208.900332     56.629059
2020-10-01  PARBHANI 2020-10-01   315.0     276.936216     61.425972
2020-11-01  PARBHANI 2020-11-01   205.0     256.397960     63.023095



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0   RAIGAD  72.505069              -198.444827   

                                   coefficient_means  \
0  [0.27555428449699754, 0.06456780636047366, -0....   

                                    coefficient_stds  intercept_mean  \
0  [0.13059404105519234, 0.1365745590682498, 0.13...      310.357356   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      92473.405521             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   RAIGAD 2020-07-01   420.0     391.191796    106.319659
2020-08-01   RAIGAD 2020-08-01   459.0     361.871316    110.414803
2020-09-01   RAIGAD 2020-09-01   475.0     382.247343    117.504478
2020-10-01   RAIGAD 2020-10-01   491.0     390.146815    116.336257
2020-11-01   RAIGAD 2020-11-01   465.0     388.102631    118.704282
=== Metr



=== Metrics ===
  district       rmse  log_marginal_likelihood  \
0   SANGLI  82.663668              -208.841111   

                                   coefficient_means  \
0  [0.18148204597770595, 0.19837384282674492, 0.1...   

                                    coefficient_stds  intercept_mean  \
0  [0.12339480113155565, 0.12293108711165629, 0.1...      233.141201   

   intercept_std  effective_params  differencing  n_iter  
0            NaN     225003.725102             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   SANGLI 2020-07-01   547.0     421.635565    107.946567
2020-08-01   SANGLI 2020-08-01   406.0     461.239385    111.297167
2020-09-01   SANGLI 2020-09-01   461.0     465.326408    110.666672
2020-10-01   SANGLI 2020-10-01   373.0     454.796262    112.317813
2020-11-01   SANGLI 2020-11-01   410.0     435.845868    110.099691
=== Metr



=== Metrics ===
     district       rmse  log_marginal_likelihood  \
0  SINDHUDURG  22.228006              -149.644218   

                                   coefficient_means  \
0  [0.000973693056116794, 0.001279786563893338, -...   

                                    coefficient_stds  intercept_mean  \
0  [0.013934011281025296, 0.013933207809995534, 0...      118.839205   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      1.547914e+06             0     500  

=== Forecast Data ===
              district       date  actual  forecast_mean  forecast_std
date                                                                  
2020-07-01  SINDHUDURG 2020-07-01   121.0     119.127390     17.731620
2020-08-01  SINDHUDURG 2020-08-01    97.0     119.110256     17.702941
2020-09-01  SINDHUDURG 2020-09-01   141.0     119.094836     17.680758
2020-10-01  SINDHUDURG 2020-10-01   144.0     119.102409     17.714723
2020-11-01  SINDHUDURG 2020-11-01   117.0     119.18



=== Metrics ===
  district        rmse  log_marginal_likelihood  \
0    THANE  262.598543              -225.106069   

                                   coefficient_means  \
0  [-0.0005617450201543172, -0.000539312814121165...   

                                    coefficient_stds  intercept_mean  \
0  [0.011420716993542959, 0.011420530943540367, 0...      -18.262687   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      2.516196e+08             1     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01    THANE 2020-07-01     708     730.760398    181.607341
2020-08-01    THANE 2020-08-01     893     712.615278    181.607479
2020-09-01    THANE 2020-09-01     916     694.176718    181.614203
2020-10-01    THANE 2020-10-01    1156     675.782204    181.614391
2020-11-01    THANE 2020-11-01     848     657.476755    181.625002
=== Me



=== Metrics ===
  district      rmse  log_marginal_likelihood  \
0   WASHIM  21.85879              -181.573953   

                                   coefficient_means  \
0  [0.43539352167465833, 0.02975752096149019, 0.0...   

                                    coefficient_stds  intercept_mean  \
0  [0.13962857765432507, 0.13045321800971696, 0.1...       70.978609   

   intercept_std  effective_params  differencing  n_iter  
0            NaN      22513.085399             0     500  

=== Forecast Data ===
           district       date  actual  forecast_mean  forecast_std
date                                                               
2020-07-01   WASHIM 2020-07-01   154.0     117.905344     44.072922
2020-08-01   WASHIM 2020-08-01   127.0     140.659274     45.718824
2020-09-01   WASHIM 2020-09-01   120.0     125.696619     45.844661
2020-10-01   WASHIM 2020-10-01   113.0     128.253074     45.642519
2020-11-01   WASHIM 2020-11-01   154.0     116.796895     45.870437
=== Metric



{'district': 'MUMBAI SUBURBAN',
 'forecast_df':                    district       date  actual  forecast_mean  forecast_std
 date                                                                       
 2021-02-01  MUMBAI SUBURBAN 2021-02-01   607.0     692.322686    558.723456
 2021-03-01  MUMBAI SUBURBAN 2021-03-01   618.0     776.574762    498.382201,
 'metrics_df':           district        rmse  log_marginal_likelihood  \
 0  MUMBAI SUBURBAN  127.330114               -38.369483   
 
                                    coefficient_means  \
 0  [0.04145388365613513, -0.10246771260009085, -0...   
 
                                     coefficient_stds  intercept_mean  \
 0  [0.26082303717895683, 0.2590727207333168, 0.26...     1417.294754   
 
    intercept_std  effective_params  differencing  n_iter  
 0            NaN      91092.815641             0     500  ,
 'model': BayesianRidge(alpha_1=1e-05, compute_score=True, lambda_1=0.001, n_iter=500),
 'coef_means': array([ 0.04145388, 