In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [3]:
def exponential_smoothing_forecast_district(series, district_name, seasonal=False, seasonal_periods=12):
    """
    Complete Exponential Smoothing forecasting with data storage and plot saving
    """
    # Create directory structure
    os.makedirs('ExponentialSmoothing', exist_ok=True)
    
    # 1. Train-Test Split (80:20)
    train_size = int(len(series) * 0.8)
    train, test = series.iloc[:train_size], series.iloc[train_size:]

    # 2. Model Fitting
    if seasonal:
        model = ExponentialSmoothing(train, seasonal='add', seasonal_periods=seasonal_periods)
    else:
        model = ExponentialSmoothing(train)
    model_fit = model.fit()
    
    # 3. Forecasting
    forecast = model_fit.forecast(steps=len(test))

    # 4. Create forecast DataFrame
    forecast_df = pd.DataFrame({
        'district': district_name,
        'date': test.index,
        'actual': test.values,
        'forecast': forecast.values
    })

    # 5. Save forecasts to CSV (append mode)
    forecast_csv_path = os.path.join('ExponentialSmoothing', 'exp_smoothing_forecasts.csv')
    write_header = not os.path.exists(forecast_csv_path)
    forecast_df.to_csv(forecast_csv_path, 
                      mode='a', 
                      header=write_header, 
                      index=False)

    # 6. Calculate and save metrics
    rmse = np.sqrt(mean_squared_error(test, forecast))
    metrics_df = pd.DataFrame([{
        'district': district_name,
        'rmse': rmse,
        'seasonal': seasonal
    }])

    metrics_csv_path = os.path.join('ExponentialSmoothing', 'exp_smoothing_results.csv')
    write_header_metrics = not os.path.exists(metrics_csv_path)
    metrics_df.to_csv(metrics_csv_path, 
                     mode='a', 
                     header=write_header_metrics, 
                     index=False)

    # 7. Save plot
    plt.figure(figsize=(12, 6))
    plt.plot(train, label='Training Data')
    plt.plot(test, label='Actual Values', color='navy')
    plt.plot(forecast, label='Forecast', color='darkorange')
    plt.title(f'Exponential Smoothing Forecast for {district_name}\nRMSE: {rmse:.2f}')
    plt.legend()
    plt.savefig(os.path.join('ExponentialSmoothing', f'ExpSmoothing_forecast_{district_name}.png'))
    plt.close()

    return {
        'district': district_name,
        'forecast_df': forecast_df,
        'metrics_df': metrics_df
    }

In [4]:
districts = data['district'].unique()
rmse_values = []

def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I55"].asfreq('MS')
        
        results = exponential_smoothing_forecast_district(ts, district)
    
        # Show results
        print("=== Metrics ===")
        print(results['metrics_df'])
        print("\n=== Forecast Data ===")
        print(results['forecast_df'].head())
    
    return results
run_for_each_district()

=== Metrics ===
     district         rmse  seasonal
0  AHMEDNAGAR  1090.853655     False

=== Forecast Data ===
     district       date  actual   forecast
0  AHMEDNAGAR 2020-06-01    5761  5670.5829
1  AHMEDNAGAR 2020-07-01    6225  5670.5829
2  AHMEDNAGAR 2020-08-01    6609  5670.5829
3  AHMEDNAGAR 2020-09-01    7268  5670.5829
4  AHMEDNAGAR 2020-10-01    6799  5670.5829
=== Metrics ===
  district        rmse  seasonal
0    AKOLA  365.511742     False

=== Forecast Data ===
  district       date  actual     forecast
0    AKOLA 2020-06-01    1889  2485.768404
1    AKOLA 2020-07-01    1999  2485.768404
2    AKOLA 2020-08-01    1927  2485.768404
3    AKOLA 2020-09-01    2604  2485.768404
4    AKOLA 2020-10-01    2342  2485.768404
=== Metrics ===
   district        rmse  seasonal
0  AMRAVATI  693.037587     False

=== Forecast Data ===
   district       date  actual    forecast
0  AMRAVATI 2020-06-01    3183  2474.91026
1  AMRAVATI 2020-07-01    2772  2474.91026
2  AMRAVATI 2020-08-01  

{'district': 'MUMBAI SUBURBAN',
 'forecast_df':           district       date  actual     forecast
 0  MUMBAI SUBURBAN 2021-01-01    5269  5798.064783
 1  MUMBAI SUBURBAN 2021-02-01    3958  5798.064783
 2  MUMBAI SUBURBAN 2021-03-01    4827  5798.064783,
 'metrics_df':           district         rmse  seasonal
 0  MUMBAI SUBURBAN  1239.450787     False}