In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
data = pd.read_csv("../data/HMIS_DATA_CORRECTED_17_21/mh_dist17_21_with_IDs_date_correction.csv")
data = data[(data['indicator_type'] == 'Total [(A+B) or (C+D)]')]
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date')
data.index = pd.DatetimeIndex(data.index)

In [4]:
def ann_regression_lags_only(
    series,
    district_name,
    max_lags=12,
    differencing=True,
    epochs=100,
    batch_size=32,
    hidden_layers=[32, 16],
    activation='relu',
    verbose=0
):
    """
    ANN regression for time series forecasting with lag features.
    Returns (None, None) on error to maintain pipeline flow.
    """
    try:
        # 0. Initial checks
        if len(series) < max_lags + 24:
            raise ValueError(f"Series too short ({len(series)} points) for {max_lags} lags")

        # 1. Directory setup
        os.makedirs('ANN_Forecasts', exist_ok=True)
        
        # 2. Stationarity handling
        original_series = series.copy()
        d = 0
        if differencing:
            adf_result = adfuller(series.dropna())
            if adf_result[1] > 0.05:
                d = 1
                series = series.diff().dropna()
                if len(series) < 10:
                    raise ValueError("Insufficient data after differencing")

        # 3. Create lag features
        df = pd.DataFrame({'y': series})
        for lag in range(1, max_lags + 1):
            df[f'lag_{lag}'] = df['y'].shift(lag)
        df = df.dropna()

        # 4. Temporal split with index preservation
        train_size = int(len(df) * 0.8)
        if train_size < 10 or (len(df) - train_size) < 2:
            raise ValueError("Insufficient train/test split")
            
        train = df.iloc[:train_size]
        test = df.iloc[train_size:]
        test_series = test['y']  # Preserve for index

        # 5. Prepare numpy arrays
        X_train = train.drop(columns=['y']).values
        y_train = train['y'].values.reshape(-1, 1)
        X_test = test.drop(columns=['y']).values
        y_test_np = test['y'].values.reshape(-1, 1)

        # 6. Scaling
        scaler_X = MinMaxScaler()
        scaler_y = MinMaxScaler()

        X_train_scaled = scaler_X.fit_transform(X_train)
        X_test_scaled = scaler_X.transform(X_test)
        y_train_scaled = scaler_y.fit_transform(y_train)
        y_test_scaled = scaler_y.transform(y_test_np)

        # 7. Build ANN model
        model = Sequential()
        model.add(Dense(hidden_layers[0], activation=activation, input_dim=max_lags))
        for units in hidden_layers[1:]:
            model.add(Dense(units, activation=activation))
        model.add(Dense(1))  # Output layer
        model.compile(optimizer='adam', loss='mse')

        # 8. Train with validation
        history = model.fit(
            X_train_scaled, 
            y_train_scaled,
            epochs=epochs,
            batch_size=batch_size,
            verbose=verbose,
            validation_split=0.2,
            shuffle=False
        )

        # 9. Predict and inverse scaling
        y_pred_scaled = model.predict(X_test_scaled, verbose=verbose)
        y_pred = scaler_y.inverse_transform(y_pred_scaled).flatten()

        # 10. Inverse differencing
        if d == 1:
            last_train_value = original_series.iloc[train.index[0] - 1]
            y_pred = np.cumsum(y_pred) + last_train_value
            y_test_values = original_series.iloc[test.index].values
        else:
            y_test_values = test_series.values

        # 11. Create output (preserve original indices)
        forecast_df = pd.DataFrame({
            'district': district_name,
            'date': test_series.index,
            'actual': y_test_values,
            'forecast': y_pred
        })

        # 12. Calculate metrics
        rmse = np.sqrt(mean_squared_error(forecast_df['actual'], forecast_df['forecast']))

        # 13. Save results
        forecast_df.to_csv(
            os.path.join('ANN_Forecasts', f'{district_name}_forecast.csv'), 
            index=False
        )

        # 14. Plotting
        plt.figure(figsize=(12, 6))
        plt.plot(original_series.index, original_series, label='Original')
        plt.plot(forecast_df['date'], forecast_df['forecast'], label='ANN Forecast')
        plt.title(f'{district_name} ANN Forecast (RMSE: {rmse:.2f})')
        plt.legend()
        plt.savefig(os.path.join('ANN_Forecasts', f'{district_name}_forecast_plot.png'))
        plt.close()

        return {
            'district': district_name,
            'rmse': rmse,
            'epochs': epochs,
            'batch_size': batch_size,
            'training_loss': history.history['loss'],
            'validation_loss': history.history['val_loss']
        }, forecast_df

    except Exception as e:
        print(f"⚠️ Skipping {district_name}: {str(e)}")
        return None, None

In [5]:
districts = data['district'].unique()
rmse_values = []

# Usage Example


def run_for_each_district():
    results = {}
    
    for district in districts:
        district_data = data[data['district'] == district]
        ts = district_data["I1"].asfreq('MS')
        
        results, forecast = ann_regression_lags_only(
            series=ts,
            district_name=district,
            max_lags=6,
            epochs=200,
            batch_size=4,
            hidden_layers=[32, 16],
            activation='relu',
            verbose=1
        )

        
    
        # Show results
        print("=== Metrics ===")
        print(results)
        print("\n=== Forecast Data ===")
        print(forecast)
    
    return results
run_for_each_district()

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78