In [5]:
pip install statsmodels

Collecting statsmodels
  Using cached statsmodels-0.14.6-cp314-cp314-win_amd64.whl.metadata (9.8 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Using cached patsy-1.0.2-py2.py3-none-any.whl.metadata (3.6 kB)
Using cached statsmodels-0.14.6-cp314-cp314-win_amd64.whl (9.6 MB)
Using cached patsy-1.0.2-py2.py3-none-any.whl (233 kB)
Installing collected packages: patsy, statsmodels

   ---------------------------------------- 0/2 [patsy]
   ---------------------------------------- 0/2 [patsy]
   ---------------------------------------- 0/2 [patsy]
   ---------------------------------------- 0/2 [patsy]
   -------------------- ------------------- 1/2 [statsmodels]
   -------------------- ------------------- 1/2 [statsmodels]
   -------------------- ------------------- 1/2 [statsmodels]
   -------------------- ------------------- 1/2 [statsmodels]
   -------------------- ------------------- 1/2 [statsmodels]
   -------------------- ------------------- 1/2 [statsmodels]
   ------------------

In [7]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings("ignore")


class WasteOverflowForecast:

    def __init__(self, file_path):
        self.file_path = file_path
        self.data = None
        self.zone_models = {}

    # -----------------------------
    # Step 1: Load and preprocess data
    # -----------------------------
    def load_and_prepare_data(self):
        df = pd.read_csv(self.file_path)

        # Convert date
        df['date'] = pd.to_datetime(df['date'])

        # Aggregate to zone-level
        zone_daily = (
            df.groupby(['zone', 'date'])
              .agg(
                  avg_fill=('fill_percentage', 'mean'),
                  max_fill=('fill_percentage', 'max'),
                  bins_over_80=('fill_percentage', lambda x: (x > 80).sum())
              )
              .reset_index()
        )

        zone_daily = zone_daily.sort_values(['zone', 'date'])
        self.data = zone_daily
        return zone_daily


    # -----------------------------
    # Step 2: Train ARIMA per zone
    # -----------------------------
    def train_models(self):
        zones = self.data['zone'].unique()

        for zone in zones:
            zone_df = self.data[self.data['zone'] == zone]

            series = zone_df.set_index('date')['avg_fill']

            # Simple safe ARIMA configuration
            model = ARIMA(series, order=(2, 1, 2))
            model_fit = model.fit()

            self.zone_models[zone] = model_fit

        print("Models trained for zones:", list(self.zone_models.keys()))


    # -----------------------------
    # Step 3: Forecast
    # -----------------------------
    def forecast_zone(self, zone, days=3):

        if zone not in self.zone_models:
            raise ValueError("Zone model not found. Train models first.")

        model_fit = self.zone_models[zone]

        forecast = model_fit.get_forecast(steps=days)

        predicted_mean = forecast.predicted_mean
        conf_int = forecast.conf_int()

        # Extract uncertainty bands
        lower_bound = conf_int.iloc[:, 0]
        upper_bound = conf_int.iloc[:, 1]

        # Overflow probability calculation
        overflow_prob = (predicted_mean > 100).sum() / days

        result = {
            "zone": zone,
            "predicted_fill": predicted_mean.values.tolist(),
            "lower_bound": lower_bound.values.tolist(),
            "upper_bound": upper_bound.values.tolist(),
            "overflow_probability": float(overflow_prob)
        }

        return result


    # -----------------------------
    # Step 4: Forecast all zones
    # -----------------------------
    def forecast_all_zones(self, days=3):
        results = []

        for zone in self.zone_models.keys():
            res = self.forecast_zone(zone, days)
            results.append(res)

        return results


# -----------------------------
# Example usage
# -----------------------------
if __name__ == "__main__":
    path="data\\waste_fill.csv"

    model = WasteOverflowForecast(path)

    print("Loading data...")
    model.load_and_prepare_data()

    print("Training models...")
    model.train_models()

    print("Forecasting...")
    forecasts = model.forecast_all_zones(days=3)

    for f in forecasts:
        print("\nZone:", f["zone"])
        print("Predicted Fill:", f["predicted_fill"])
        print("Overflow Probability:", f["overflow_probability"])


Loading data...
Training models...
Models trained for zones: ['Central Zone', 'East Zone', 'North Zone', 'North-West Zone', 'South Zone', 'South-East Zone', 'South-West Zone', 'West Zone']
Forecasting...

Zone: Central Zone
Predicted Fill: [62.79752959413501, 60.68550573477245, 59.949469130099146]
Overflow Probability: 0.0

Zone: East Zone
Predicted Fill: [27.730929669192843, 45.93366241432119, 33.79902225406123]
Overflow Probability: 0.0

Zone: North Zone
Predicted Fill: [26.869500016002235, 24.13153779804453, 25.405436016514813]
Overflow Probability: 0.0

Zone: North-West Zone
Predicted Fill: [6.867487874802766, 9.9322540749434, -0.30824762229672586]
Overflow Probability: 0.0

Zone: South Zone
Predicted Fill: [61.194956744489716, 98.30248821793005, 99.25548671658441]
Overflow Probability: 0.0

Zone: South-East Zone
Predicted Fill: [48.11540082797474, 62.20006498456127, 56.85046542427148]
Overflow Probability: 0.0

Zone: South-West Zone
Predicted Fill: [24.683566975165114, 31.07162081