In [6]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Preprocessing function
def preprocess_data(data, entry_time="14:00:00", entries_per_day=1):
    """Attach a combined timestamp and thin the readings to a fixed number per day.

    Args:
        data: DataFrame with ``date`` and ``time`` columns whose string
            concatenation is parseable by ``pd.to_datetime``. The multi-entry
            mode additionally groups on ``dustbin_id``. The frame passed in is
            left untouched.
        entry_time: Time of day of the reading to keep when
            ``entries_per_day`` is 1.
        entries_per_day: ``1`` keeps only the rows stamped exactly at
            ``entry_time`` (original index preserved). Any other value keeps
            the first ``entries_per_day`` rows of every ``(date, dustbin_id)``
            group, ordered by group key, with a fresh 0..n-1 index.

    Returns:
        A new DataFrame containing the selected rows plus a ``date_time``
        column.
    """
    # ``assign`` returns a new frame, so the caller's DataFrame does not
    # silently gain a ``date_time`` column.
    data = data.assign(
        date_time=pd.to_datetime(data['date'] + ' ' + data['time'])
    )

    if entries_per_day == 1:
        wanted_time = pd.to_datetime(entry_time).time()
        return data[data['date_time'].dt.time == wanted_time]

    group_keys = ['date', 'dustbin_id']
    # ``groupby.head`` keeps every column (including the grouping columns),
    # unlike ``groupby.apply`` whose handling of grouping columns is deprecated
    # in recent pandas. The stable sort reproduces the group-key ordering that
    # ``apply`` produced while keeping the original order inside each group.
    return (
        data.groupby(group_keys)
        .head(entries_per_day)
        .sort_values(group_keys, kind='stable')
        .reset_index(drop=True)
    )

# EARIMA function
def apply_earima(train, test, order=(5, 1, 0)):
    """Fit an ARIMA model on ``train`` and score its forecast against ``test``.

    Args:
        train: Observations used to fit the model.
        test: Held-out observations; one forecast step is produced per entry.
        order: ``(p, d, q)`` order passed to ``ARIMA``.

    Returns:
        Root-mean-squared error between ``test`` and the forecast.
    """
    model_fit = ARIMA(train, order=order).fit()
    predictions = model_fit.forecast(len(test))
    # ``mean_squared_error(..., squared=False)`` was deprecated in
    # scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE
    # gives the same RMSE on every version.
    return np.sqrt(mean_squared_error(test, predictions))

# ESARIMAX function
def apply_esarimax(train, test, seasonal_order=(1, 0, 0, 7), order=(1, 1, 1)):
    """Fit a SARIMAX model on ``train`` and score its forecast against ``test``.

    Args:
        train: Observations used to fit the model.
        test: Held-out observations; one forecast step is produced per entry.
        seasonal_order: ``(P, D, Q, s)`` seasonal order passed to ``SARIMAX``.
        order: ``(p, d, q)`` non-seasonal order passed to ``SARIMAX``. Added
            as a trailing parameter so existing positional calls are
            unaffected; the default matches the previously hard-coded value.

    Returns:
        Root-mean-squared error between ``test`` and the forecast.
    """
    model = SARIMAX(train, order=order, seasonal_order=seasonal_order)
    model_fit = model.fit(disp=False)  # disp=False silences optimizer output
    predictions = model_fit.forecast(len(test))
    # ``mean_squared_error(..., squared=False)`` was deprecated in
    # scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE
    # gives the same RMSE on every version.
    return np.sqrt(mean_squared_error(test, predictions))

# Exponential Smoothing function with fallback logic
def apply_exponential_smoothing(train, test, seasonal='add', seasonal_periods=7):
    """Score a seasonal exponential-smoothing forecast, with a mean fallback.

    Args:
        train: Observations used to fit the model.
        test: Held-out observations; one forecast step is produced per entry.
        seasonal: Seasonal component type passed to ``ExponentialSmoothing``.
        seasonal_periods: Length of one seasonal cycle. At least two full
            cycles of training data are required to attempt the fit.

    Returns:
        Root-mean-squared error between ``test`` and the forecast. When the
        training series is too short, or the model raises ``ValueError``, the
        forecast is the training mean repeated ``len(test)`` times.
    """
    try:
        if len(train) < seasonal_periods * 2:
            raise ValueError("Insufficient data for the specified seasonal_periods.")
        model_fit = ExponentialSmoothing(
            train, seasonal=seasonal, seasonal_periods=seasonal_periods
        ).fit()
        predictions = model_fit.forecast(len(test))
    except ValueError as e:
        # Deliberate best-effort: report the problem and fall back to a
        # constant forecast instead of aborting the whole experiment.
        print(f"Warning: {e}. Using a simple average model instead.")
        predictions = np.full(len(test), np.mean(train))
    # ``mean_squared_error(..., squared=False)`` was deprecated in
    # scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE
    # gives the same RMSE on every version.
    return np.sqrt(mean_squared_error(test, predictions))

# Main function for experiments
# Labels accepted by ``run_experiments``.
_SUPPORTED_ALGORITHMS = ('EARIMA', 'ESARIMAX', 'ELSTM')


def _score_forecast(algo, train, test):
    """Return the RMSE of the forecaster registered under the label ``algo``."""
    if algo == 'EARIMA':
        return apply_earima(train, test)
    if algo == 'ESARIMAX':
        return apply_esarimax(train, test)
    if algo == 'ELSTM':
        # NOTE: despite the label, this entry is backed by exponential
        # smoothing, not an LSTM.
        return apply_exponential_smoothing(train, test)
    raise ValueError(f"Unknown algorithm: {algo!r}")


def run_experiments(data, test_windows, algorithms=None):
    """Evaluate each algorithm on every bin, averaging RMSE over test windows.

    For every bin the ``filled_capacity`` series is split into a training part
    and a test part made of the last ``days`` rows, once per entry of
    ``test_windows``. Rows are used in the order they appear in ``data``, so
    the frame is expected to be in chronological order per bin.

    Args:
        data: DataFrame with ``dustbin_id`` and ``filled_capacity`` columns.
        test_windows: Iterable of hold-out sizes (number of trailing rows).
            Each must be at least 1 and smaller than every bin's series.
        algorithms: Iterable of labels drawn from ``'EARIMA'``, ``'ESARIMAX'``
            and ``'ELSTM'``. ``None`` runs all three.

    Returns:
        ``(results, bin_ids)`` where ``bin_ids`` is the sorted list of bin ids
        and ``results`` maps each algorithm label to a list of mean RMSE
        values, one per bin, in ``bin_ids`` order.

    Raises:
        ValueError: If an algorithm label is not supported or a test window
            does not leave a non-empty train and test split for some bin.
    """
    # ``None`` sentinel instead of a mutable list default.
    if algorithms is None:
        algorithms = list(_SUPPORTED_ALGORITHMS)
    else:
        algorithms = list(algorithms)
    test_windows = list(test_windows)

    # Fail fast: previously an unknown label either hit an unbound ``rmse`` or
    # silently re-recorded the score of the preceding iteration.
    unknown = [algo for algo in algorithms if algo not in _SUPPORTED_ALGORITHMS]
    if unknown:
        raise ValueError(
            f"Unknown algorithm(s) {unknown}; "
            f"supported: {list(_SUPPORTED_ALGORITHMS)}"
        )

    bin_ids = sorted(data['dustbin_id'].unique())
    # Extract each bin's series once instead of re-filtering per algorithm.
    series_by_bin = {
        bin_id: data.loc[data['dustbin_id'] == bin_id, 'filled_capacity'].to_numpy()
        for bin_id in bin_ids
    }

    # A window of 0 (``[:-0]`` is empty), a negative window, or a window that
    # swallows the whole series would yield a meaningless split.
    for bin_id, series in series_by_bin.items():
        for days in test_windows:
            if not 0 < days < len(series):
                raise ValueError(
                    f"Test window {days} is invalid for bin {bin_id} "
                    f"with {len(series)} observations."
                )

    results = {}
    for algo in algorithms:
        per_bin_scores = []
        for bin_id in bin_ids:
            series = series_by_bin[bin_id]
            window_scores = [
                _score_forecast(algo, series[:-days], series[-days:])
                for days in test_windows
            ]
            per_bin_scores.append(np.mean(window_scores))  # Average RMSE for the bin
        results[algo] = per_bin_scores
    return results, bin_ids

# Read the raw readings and give them a combined timestamp column
data = pd.read_csv("synthetic_waste_data.csv")
timestamp_text = data['date'] + ' ' + data['time']
data['date_time'] = pd.to_datetime(timestamp_text)

# Keep only the 14:00 reading of each day (works on a copy of the raw frame)
data_single_entry = preprocess_data(
    data.copy(),
    entry_time="14:00:00",
    entries_per_day=1,
)

# Experiment configuration: hold-out sizes and the algorithm labels to compare
test_windows = [1, 7, 15]
algorithms = ['ELSTM', 'EARIMA', 'ESARIMAX']
results, bins = run_experiments(data_single_entry, test_windows, algorithms)

# One row per algorithm, one "Bin <id>" column per dustbin
results_df = pd.DataFrame.from_dict(
    results,
    orient='index',
    columns=[f'Bin {bin_id}' for bin_id in bins],
)
results_df = results_df.rename_axis("ALGORITHM")

# Show the comparison table
print(results_df)




              Bin 1     Bin 2     Bin 3     Bin 4     Bin 5     Bin 6
ALGORITHM                                                            
ELSTM      1.137191  1.355091  0.891074  0.804110  1.117328  0.868004
EARIMA     1.219376  1.451405  1.058904  0.897392  0.857515  0.870458
ESARIMAX   1.093219  1.253669  0.861881  0.800908  1.134326  0.797343


