In [None]:
import pandas as pd
import numpy as np
from random import choice

# Load the observed values from the CSV file
observed_data = pd.read_csv('synthetic_waste_data.csv')

# Parse the date column with an explicit year-first format.
# The dates are stored as YYYY-MM-DD; combining that layout with
# dayfirst=True makes pandas infer "%Y-%d-%m" from the first row and then
# fail with "unconverted data remains" on the first day greater than 12.
observed_data['date'] = pd.to_datetime(observed_data['date'], format='%Y-%m-%d')

# RMSE per algorithm, one value per bin.
# Position 0 in each list belongs to bin 1, position 5 to bin 6.
rmse_values = {
    "E-LSTM": [
        0.157892858,
        0.023322254,
        0.009187489,
        0.063529937,
        0.160066341,
        0.156176723,
    ],
    "E-ARIMA": [
        2.289473778,
        2.412844341,
        2.549617714,
        2.163232432,
        2.29378405,
        2.789758176,
    ],
    "XGBoost": [
        1.07792527,
        1.101901153,
        1.264287062,
        1.058385785,
        0.975875168,
        1.071054532,
    ],
    "E-SARIMAX": [
        1.073566836,
        1.24688629,
        1.387341267,
        1.307217018,
        1.163926958,
        1.262880781,
    ],
}

def calculate_predicted_values(data, date, rmse, algorithm='E-LSTM', *,
                               min_level=1, max_level=10, rng=None):
    """
    Build predicted values for every row of ``data`` recorded on ``date``.

    For each matching row the observed value is shifted down and up by the
    RMSE of the row's bin, both results are truncated to integers and clamped
    to ``[min_level, max_level]``, and one of the two is picked at random as
    the prediction.

    Args:
        data: DataFrame with the columns 'date', 'Time', 'Bin' and 'Observed'.
        date: Value compared for equality against the 'date' column.
        rmse: Mapping of algorithm name to a sequence of per-bin RMSE values,
            where position 0 belongs to bin 1.
        algorithm: Key of ``rmse`` selecting which RMSE values to use.
        min_level: Smallest value a prediction may take.
        max_level: Largest value a prediction may take.
        rng: Optional object with a ``choice`` method (for example a
            ``random.Random`` instance) used for reproducible picks. When
            omitted, the module-level ``choice`` is used.

    Returns:
        DataFrame with the columns 'Time', 'Bin', 'Observed' and 'Predicted'.
        The columns are present even when no row matches ``date``.

    Raises:
        KeyError: If ``algorithm`` is not a key of ``rmse``.
        IndexError: If a row's bin number has no matching RMSE entry.
    """
    rmse_bins = rmse[algorithm]
    pick = choice if rng is None else rng.choice
    columns = ['Time', 'Bin', 'Observed', 'Predicted']

    # Filter the data for the specified date
    filtered_data = data[data['date'] == date]

    results = []
    rows = zip(filtered_data['Time'], filtered_data['Bin'], filtered_data['Observed'])
    for time_value, bin_id, observed in rows:
        bin_number = int(bin_id) - 1  # Subtract 1 to match 0-based RMSE index
        # Reject bins outside the table explicitly; a negative index would
        # otherwise silently wrap around to another bin's RMSE.
        if not 0 <= bin_number < len(rmse_bins):
            raise IndexError(
                f"Bin {bin_id!r} has no RMSE value for algorithm {algorithm!r}"
            )
        rmse_value = rmse_bins[bin_number]

        # Clamp both bounds on both sides so an observed value outside the
        # valid range can never produce an out-of-range prediction.
        lower_bound = min(max_level, max(min_level, int(observed - rmse_value)))
        upper_bound = min(max_level, max(min_level, int(observed + rmse_value)))

        # Randomly choose either the upper or lower bound
        results.append({
            'Time': time_value,
            'Bin': bin_id,
            'Observed': observed,
            'Predicted': pick([lower_bound, upper_bound]),
        })

    # Passing the columns explicitly keeps the schema stable for empty results.
    return pd.DataFrame(results, columns=columns)

# Example usage
# Replace '14-09-2024' with the desired date (written day-month-year)
date_to_search = pd.to_datetime('14-09-2024', format='%d-%m-%Y')
predicted_results = calculate_predicted_values(
    data=observed_data,
    date=date_to_search,
    rmse=rmse_values,
    algorithm='E-LSTM',
)

# Write the predictions to disk, then show them
predicted_results.to_csv('predicted_results.csv', index=False)
print(predicted_results)


ValueError: unconverted data remains when parsing with format "%Y-%d-%m": "3", at position 12. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.