In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense
from math import sqrt

In [2]:
# Read the raw consumption records, then convert the timestamp column to datetimes
df = pd.read_csv('water_consumption_data.csv')
df = df.assign(timestamp=pd.to_datetime(df['timestamp']))

FileNotFoundError: [Errno 2] No such file or directory: 'water_consumption_data.csv'

In [3]:
def prepare_data(data, floor, unit):
    """Build the daily water-usage series for one unit.

    Args:
        data: DataFrame with 'floor', 'unit', 'timestamp' and 'water_usage' columns.
        floor: Value the 'floor' column must equal.
        unit: Value the 'unit' column must equal.

    Returns:
        Series of 'water_usage' for the matching rows, indexed by timestamp,
        resampled to daily frequency ('D') and summed per day.
    """
    is_requested_unit = (data['floor'] == floor) & (data['unit'] == unit)
    usage = data.loc[is_requested_unit].set_index('timestamp')['water_usage']
    return usage.resample('D').sum()

def train_test_split(data, test_size=0.2):
    """Split a sequence chronologically into a leading and a trailing part.

    Args:
        data: Sliceable sequence that supports `len()`.
        test_size: Fraction of the observations placed in the trailing (test) part.

    Returns:
        Tuple `(train, test)` where `train` holds the first
        `int(len(data) * (1 - test_size))` items and `test` the remainder.
    """
    cut = int(len(data) * (1 - test_size))
    return data[:cut], data[cut:]

In [4]:
def ets_forecast(train, test):
    """Forecast the test horizon with exponential smoothing and score it.

    The model uses an additive trend and an additive seasonal component with
    a seasonal period of 7.

    Args:
        train: Series the model is fitted on.
        test: Held-out series; its length sets the number of forecast steps.

    Returns:
        Tuple `(forecast, rmse)`: the forecast values and their root mean
        squared error against `test`.
    """
    fitted = ExponentialSmoothing(
        train,
        seasonal_periods=7,
        trend='add',
        seasonal='add',
    ).fit()
    predictions = fitted.forecast(len(test))
    error = sqrt(mean_squared_error(test, predictions))
    return predictions, error



In [5]:
def arima_forecast(train, test):
    """Forecast the test horizon with an ARIMA(1, 1, 1) model and score it.

    Args:
        train: Series the model is fitted on.
        test: Held-out series; its length sets the number of forecast steps.

    Returns:
        Tuple `(forecast, rmse)`: the forecast values and their root mean
        squared error against `test`.
    """
    fitted = ARIMA(train, order=(1, 1, 1)).fit()
    predictions = fitted.forecast(len(test))
    error = sqrt(mean_squared_error(test, predictions))
    return predictions, error



In [6]:
def sarima_forecast(train, test):
    """Forecast the test horizon with a seasonal ARIMA model and score it.

    The model is SARIMAX with order (1, 1, 1) and seasonal order (1, 1, 1, 7).

    Args:
        train: Series the model is fitted on.
        test: Held-out series; its length sets the number of forecast steps.

    Returns:
        Tuple `(forecast_mean, rmse)`: the predicted mean of the forecast and
        its root mean squared error against `test`.
    """
    fitted = SARIMAX(
        train,
        order=(1, 1, 1),
        seasonal_order=(1, 1, 1, 7),
    ).fit()
    predicted_mean = fitted.get_forecast(steps=len(test)).predicted_mean
    error = sqrt(mean_squared_error(test, predicted_mean))
    return predicted_mean, error

In [7]:
def create_dataset(dataset, look_back=1):
    """Turn a series into sliding input windows and next-step targets.

    Args:
        dataset: 2-D array; only column 0 is read.
        look_back: Number of consecutive rows in each input window.

    Returns:
        Tuple `(X, Y)` of arrays: `X[k]` is rows `k .. k + look_back - 1` of
        column 0 and `Y[k]` is the value at row `k + look_back`. Both are
        empty when `dataset` has `look_back` or fewer rows.
    """
    starts = range(len(dataset) - look_back)
    windows = [dataset[start:start + look_back, 0] for start in starts]
    targets = [dataset[start + look_back, 0] for start in starts]
    return np.array(windows), np.array(targets)

In [8]:
def rnn_forecast(train, test, look_back=7):
    """Train a SimpleRNN on `train` and score one-step-ahead predictions on `test`.

    Both series are min-max scaled with a scaler fitted on `train` only. Each
    prediction is computed from the `look_back` observed values that precede
    it. The test windows are seeded with the last `look_back` training
    observations, so one prediction is produced for every element of `test`
    and the RMSE covers the whole test set (the same points the other
    forecasters in this notebook are scored on).

    Args:
        train: Series-like object exposing `.values`; used for fitting.
        test: Series-like object exposing `.values`; used for evaluation.
        look_back: Number of past observations fed to the network per prediction.

    Returns:
        Tuple `(predictions, rmse)`: a 1-D array of predictions in the
        original units, one per test observation, and the root mean squared
        error against `test`.

    Raises:
        ValueError: If `look_back` is smaller than 1, `train` has `look_back`
            or fewer observations, or `test` is empty.
    """
    if look_back < 1:
        raise ValueError("look_back must be at least 1")
    if len(train) <= look_back:
        raise ValueError(
            f"need more than {look_back} training observations, got {len(train)}"
        )
    if len(test) == 0:
        raise ValueError("test set is empty")

    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
    test_scaled = scaler.transform(test.values.reshape(-1, 1))

    # Prefix the test data with the tail of the training data so the very
    # first test observation already has a full input window. Without this
    # the first `look_back` test points are never predicted and a test set
    # of `look_back` or fewer points yields no windows at all.
    test_with_context = np.concatenate([train_scaled[-look_back:], test_scaled])

    X_train, Y_train = create_dataset(train_scaled, look_back)
    X_test, _ = create_dataset(test_with_context, look_back)

    # The recurrent layer is declared with input_shape=(look_back, 1), so
    # every window needs a trailing feature axis of size 1.
    X_train = X_train.reshape(-1, look_back, 1)
    X_test = X_test.reshape(-1, look_back, 1)

    model = Sequential()
    model.add(SimpleRNN(50, input_shape=(look_back, 1)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, Y_train, epochs=100, batch_size=32, verbose=0)

    # Map the scaled network output back to the original units.
    test_predict = scaler.inverse_transform(model.predict(X_test))[:, 0]

    # Score against the untouched test values; no scaling round-trip needed.
    rmse = sqrt(mean_squared_error(test.values, test_predict))
    return test_predict, rmse

In [9]:
def lstm_forecast(train, test, look_back=7):
    """Train an LSTM on `train` and score one-step-ahead predictions on `test`.

    Both series are min-max scaled with a scaler fitted on `train` only. Each
    prediction is computed from the `look_back` observed values that precede
    it. The test windows are seeded with the last `look_back` training
    observations, so one prediction is produced for every element of `test`
    and the RMSE covers the whole test set (the same points the other
    forecasters in this notebook are scored on).

    Args:
        train: Series-like object exposing `.values`; used for fitting.
        test: Series-like object exposing `.values`; used for evaluation.
        look_back: Number of past observations fed to the network per prediction.

    Returns:
        Tuple `(predictions, rmse)`: a 1-D array of predictions in the
        original units, one per test observation, and the root mean squared
        error against `test`.

    Raises:
        ValueError: If `look_back` is smaller than 1, `train` has `look_back`
            or fewer observations, or `test` is empty.
    """
    if look_back < 1:
        raise ValueError("look_back must be at least 1")
    if len(train) <= look_back:
        raise ValueError(
            f"need more than {look_back} training observations, got {len(train)}"
        )
    if len(test) == 0:
        raise ValueError("test set is empty")

    scaler = MinMaxScaler(feature_range=(0, 1))
    train_scaled = scaler.fit_transform(train.values.reshape(-1, 1))
    test_scaled = scaler.transform(test.values.reshape(-1, 1))

    # Prefix the test data with the tail of the training data so the very
    # first test observation already has a full input window. Without this
    # the first `look_back` test points are never predicted and a test set
    # of `look_back` or fewer points yields no windows at all.
    test_with_context = np.concatenate([train_scaled[-look_back:], test_scaled])

    X_train, Y_train = create_dataset(train_scaled, look_back)
    X_test, _ = create_dataset(test_with_context, look_back)

    # The recurrent layer is declared with input_shape=(look_back, 1), so
    # every window needs a trailing feature axis of size 1.
    X_train = X_train.reshape(-1, look_back, 1)
    X_test = X_test.reshape(-1, look_back, 1)

    model = Sequential()
    model.add(LSTM(50, input_shape=(look_back, 1)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(X_train, Y_train, epochs=100, batch_size=32, verbose=0)

    # Map the scaled network output back to the original units.
    test_predict = scaler.inverse_transform(model.predict(X_test))[:, 0]

    # Score against the untouched test values; no scaling round-trip needed.
    rmse = sqrt(mean_squared_error(test.values, test_predict))
    return test_predict, rmse

In [10]:
def forecast_all_units(data):
    """Run every forecasting model on each (floor, unit) combination.

    Every pairing of the distinct 'floor' values with the distinct 'unit'
    values is tried; pairings whose daily series is empty are skipped.

    Args:
        data: DataFrame with 'floor' and 'unit' columns plus whatever
            `prepare_data` reads.

    Returns:
        Dict keyed by `(floor, unit)`. Each value maps the model names 'ETS',
        'ARIMA', 'SARIMA', 'RNN' and 'LSTM' to a dict with that model's
        'forecast' and 'rmse'.
    """
    results = {}
    floors = data['floor'].unique()
    units = data['unit'].unique()

    for floor in floors:
        for unit in units:
            series = prepare_data(data, floor, unit)
            if len(series) == 0:
                # This floor/unit pairing has no data.
                continue

            train, test = train_test_split(series)

            # Models run in this order; the labels become the result keys.
            model_runs = (
                ('ETS', ets_forecast),
                ('ARIMA', arima_forecast),
                ('SARIMA', sarima_forecast),
                ('RNN', rnn_forecast),
                ('LSTM', lstm_forecast),
            )
            per_model = {}
            for label, run in model_runs:
                prediction, error = run(train, test)
                per_model[label] = {'forecast': prediction, 'rmse': error}
            results[(floor, unit)] = per_model

    return results


In [None]:
# Run forecasts for all units
all_forecasts = forecast_all_units(df)

# For each unit, list every model's RMSE and then name the lowest-RMSE model
for (floor, unit), models in all_forecasts.items():
    print(f"\nFloor {floor}, Unit {unit}:")
    best_model = min(models.items(), key=lambda entry: entry[1]['rmse'])[0]
    for model in models:
        results = models[model]
        print(f"  {model} RMSE: {results['rmse']:.2f}")
    print(f"  Best model: {best_model}")


In [None]:
# Save results to a CSV file
# Collect one record per (unit, model) pair and build the frame in a single
# call: DataFrame.append was removed in pandas 2.0, and growing a frame row
# by row copies it on every iteration.
result_rows = [
    {'Floor': floor, 'Unit': unit, 'Model': model, 'RMSE': results['rmse']}
    for (floor, unit), models in all_forecasts.items()
    for model, results in models.items()
]
# Passing `columns` keeps the header order fixed even when there are no rows.
results_df = pd.DataFrame(result_rows, columns=['Floor', 'Unit', 'Model', 'RMSE'])

results_df.to_csv('forecasting_results.csv', index=False)
print("\nForecasting results saved to 'forecasting_results.csv'")

In [None]:
# Example of how to access forecasts for a specific unit and model
floor, unit = 1, 1
model = 'LSTM'
forecast = all_forecasts[(floor, unit)][model]['forecast']
# Build the heading from `model` so the label always matches the forecast shown
print(f"\n{model} Forecast for Floor {floor}, Unit {unit}:")
print(forecast)