# Appliance Energy Augmentation and Forecasting

A project for enhancing appliance energy consumption data using TimeGAN for data augmentation and Facebook's Prophet for time series forecasting.

# Time Series Forecasting using Facebook's Prophet

In this notebook, we will use Facebook's Prophet package to forecast energy consumption.

## 1. Introduction

This notebook focuses on forecasting appliance energy consumption using Facebook's Prophet. The objective is to provide accurate and reliable forecasts based on augmented energy consumption data.

## 2. Background on Time Series Data

Time series data is a sequence of data points collected or recorded at specific time intervals. Understanding the characteristics of time series data, such as trends, seasonality, and noise, is crucial for accurate forecasting.

## 3. Installation of Required Packages

Install the necessary packages:

In [None]:
!pip install fpdf
!pip install pystan==2.19.1.1
!pip install fbprophet

## 4. Import Required Packages

Import the necessary packages:


In [None]:
import os
import shutil
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error

import re
import tempfile
import warnings
warnings.filterwarnings("ignore", message=".*to_pydatetime.*")
from fpdf import FPDF
import zipfile
import pdb
import io

print("Import DONE")

### Define input and output folders and important variables

In [None]:
# Folder that main() scans for the input '*.csv' files (one file per device).
The_input_folder = r'/kaggle/input/converted-augmented-data'
# Folder that receives the forecast CSVs and the per-device plot sub-folders.
The_output_folder = r'/kaggle/working'

# Date bounds process_file() uses to select the rows the model is trained on.
The_training_start_date = '2024-01-01'
The_training_end_date = '2024-12-31'

# x-axis limits applied to the forecast plot drawn by process_file().
plot_start_date = '2024-01-01'
plot_end_date = '2025-12-31'

## 5. Define holidays

#### Define the 2024 UAE public holidays to be used for forecasting

In [None]:
# Holiday calendar: one {"name", "date"} record per holiday date
uae_holidays_2024 = [
    {"name": "New Year's Day", "date": "2024-01-01"},
    {"name": "Eid Al Fitr", "date": "2024-04-10"},
    {"name": "Eid Al Fitr Holiday", "date": "2024-04-11"},
    {"name": "Eid Al Fitr Holiday", "date": "2024-04-12"},
    {"name": "Arafat Day", "date": "2024-06-15"},
    {"name": "Eid Al Adha", "date": "2024-06-16"},
    {"name": "Eid Al Adha Holiday", "date": "2024-06-17"},
    {"name": "Eid Al Adha Holiday", "date": "2024-06-18"},
    {"name": "Islamic New Year", "date": "2024-07-07"},
    {"name": "Prophet Muhammad's Birthday", "date": "2024-09-15"},
    {"name": "Commemoration Day", "date": "2024-12-01"},
    {"name": "UAE National Day", "date": "2024-12-02"},
    {"name": "UAE National Day Holiday", "date": "2024-12-03"}
]
# Two-column table handed to Prophet as `holidays=`:
# 'ds' holds the parsed date, 'holiday' holds the holiday's name.
holiday_df = pd.DataFrame({
    'ds': pd.to_datetime([record["date"] for record in uae_holidays_2024]),
    'holiday': [record["name"] for record in uae_holidays_2024],
})

## 5. Define helper functions

In [None]:
def calculate_mase(y_true, y_pred, window_size=7):
    """
    Calculate the Mean Absolute Scaled Error (MASE).

    The model's mean absolute error is divided by the mean absolute error of a
    one-step naive forecast (each value predicted by the previous observation).
    Both errors are computed after skipping the first ``window_size`` samples.

    Inputs are compared strictly by position. pandas index labels are ignored,
    so two Series with different indexes (e.g. a filtered frame versus a fresh
    forecast) are still paired element by element.

    Parameters:
    y_true (pandas.Series, numpy.ndarray or sequence): The true values.
    y_pred (pandas.Series, numpy.ndarray or sequence): The predicted values,
        in the same order and of the same length as ``y_true``.
    window_size (int): Number of leading samples excluded from both error
        averages.
    Returns:
    float: The MASE value. ``nan`` when no samples remain after skipping
        ``window_size`` values or when both errors are zero; ``inf`` when only
        the naive error is zero. NaN inputs propagate to the result.
    Raises:
    ValueError: If ``y_true`` and ``y_pred`` do not have the same length.
    """
    # Work on plain arrays so arithmetic is positional rather than label-aligned.
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {actual.size} and {predicted.size}"
        )

    # Nothing left to score once the leading window is skipped.
    if actual.size <= window_size:
        return float('nan')

    # Naive forecast: the previous observation; the first sample predicts itself.
    naive_forecast = np.concatenate((actual[:1], actual[:-1]))

    # Calculate the in-sample MAE of the naive forecast
    mae_naive = np.mean(np.abs(actual[window_size:] - naive_forecast[window_size:]))
    # Calculate the MAE of the model forecast
    mae_model = np.mean(np.abs(actual[window_size:] - predicted[window_size:]))

    # A constant series has zero naive error; report the limit value explicitly
    # instead of relying on a floating-point division warning.
    if mae_naive == 0:
        return float('nan') if mae_model == 0 else float('inf')
    return float(mae_model / mae_naive)
 
def extract_info(device_name):
    """
    Split a device name into its date, appliance and status parts.

    Parameters:
    device_name (str): Device identifier, e.g. ``"2024-01-01_Water_on"``.
    Returns:
    tuple: ``(date, appliance, status)`` where
        * ``date`` is the text before the first underscore, or ``"Unknown"``
          when the name contains no underscore;
        * ``appliance`` is the canonical (capitalised) name of the first known
          appliance found in ``device_name`` regardless of letter case, or
          ``"Unknown"``;
        * ``status`` is whatever follows the last occurrence of that appliance
          name, with surrounding underscores removed, or ``"Unknown"`` when no
          appliance was recognised.
    """
    # Matching is case-insensitive, so one canonical spelling per appliance is enough.
    appliances = ("Water", "Printer", "Microwave", "Kettle", "Fridge", "Coffee", "PC1")

    # Extract date (assuming it's the first part before an underscore)
    date = device_name.split('_')[0] if '_' in device_name else "Unknown"

    for appliance in appliances:
        hits = list(re.finditer(re.escape(appliance), device_name, flags=re.IGNORECASE))
        if hits:
            # Locate the status with the same case-insensitive match that found
            # the appliance, so "…_water_on" and "…_Water_on" behave alike.
            status = device_name[hits[-1].end():].strip('_')
            return date, appliance, status

    return date, "Unknown", "Unknown"

## 6. Define the main processing function

In [None]:
# Function to process each file
def process_file(file_path):
    """
    Fit a Prophet model to one device CSV and produce its forecast artefacts.

    Steps: read the CSV, map its time/value columns to Prophet's ``ds``/``y``,
    keep the rows inside the configured training window, drop values at or
    above the 98th percentile, fit Prophet (custom holiday table plus the 'AE'
    country holidays), forecast 360 further 12-hour steps, print in-sample
    error metrics, draw the forecast and components figures, and write the
    forecast to ``<appliance>_forecasted_data.csv`` in ``The_output_folder``.

    Reads the module-level settings ``The_training_start_date``,
    ``The_training_end_date``, ``plot_start_date``, ``plot_end_date``,
    ``The_output_folder`` and ``holiday_df``.

    Parameters:
    file_path (str): Path of the CSV file to process. The file name (without
        extension) is split on underscores; the second and third parts are
        used as the date and status shown in the plot title.
    Returns:
    tuple: ``(file_path, img1, img2)`` where ``img1``/``img2`` are
        ``io.BytesIO`` buffers holding the forecast and components plots as
        PNG, or ``(file_path, None, None)`` when processing failed (the error
        is printed, not raised).
    """
    # Ordered tuple rather than a set, so the detected name does not depend on
    # hash ordering when more than one entry matches.
    known_appliances = (
        "Water", "Printer", "Microwave", "Kettle", "Fridge", "Coffee", "PC1",
        "water", "printer", "microwave", "kettle", "fridge", "coffee", "pc1",
    )
    fig1 = fig2 = None
    try:
        # Read the CSV file
        df = pd.read_csv(file_path)

        # Extract device information. Missing name parts fall back to
        # "Unknown" instead of aborting the whole file.
        device_name = os.path.splitext(os.path.basename(file_path))[0]
        name_parts = device_name.split('_')
        date = name_parts[1] if len(name_parts) > 1 else "Unknown"
        status = name_parts[2] if len(name_parts) > 2 else "Unknown"
        appliance = next((appl for appl in known_appliances if appl in device_name), "unknown appliance")

        # Check if 'time' and 'value' columns exist
        if 'time' not in df.columns or 'kWh.mean_value' not in df.columns:
            # Try to identify time and value columns
            time_col = df.select_dtypes(include=['datetime64', 'object']).columns[0]
            value_col = df.select_dtypes(include=['float64', 'int64']).columns[0]
            print(f"Assuming '{time_col}' as time column and '{value_col}' as value column")
            # Rename columns to match Prophet requirements
            df = df.rename(columns={time_col: 'ds', value_col: 'y'})
        else:
            # Rename columns to match Prophet requirements
            df = df.rename(columns={'time': 'ds', 'kWh.mean_value': 'y'})

        # Convert 'ds' column to datetime and remove timezone information
        df['ds'] = pd.to_datetime(df['ds'], utc=True).dt.tz_localize(None)

        # Define the start and end dates for the time frame
        start_date = The_training_start_date
        end_date = The_training_end_date

        # Rows inside the training window.
        # NOTE(review): a date-only end bound compares as midnight, so readings
        # later on the end date itself are excluded - confirm this is intended.
        in_window = (df['ds'] >= start_date) & (df['ds'] <= end_date)
        filtered_df = df[in_window]

        # Remove outliers; copy so the assignment below modifies an independent
        # frame rather than a view of df.
        filtered_df = filtered_df[filtered_df['y'] < filtered_df['y'].quantile(0.98)].copy()

        # Small positive offset kept from the original pipeline ("avoid log(0)").
        filtered_df['y'] = filtered_df['y'] + 1e-8

        # Define the Prophet model with holidays and seasonality parameters.
        # Every appliance currently uses the same configuration.
        model = Prophet(holidays=holiday_df, weekly_seasonality=True, yearly_seasonality=True, daily_seasonality=True)

        # Add country holidays
        model.add_country_holidays(country_name='AE')

        model.fit(filtered_df)

        # Make future dataframe for predictions
        future = model.make_future_dataframe(periods=360, freq='12h', include_history=True)

        # Make predictions
        forecast = model.predict(future)
        # Clip negative forecast values to zero to ensure all predictions are non-negative
        for column in ('yhat', 'yhat_lower', 'yhat_upper'):
            forecast[column] = forecast[column].clip(lower=0)

        # Pair every training observation with the prediction for the same
        # timestamp. Joining on 'ds' keeps the pairs correct even if the input
        # rows are unsorted or contain repeated timestamps.
        fitted = (
            filtered_df[['ds', 'y']]
            .merge(forecast[['ds', 'yhat']], on='ds', how='inner')
            .sort_values('ds')
            .reset_index(drop=True)
        )
        print(appliance)
        rmse = np.sqrt(mean_squared_error(y_true=fitted['y'], y_pred=fitted['yhat']))
        mae = mean_absolute_error(y_true=fitted['y'], y_pred=fitted['yhat'])
        mase = calculate_mase(fitted['y'], fitted['yhat'])

        # The first value is the square root of the MSE, so it is labelled RMSE.
        print("RMSE: ", rmse)
        print("MAE: ", mae)
        print("MASE: ", mase)

        # Original observations inside the training window (outliers included)
        df = df[in_window]

        # Create the plot for forecast
        fig1, ax1 = plt.subplots(figsize=(21, 10))
        ax1.plot(df['ds'], df['y'], label='The Original Data', linewidth=2)
        ax1.plot(forecast['ds'], forecast['yhat'], color='green', label='Forecast', linewidth=2)
        ax1.set_xlim(pd.Timestamp(plot_start_date), pd.Timestamp(plot_end_date))
        # Shade the uncertainty interval, trimmed by 10 % of its width on each side
        interval_width = forecast['yhat_upper'] - forecast['yhat_lower']
        ax1.fill_between(forecast['ds'],
                         forecast['yhat_lower'] + interval_width * 0.1,
                         forecast['yhat_upper'] - interval_width * 0.1,
                         color='green', alpha=0.4)
        ax1.legend()
        ax1.set_title('Forecast Plot of ' + date + ' ' + appliance + ' ' + status)
        plt.show()

        # Create the components plot
        fig2 = model.plot_components(forecast, weekly_start=1)

        # Save plots to memory
        img1 = io.BytesIO()
        fig1.savefig(img1, format='png')
        img1.seek(0)
        img2 = io.BytesIO()
        fig2.savefig(img2, format='png')
        img2.seek(0)

        # Save the forecasted data to a CSV file.
        # NOTE(review): the file name contains only the appliance, so two input
        # files for the same appliance overwrite each other's forecast CSV.
        forecasted_data = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
        forecasted_data.to_csv(os.path.join(The_output_folder, f"{appliance}_forecasted_data.csv"), index=False)
        return file_path, img1, img2
    except Exception as e:
        # Best-effort batch processing: report the failure and let the caller
        # continue with the next file.
        print(f"Error processing {file_path}: {str(e)}")
        return file_path, None, None
    finally:
        # Release figure memory on success and on failure alike.
        for figure in (fig1, fig2):
            if figure is not None:
                plt.close(figure)

## 7. Main function to process files

In [None]:
# Main function to process files and generate reports
def main():
    """
    Run the forecast for every CSV file in ``The_input_folder``.

    Each file is passed to ``process_file``. For every file that produced both
    plots, a sub-folder named after the file (without extension) is created in
    ``The_output_folder`` and the plots are written there as
    ``forecast_plot.png`` and ``components_plot.png``. Files whose processing
    failed are skipped.
    """
    input_folder = The_input_folder
    output_folder = The_output_folder

    # Create output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Get list of CSV files; sorted so runs process files in a stable order
    csv_files = sorted(glob.glob(os.path.join(input_folder, '*.csv')))
    if not csv_files:
        print(f"No CSV files found in {input_folder}")

    # Process files sequentially
    for file in csv_files:
        file_path, img1, img2 = process_file(file)
        # process_file signals failure by returning None for the images
        if img1 is None or img2 is None:
            continue

        # Create a subfolder for each device
        device_name = os.path.splitext(os.path.basename(file_path))[0]
        device_folder = os.path.join(output_folder, device_name)
        os.makedirs(device_folder, exist_ok=True)

        # Save images; buffers are written straight to disk and not retained,
        # so memory use does not grow with the number of files.
        for image_name, image in (('forecast_plot.png', img1), ('components_plot.png', img2)):
            with open(os.path.join(device_folder, image_name), 'wb') as f:
                f.write(image.getvalue())
    print("All files processed.")
 
# Run the batch forecast only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()