In [2]:
import pytz
from datetime import datetime
import pandas as pd
import numpy as np
import os
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import keras
import pandas as pd
from itables import show
from keras.models import Sequential # type: ignore
from keras.layers import Dense, Input, LSTM, Embedding, SimpleRNN, GRU, Bidirectional, Conv1D, MaxPooling1D, Flatten, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.models import load_model
from keras.metrics import MeanSquaredError, MeanAbsoluteError, RootMeanSquaredError       
from keras.regularizers import l2
from sklearn.metrics import mean_squared_error, mean_absolute_error, root_mean_squared_error, mean_absolute_percentage_error
import scipy
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sktime.transformations.series.holiday import HolidayFeatures
from holidays import country_holidays, financial_holidays
from sklearn.multioutput import MultiOutputRegressor
from lightgbm import LGBMRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from padasip.filters import FilterRLS
import pickle
from sklearn.model_selection import TimeSeriesSplit
# np.printoptions() only builds a context manager and changes nothing unless it is
# used in a `with` block; np.set_printoptions() applies the setting for the session.
np.set_printoptions(suppress=True)
pd.set_option('display.float_format', lambda x: '%.2f' % x)
from weather_utils import WeatherUtils
import seaborn as sns
import holidays
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 1000)


In [None]:
class ForecasterUtils:
    def get_barbados_holidays_list(self, years):
        """Build a DataFrame of Barbados holidays for the requested year(s).

        Args:
            years: Passed straight through to ``holidays.Barbados(years=...)``.

        Returns:
            DataFrame whose index holds the calendar's keys converted with
            ``pd.to_datetime`` and whose single column holds the calendar's values.
        """
        holiday_calendar = holidays.Barbados(years=years)
        holiday_dates = pd.to_datetime(list(holiday_calendar.keys()))
        return pd.DataFrame(holiday_calendar.values(), index=holiday_dates)

    def remove_first_and_last_days(self, raw_data):
        """Return a copy of ``raw_data`` without the rows of its first and last calendar day.

        The "first" and "last" days are taken from the first and last index entries
        (positionally), so the input is expected to have a DatetimeIndex.

        Args:
            raw_data: Series or DataFrame with a DatetimeIndex.

        Returns:
            A new object with every row on those two calendar days removed. Empty
            input is returned as an empty copy, and data covering a single day
            yields an empty result instead of raising.
        """
        raw_data = raw_data.copy()
        if len(raw_data.index) == 0:
            return raw_data

        # Midnight-normalised timestamps identify the calendar day of every row.
        day_of_row = raw_data.index.normalize()
        boundary_days = {day_of_row[0], day_of_row[-1]}

        # One boolean mask handles both days, including the case where they coincide.
        return raw_data[~day_of_row.isin(list(boundary_days))]

    def remove_last_day(self, raw_data):
        """Return a copy of ``raw_data`` without the rows of its last calendar day.

        The day is taken from the last index entry and selected with a
        ``YYYY-MM-DD`` label lookup, so a DatetimeIndex is required.
        """
        trimmed = raw_data.copy()
        final_day_label = trimmed.index[-1].strftime("%Y-%m-%d")
        final_day_rows = trimmed.loc[final_day_label].index
        return trimmed.drop(final_day_rows)

    def add_extra_hour_in_beginning(self, raw_data):
        """Return a sorted copy of ``raw_data`` with one extra leading row.

        The new row is stamped one hour before the first index entry and carries
        the same value(s) as that first entry.
        """
        padded = raw_data.copy()
        earliest = padded.index[0]
        one_hour_earlier = pd.Timestamp(earliest - pd.Timedelta(hours=1))
        # Label-based assignment to a missing key appends the row; sorting moves it to the front.
        padded.loc[one_hour_earlier] = padded.loc[earliest]
        return padded.sort_index()

    def remove_incomplete_days(self, raw_data):
        raw_data = raw_data.copy()
        grouped_raw_data = raw_data.groupby(by=raw_data.index.date).count()
        incomplete_days = grouped_raw_data[grouped_raw_data != 24].index
        raw_data = self.remove_dates_from_raw_data(raw_data, incomplete_days, "Removing Incomplete Days")

        return raw_data

    # This function assumes that the incoming raw data has a datetime index
    def preprocess_raw_data(self, data, column_name, threshold):
        data = data.copy()
        data = self.add_extra_hour_in_beginning(data)
        data = data.resample(rule="h", closed="left", label="right").mean()
        data = self.remove_last_day(data)
        data = data[column_name]
        filtered_data, nan_values_filtered = self.remove_nan_days_above_threshold(threshold, data)
        filtered_data = filtered_data.interpolate(method="time")
        filtered_data = self.remove_incomplete_days(filtered_data)

        return filtered_data

    def remove_dates_from_raw_data(self, raw_data, dates, comment):
        """Return a copy of ``raw_data`` without any row falling on one of ``dates``.

        Args:
            raw_data: Series or DataFrame with a DatetimeIndex.
            dates: Iterable of date-like values (``datetime.date``, ``Timestamp``, ...);
                only their calendar date is used.
            comment: Prefix for the progress message printed to stdout.

        Returns:
            A new object with the matching rows removed. Dates that do not occur
            in ``raw_data`` are ignored.
        """
        print(f"{comment} - Total Dropped Days: ", len(dates))

        days_to_drop = {pd.Timestamp(day).date() for day in dates}
        if not days_to_drop:
            return raw_data.copy()

        # One vectorised membership test replaces a label lookup and drop per date.
        on_dropped_day = pd.Index(raw_data.index.date).isin(list(days_to_drop))
        return raw_data[~on_dropped_day].copy()

    def remove_nan_days_above_threshold(self, threshold, raw_data):
        nan_values = raw_data[raw_data.isna()].fillna(0)
        nan_values = nan_values.groupby(by=nan_values.index.date).count()
        nan_values.index = pd.to_datetime(nan_values.index)
        nan_values_filtered = nan_values[nan_values > threshold]
        filtered_data = self.remove_dates_from_raw_data(raw_data, nan_values_filtered.index, "Removing Nan Values")

        return filtered_data, nan_values_filtered

    def remove_space_in_column_names(self, data):
        """Replace spaces with underscores in the column names of ``data``.

        The frame is modified in place and also returned.
        """
        underscored = data.columns.str.replace(" ", "_")
        data.columns = underscored
        return data

    def get_raw_data_from_path(self, filepath):
        """Load a CSV indexed by its ``Time`` column and clean it.

        Rows containing any missing value are dropped, the index is converted to
        timezone-naive datetimes, and for duplicated timestamps only the last
        occurrence is kept.

        Args:
            filepath: Location of the CSV file; it must contain a ``Time`` column.

        Returns:
            The cleaned DataFrame.
        """
        loaded = pd.read_csv(filepath, na_values=[""], skipinitialspace=True, index_col="Time", parse_dates=True)
        loaded = loaded.dropna()

        loaded.index = pd.to_datetime(loaded.index).tz_localize(None)
        is_superseded = loaded.index.duplicated(keep="last")
        return loaded[~is_superseded]

    def minmax_scale(self, data, min, max):
        """Scale ``data`` with ``(data - min) / (max - min)``."""
        offset_from_min = data - min
        value_range = max - min
        return offset_from_min / value_range

    def minmax_inverse_scale(self, data, min, max):
        """Undo min-max scaling with ``data * (max - min) + min``."""
        value_range = max - min
        return (data * value_range) + min

    def get_unique_dates_from_hourly_data(self, data):
        """Return the distinct calendar dates in the index of ``data`` as datetimes."""
        calendar_days = data.index.date
        return pd.to_datetime(calendar_days).unique()

    # def get_one_hot_day_of_week(self, dates_index):
    #     day_of_week = pd.get_dummies(dates_index.dayofweek).astype(int)
    #     day_of_week.index = dates_index
    #     print(day_of_week)
    #     day_of_week.columns = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    #     return day_of_week
    
    def get_one_hot_day_of_week(self, dates_index):
        """One-hot encode the weekday of every date.

        Args:
            dates_index: Date-like values accepted by ``pd.to_datetime`` that
                yield a DatetimeIndex (e.g. a list or DatetimeIndex).

        Returns:
            Integer DataFrame indexed by the converted dates with the columns
            ``Monday`` .. ``Sunday``; each row has a single 1 in its weekday column.
        """
        weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
        dates_index = pd.to_datetime(dates_index)

        # dayofweek is 0 for Monday .. 6 for Sunday, i.e. directly the column position.
        weekday_codes = np.asarray(dates_index.dayofweek)
        encoded = np.zeros((len(dates_index), len(weekday_names)), dtype=np.int64)
        encoded[np.arange(len(dates_index)), weekday_codes] = 1

        return pd.DataFrame(encoded, index=dates_index, columns=weekday_names)

    def add_is_holiday_column_to_one_hot_day_of_week(self, day_of_week, holidays_df):
        """Add an ``is_holiday`` 0/1 column to ``day_of_week``.

        A row is flagged when its index value also appears in the index of
        ``holidays_df``. ``day_of_week`` is modified in place and returned.
        """
        holiday_flags = day_of_week.index.isin(holidays_df.index).astype(int)
        day_of_week["is_holiday"] = holiday_flags
        return day_of_week

    def reshape_X_to_LSTM_shape(self, X):
        """Reshape a ``(samples, features)`` array to ``(samples, 1, features)``."""
        n_samples, n_features = X.shape[0], X.shape[1]
        return X.reshape(n_samples, 1, n_features)

    def create_X_y(self, data, target_column, hours_in_day, day_of_week):
        X = []
        y_bl = []
        y_cl = []

        prev_dates = []
        cur_dates = []

        for i in range(hours_in_day, len(data), hours_in_day):
            cur = i
            prev = i - hours_in_day

            if data.index[cur] != data.index[prev] + pd.Timedelta(days=1):
                print(
                    f"ALERT: Current: {data.index[cur]} to {data.index[cur + hours_in_day - 1]}, Previous: {data.index[prev]} to {data.index[prev + hours_in_day - 1]}"
                )
                print("ALERT: Day Change Detected")
                continue

            print(
                f"Current: {data.index[cur]} to {data.index[cur + hours_in_day - 1]}, Previous: {data.index[prev]} to {data.index[prev + hours_in_day - 1]}"
            )

            prev_historic_temp = data["temperature_2m_historic"].iloc[prev:cur].values.flatten()
            prev_forecasted_temp = data["temperature_2m_forecast"][prev:cur].values.flatten()
            prev_historic_shortwave_radiation = data["shortwave_radiation_historic"].iloc[prev:cur].values.flatten()
            prev_forecasted_shortwave_radiation = data["shortwave_radiation_forecast"][prev:cur].values.flatten()
            prev_load_demand = data[target_column].iloc[prev:cur].values.flatten()

            cur_historic_temp = data["temperature_2m_historic"].iloc[cur : cur + hours_in_day].values.flatten()
            cur_forecasted_temp = data["temperature_2m_forecast"].iloc[cur : cur + hours_in_day].values.flatten()
            cur_historic_shortwave_radiation = data["shortwave_radiation_historic"].iloc[cur : cur + hours_in_day].values.flatten()
            cur_forecasted_shortwave_radiation = data["shortwave_radiation_forecast"].iloc[cur : cur + hours_in_day].values.flatten()
            cur_load_demand = data[target_column].iloc[cur : cur + hours_in_day].values.flatten()
            cur_day_of_week = day_of_week.iloc[cur // hours_in_day].values.flatten()

            X_day = np.hstack(
                [
                    prev_load_demand,
                    prev_historic_temp,
                    prev_forecasted_temp,
                    cur_historic_temp,
                    cur_forecasted_temp,
                    prev_historic_shortwave_radiation,
                    prev_forecasted_shortwave_radiation,
                    cur_historic_shortwave_radiation,
                    cur_forecasted_shortwave_radiation,
                    cur_day_of_week,
                ]
            )
            y_day_bl = cur_load_demand
            y_day_cl = cur_load_demand - prev_load_demand

            X.append(X_day)
            y_bl.append(y_day_bl)
            y_cl.append(y_day_cl)

            prev_day_date = data.index[prev:cur].values.flatten()
            cur_day_date = data.index[cur : cur + hours_in_day].values.flatten()

            prev_dates.append(prev_day_date)
            cur_dates.append(cur_day_date)

        X = np.array(X)
        X_LSTM = self.reshape_X_to_LSTM_shape(X)
        y_bl = np.array(y_bl)
        y_cl = np.array(y_cl)

        cur_dates = np.array(cur_dates)
        prev_dates = np.array(prev_dates)

        return X, X_LSTM, y_bl, y_cl, cur_dates, prev_dates

    def convert_cl_to_bl(self, y_pred_cl, y, X, hours_in_day):
        """Turn predicted load changes into load levels by adding the previous-day load.

        The previous-day load is read from the first ``hours_in_day`` columns of ``X``.

        Args:
            y_pred_cl: Predicted change in load, shape ``(samples, hours_in_day)``.
            y: Unused; kept so existing callers keep working.
            X: Feature matrix whose leading ``hours_in_day`` columns hold the
                previous-day load.
            hours_in_day: Number of leading columns of ``X`` to use.

        Returns:
            ``y_pred_cl + X[:, :hours_in_day]``.

        Raises:
            ValueError: If the prediction and previous-day load shapes differ.
        """
        y_prev_bl = X[:, :hours_in_day]

        print(y_prev_bl.shape, y_pred_cl.shape)

        # Fail loudly instead of falling through to an undefined result variable.
        if y_prev_bl.shape != y_pred_cl.shape:
            raise ValueError(
                f"Shape mismatch: previous-day load has shape {y_prev_bl.shape}, "
                f"predicted change has shape {y_pred_cl.shape}"
            )

        return y_pred_cl + y_prev_bl

    # callbacks=[EarlyStopping(patience=50), ReduceLROnPlateau(patience=20, factor=0.1, min_lr=0.000000001)]

    def predict_model(self, model, X):
        """Return ``model.predict(X)``."""
        return model.predict(X)

    def get_trained_model(self, path, model_type, feeder_filesave_name):
        """Load the model stored at ``{path}/{model_type}/{feeder_filesave_name}.keras``."""
        return load_model(f"{path}/{model_type}/{feeder_filesave_name}.keras")

    def get_train_stats(self, train_stats_path, feeder_filesave_name):
        """Read ``{train_stats_path}/{feeder_filesave_name}_train_stats.csv``.

        The first CSV column is used as the index of the returned DataFrame.
        """
        stats_file = f"{train_stats_path}/{feeder_filesave_name}_train_stats.csv"
        return pd.read_csv(stats_file, index_col=0)

    def adapt_rls_combiner_and_predict(self, results, hours, rls_combiner=None, adapt=True):
        """Combine per-model predictions with one RLS filter per hour of the day.

        Rows of ``results`` are processed in order; row ``r`` is handled by filter
        ``r % hours``. Each filter first predicts from the row's model outputs and,
        when ``adapt`` is true, is then updated with the row's actual value.

        Args:
            results: DataFrame whose first column is the actual value and whose
                remaining columns are the individual model predictions. A column
                ``RLS`` with the combined predictions is added in place.
            hours: Number of filters, i.e. rows per day.
            rls_combiner: Existing list of filters; a fresh list of ``FilterRLS``
                objects is built when this is empty or ``None``.
            adapt: Whether to update the filters after each prediction.

        Returns:
            Tuple ``(results, rls_combiner)``.
        """
        if not rls_combiner:
            print("Building a new RLS Combiner")
            rls_combiner = [FilterRLS(n=results.shape[1] - 1, mu=0.99) for _ in range(hours)]

        model_outputs = results.iloc[:, 1:].to_numpy(dtype=float)
        actuals = results.iloc[:, 0].to_numpy()

        rls_preds = []
        for row in range(results.shape[0]):
            # Each row must use its own values and the filter for its hour of day.
            hour_filter = rls_combiner[row % hours]
            row_inputs = model_outputs[row]

            # Predict before adapting so the prediction never sees its own target,
            # and record it whether or not the filter is being adapted.
            rls_preds.append(hour_filter.predict(row_inputs))
            if adapt:
                hour_filter.adapt(actuals[row], row_inputs)

        results["RLS"] = rls_preds
        return results, rls_combiner

    def build_rls_combiner(self, hours_in_day, n_preds):
        """Create ``hours_in_day`` independent ``FilterRLS(n=n_preds, mu=0.99)`` filters."""
        combiner = []
        for _ in range(hours_in_day):
            combiner.append(FilterRLS(n=n_preds, mu=0.99))
        return combiner

    def adapt_and_predict_rls_combiner(self, actual, pred_1, pred_2, hours_in_day, rls_combiner=None):
        """Blend two prediction arrays with one RLS filter per hour, adapting as it goes.

        For every sample and hour, the hour's filter predicts from the pair
        ``(pred_1, pred_2)`` and is then adapted with the matching ``actual`` value.

        Args:
            actual: Array of shape ``(samples, hours_in_day)`` with the true values.
            pred_1: First prediction array, indexed like ``actual``.
            pred_2: Second prediction array, indexed like ``actual``.
            hours_in_day: Number of hours (and filters) per sample.
            rls_combiner: Existing list of filters; built via
                ``build_rls_combiner(hours_in_day, 2)`` when empty or ``None``.

        Returns:
            Tuple ``(combined_predictions, rls_combiner)`` where the predictions
            have shape ``(samples, hours_in_day)``.
        """
        if not rls_combiner:
            rls_combiner = self.build_rls_combiner(hours_in_day, 2)

        n_days = actual.shape[0]
        combined = []

        for day in range(n_days):
            for hour in range(hours_in_day):
                hour_filter = rls_combiner[hour]
                model_outputs = np.array([pred_1[day, hour], pred_2[day, hour]]).reshape(-1)
                target = actual[day, hour]

                # Predict first, then update the filter with the observed value.
                combined.append(hour_filter.predict(model_outputs))
                hour_filter.adapt(target, model_outputs)

        return np.array(combined).reshape(n_days, hours_in_day), rls_combiner

    def smape_metric(self, y_true, y_pred):
        """Symmetric MAPE in percent: ``100 * mean(2 * |pred - true| / (|pred| + |true|))``."""
        abs_error = np.abs(y_pred - y_true)
        magnitude_sum = np.abs(y_pred) + np.abs(y_true)
        return 100 * np.mean(2 * abs_error / magnitude_sum)

    def get_metrics(self, results):
        """Score every prediction column of ``results`` against its ``Actual`` column.

        Args:
            results: DataFrame with an ``Actual`` column plus one column per model.

        Returns:
            DataFrame indexed by model column with the columns ``SMAPE``, ``MAE``
            and ``RMSE``.
        """
        frame = results.copy()
        actual = frame["Actual"]
        preds = frame.drop(columns=["Actual"])

        return pd.DataFrame(
            {
                "SMAPE": preds.apply(lambda column: self.smape_metric(actual, column)),
                "MAE": preds.apply(lambda column: mean_absolute_error(actual, column)),
                "RMSE": preds.apply(lambda column: root_mean_squared_error(actual, column)),
            }
        )

    def train_ann_model(self, X_train, y_train, callbacks=None):
        """Build, compile and fit a small dense network.

        Architecture: input of width ``X_train.shape[1]``, one hidden Dense layer
        of 46 ReLU units, and a linear output layer of width ``y_train.shape[1]``.
        Trained with Adam (learning rate 0.001) on MSE for 200 epochs, batch size
        32, without shuffling.

        Args:
            X_train: 2-D feature array.
            y_train: 2-D target array.
            callbacks: Optional Keras callbacks forwarded to ``fit``.

        Returns:
            The fitted model.
        """
        n_features = X_train.shape[1]
        n_outputs = y_train.shape[1]

        network = Sequential()
        network.add(Input(shape=(n_features,)))
        network.add(Dense(46, activation='relu'))
        network.add(Dense(n_outputs, activation='linear'))

        tracked_metrics = [MeanAbsoluteError(), RootMeanSquaredError()]
        network.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=tracked_metrics)
        network.summary()

        network.fit(X_train, y_train, epochs=200, batch_size=32, callbacks=callbacks, shuffle=False)

        return network

    def train_lstm_model(self, X_train_LSTM, y_train, callbacks=None):
        """Build, compile and fit a single-layer LSTM network.

        Architecture: input of shape ``X_train_LSTM.shape[1:3]``, one LSTM layer
        with 150 units returning only its final output, and a linear output layer
        of width ``y_train.shape[1]``. Trained with Adam (learning rate 0.001) on
        MSE for 50 epochs, batch size 1, without shuffling.

        Args:
            X_train_LSTM: 3-D feature array.
            y_train: 2-D target array.
            callbacks: Optional Keras callbacks forwarded to ``fit``.

        Returns:
            The fitted model.
        """
        timesteps, n_features = X_train_LSTM.shape[1], X_train_LSTM.shape[2]
        n_outputs = y_train.shape[1]

        network = Sequential()
        network.add(Input(shape=(timesteps, n_features)))
        network.add(LSTM(150, return_sequences=False))
        network.add(Dense(n_outputs, activation='linear'))

        tracked_metrics = [MeanAbsoluteError(), RootMeanSquaredError()]
        network.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=tracked_metrics)
        network.summary()

        network.fit(X_train_LSTM, y_train, epochs=50, batch_size=1, callbacks=callbacks, shuffle=False)

        return network

    def save_trained_model(self, model, path, model_type, feeder_filesave_name):
        """Save ``model`` to ``{path}/{model_type}/{feeder_filesave_name}.keras``.

        The target directory is created when missing, matching what
        ``save_rls_combiner`` does for its pickle files.

        Args:
            model: Object exposing ``save(filepath)``.
            path: Root directory for saved models.
            model_type: Sub-directory name under ``path``.
            feeder_filesave_name: File name without extension.
        """
        model_dir = f'{path}/{model_type}'
        os.makedirs(model_dir, exist_ok=True)

        model_save_path = f'{model_dir}/{feeder_filesave_name}.keras'
        model.save(model_save_path)

        return
    
    def save_rls_combiner(self, rls_combiner, model_type, path, feeder_filesave_name):
        """Pickle ``rls_combiner`` to ``{path}/{model_type}/{feeder_filesave_name}.pkl``.

        The directory ``{path}/{model_type}`` is created when it does not exist.
        Note that ``model_type`` comes before ``path`` in this signature.
        """
        target_dir = f'{path}/{model_type}'
        pickle_path = f'{target_dir}/{feeder_filesave_name}.pkl'
        os.makedirs(target_dir, exist_ok=True)

        with open(pickle_path, 'wb') as handle:
            pickle.dump(rls_combiner, handle)

        return
    
    def get_saved_rls_combiner(self, path, model_type, feeder_filesave_name):
        """Unpickle and return the object at ``{path}/{model_type}/{feeder_filesave_name}.pkl``."""
        pickle_path = f'{path}/{model_type}/{feeder_filesave_name}.pkl'
        # NOTE(review): pickle.load runs arbitrary code from the file; only load
        # files written by this project.
        with open(pickle_path, 'rb') as handle:
            return pickle.load(handle)
    
    def save_results(self, results, path, feeder_filesave_name, is_validation=False):
        """Write ``results`` to CSV under ``path``.

        The file name ends in ``_Validation_Results.csv`` when ``is_validation``
        is true and in ``_Test_Results.csv`` otherwise.
        """
        destination = (
            f"{path}/{feeder_filesave_name}_Validation_Results.csv"
            if is_validation
            else f"{path}/{feeder_filesave_name}_Test_Results.csv"
        )
        results.to_csv(destination)
        
    def save_metrics(self, metrics, path, feeder_filesave_name, is_validation=False):
        """Write ``metrics`` to CSV under ``path``.

        The file name ends in ``_Validation_Metrics.csv`` when ``is_validation``
        is true and in ``_Test_Metrics.csv`` otherwise.
        """
        destination = (
            f"{path}/{feeder_filesave_name}_Validation_Metrics.csv"
            if is_validation
            else f"{path}/{feeder_filesave_name}_Test_Metrics.csv"
        )
        metrics.to_csv(destination)
    
    def day_ahead_forecast_feeders_data_from_trained_models(self, start_date, end_date, is_validation=False):
        """Forecast feeders with previously saved models and plot the results.

        For each selected feeder the method loads and preprocesses the load data,
        fetches historic and forecast weather starting one day before
        ``start_date``, normalises everything with the stored training min/max,
        builds day-ahead samples, runs the saved ANN/LSTM models, blends them with
        the saved RLS combiners, then plots the de-normalised results and prints
        the metrics. Nothing is written to disk here (the save calls are
        commented out).

        Reads these instance attributes, which are not set anywhere in the visible
        part of the class: ``feeders_metadata_path``, ``feeders_root_path``,
        ``saved_models_path``, ``train_stats_path``, ``hours_in_day``,
        ``target_column``, ``barbados_holidays_df`` and ``threshold``.

        Args:
            start_date: First date of the forecast window; must be accepted by
                ``pd.to_datetime``.
            end_date: Last date of the window, used for label slicing and passed
                to the weather API helpers.
            is_validation: Currently unused; only referenced by the commented-out
                save calls.
        """
        # The day before start_date is included so the first forecast day has a "previous day".
        prev_date = pd.to_datetime(start_date) - pd.Timedelta(days=1)
        prev_date = prev_date.strftime("%Y-%m-%d")
        feeders_metadata = pd.read_csv(self.feeders_metadata_path)
        weather_utils = WeatherUtils()
        feeders_root_path = self.feeders_root_path
        saved_models_path = self.saved_models_path
        train_stats_path = self.train_stats_path
        hours_in_day = self.hours_in_day
        target_column = self.target_column
        barbados_holidays_df = self.barbados_holidays_df
        threshold = self.threshold

        # NOTE(review): only metadata rows 9 .. n-3 are processed; the reason for this
        # hard-coded subset is not visible here — confirm it is intentional.
        for i in range(9, feeders_metadata.shape[0] - 2):
            feeder_metadata = feeders_metadata.iloc[i]
            feeder_name = feeder_metadata["FeederName"]
            feeder_data_filename = feeder_metadata["FileName"]
            feeder_filesave_name = feeder_metadata["FileSaveName"]
            # Plain string concatenation: feeders_root_path must already end with a separator.
            feeder_data_path = feeders_root_path + feeder_data_filename
            feeder_data = self.get_raw_data_from_path(feeder_data_path)
            net_load_demand = self.preprocess_raw_data(feeder_data, "NetLoadDemand", threshold)
            latitude = feeder_metadata["Latitude"]
            longitude = feeder_metadata["Longitude"]

            print("========================================")
            print(feeder_name)
            print("========================================")

            historic_weather_data = weather_utils.fetch_historic_data_from_api(latitude, longitude, prev_date, end_date)
            forecast_weather_data = weather_utils.fetch_forecast_data_from_api(latitude, longitude, prev_date, end_date)

            temperature_historic = historic_weather_data["temperature_2m_historic"]
            temperature_forecast = forecast_weather_data["temperature_2m_forecast"]
            shortwave_radiation_historic = historic_weather_data["shortwave_radiation_historic"]
            shortwave_radiation_forecast = forecast_weather_data["shortwave_radiation_forecast"]

            # Align load and weather on the index; rows missing any of the five series are dropped.
            combined_data = pd.concat(
                [net_load_demand, temperature_historic, temperature_forecast, shortwave_radiation_historic, shortwave_radiation_forecast], axis=1
            ).dropna()
            combined_data = combined_data.loc[prev_date:end_date]
            # combined_data_daytime = combined_data.between_time("06:00", "20:00")

            train_stats = self.get_train_stats(train_stats_path, feeder_filesave_name)
            print(train_stats)

            # Scale with the min/max saved at training time rather than statistics of this window.
            # normalized_data = self.minmax_scale(combined_data_daytime, train_stats["Min"], train_stats["Max"])
            normalized_data = self.minmax_scale(combined_data, train_stats["Min"], train_stats["Max"])
            print(normalized_data.describe())

            # denormalized_data = minmax_inverse_scale(normalized_data, train_stats['Min'], train_stats['Max'])
            # print(denormalized_data.describe())
            print(normalized_data)
            dates_index = self.get_unique_dates_from_hourly_data(normalized_data)
            print(dates_index)
            day_of_week = self.get_one_hot_day_of_week(dates_index)
            day_of_week = self.add_is_holiday_column_to_one_hot_day_of_week(day_of_week, barbados_holidays_df)

            X, X_LSTM, y_bl, y_cl, cur_dates, prev_dates = self.create_X_y(normalized_data, target_column, hours_in_day, day_of_week)

            # "BL" models predict the load itself, "CL" models the change versus the previous day
            # (see the y_bl / y_cl targets built by create_X_y).
            trained_ann_model_bl = self.get_trained_model(saved_models_path, "ANN_BL", feeder_filesave_name)
            trained_ann_model_cl = self.get_trained_model(saved_models_path, "ANN_CL", feeder_filesave_name)
            trained_lstm_model_bl = self.get_trained_model(saved_models_path, "LSTM_BL", feeder_filesave_name)
            trained_lstm_model_cl = self.get_trained_model(saved_models_path, "LSTM_CL", feeder_filesave_name)

            adapted_rls_combiner_ann = self.get_saved_rls_combiner(saved_models_path, "RLS_ANN", feeder_filesave_name)
            adapted_rls_combiner_lstm = self.get_saved_rls_combiner(saved_models_path, "RLS_LSTM", feeder_filesave_name)
            adapted_rls_combiner_final = self.get_saved_rls_combiner(saved_models_path, "RLS_Final", feeder_filesave_name)

            y_pred_bl = self.predict_model(trained_ann_model_bl, X)
            y_pred_cl = self.predict_model(trained_ann_model_cl, X)
            y_pred_bl_LSTM = self.predict_model(trained_lstm_model_bl, X_LSTM)
            y_pred_cl_LSTM = self.predict_model(trained_lstm_model_cl, X_LSTM)

            # Convert change predictions to load levels by adding the previous-day load held in X.
            y_pred_bl_cl = self.convert_cl_to_bl(y_pred_cl, y_bl, X, hours_in_day)
            y_pred_bl_cl_LSTM = self.convert_cl_to_bl(y_pred_cl_LSTM, y_bl, X, hours_in_day)

            print(X.shape, X_LSTM.shape, y_bl.shape, y_cl.shape, cur_dates.shape, prev_dates.shape)
            print(y_pred_bl.shape, y_pred_cl.shape, y_pred_bl_LSTM.shape, y_pred_cl_LSTM.shape)

            # Two-stage blend: combine BL and CL-derived predictions per model family, then
            # combine the two family outputs. The combiners are adapted with y_bl along the way,
            # but the updated combiners are not saved back in this method.
            y_pred_ann_rls, rls_combiner_ann = self.adapt_and_predict_rls_combiner(y_bl, y_pred_bl, y_pred_bl_cl, hours_in_day, adapted_rls_combiner_ann)
            y_pred_lstm_rls, rls_combiner_lstm = self.adapt_and_predict_rls_combiner(y_bl, y_pred_bl_LSTM, y_pred_bl_cl_LSTM, hours_in_day, adapted_rls_combiner_lstm)
            y_pred_final_rls, rls_combiner_final = self.adapt_and_predict_rls_combiner(y_bl, y_pred_ann_rls, y_pred_lstm_rls, hours_in_day, adapted_rls_combiner_final)

            final_results = pd.DataFrame([y_bl.flatten(), y_pred_ann_rls.flatten(), y_pred_lstm_rls.flatten(), y_pred_final_rls.flatten()]).T
            final_results.columns = ["Actual", "ANN_RLS", "LSTM_RLS", "Final_RLS"]
            final_results.index = cur_dates.flatten()

            # Metrics are computed on the normalised values, before the inverse scaling below.
            final_metrics = self.get_metrics(final_results)

            final_results = final_results.apply(
                lambda x: self.minmax_inverse_scale(x, train_stats.loc[target_column, "Min"], train_stats.loc[target_column, "Max"])
            )

            px.line(final_results).show()
            print(final_metrics)

            # self.save_results(final_results, self.saved_results_path, feeder_filesave_name, is_validation)
            # self.save_metrics(final_metrics, self.saved_metrics_path, feeder_filesave_name, is_validation)
            
    def batch_forecast_feeders_data_from_trained_models(self, start_date, end_date, is_validation=False):
        """Forecast every feeder with previously saved models and save results and metrics.

        For each row of the feeders metadata the method loads and preprocesses the
        load data, fetches historic and forecast weather for
        ``start_date``..``end_date``, normalises with the stored training min/max,
        builds day-ahead samples, runs the saved ANN/LSTM models, blends them with
        the saved RLS combiners, and writes the de-normalised results and the
        metrics to CSV.

        Reads these instance attributes, which are not set anywhere in the visible
        part of the class: ``feeders_metadata_path``, ``feeders_root_path``,
        ``saved_models_path``, ``train_stats_path``, ``hours_in_day``,
        ``target_column``, ``barbados_holidays_df``, ``threshold``,
        ``saved_results_path`` and ``saved_metrics_path``.

        Args:
            start_date: First date of the data window.
            end_date: Last date of the data window.
            is_validation: Selects the ``Validation`` or ``Test`` file-name suffix
                used by ``save_results`` / ``save_metrics``.
        """
        # NOTE(review): prev_date is computed but not used below; data is fetched and sliced
        # from start_date, so start_date itself can only serve as a "previous day".
        prev_date = pd.to_datetime(start_date) - pd.Timedelta(days=1)
        prev_date = prev_date.strftime("%Y-%m-%d")
        feeders_metadata = pd.read_csv(self.feeders_metadata_path)
        weather_utils = WeatherUtils()
        feeders_root_path = self.feeders_root_path
        saved_models_path = self.saved_models_path
        train_stats_path = self.train_stats_path
        hours_in_day = self.hours_in_day
        target_column = self.target_column
        barbados_holidays_df = self.barbados_holidays_df
        threshold = self.threshold

        for i in range(feeders_metadata.shape[0]):
            feeder_metadata = feeders_metadata.iloc[i]
            feeder_name = feeder_metadata["FeederName"]
            feeder_data_filename = feeder_metadata["FileName"]
            feeder_filesave_name = feeder_metadata["FileSaveName"]
            # Plain string concatenation: feeders_root_path must already end with a separator.
            feeder_data_path = feeders_root_path + feeder_data_filename
            feeder_data = self.get_raw_data_from_path(feeder_data_path)
            net_load_demand = self.preprocess_raw_data(feeder_data, "NetLoadDemand", threshold)
            latitude = feeder_metadata["Latitude"]
            longitude = feeder_metadata["Longitude"]

            print("========================================")
            print(feeder_name)
            print("========================================")

            historic_weather_data = weather_utils.fetch_historic_data_from_api(latitude, longitude, start_date, end_date)
            forecast_weather_data = weather_utils.fetch_forecast_data_from_api(latitude, longitude, start_date, end_date)

            temperature_historic = historic_weather_data["temperature_2m_historic"]
            temperature_forecast = forecast_weather_data["temperature_2m_forecast"]
            shortwave_radiation_historic = historic_weather_data["shortwave_radiation_historic"]
            shortwave_radiation_forecast = forecast_weather_data["shortwave_radiation_forecast"]

            # Align load and weather on the index; rows missing any of the five series are dropped.
            combined_data = pd.concat(
                [net_load_demand, temperature_historic, temperature_forecast, shortwave_radiation_historic, shortwave_radiation_forecast], axis=1
            ).dropna()
            combined_data = combined_data.loc[start_date:end_date]
            # combined_data_daytime = combined_data.between_time("06:00", "20:00")

            train_stats = self.get_train_stats(train_stats_path, feeder_filesave_name)
            print(train_stats)

            # Scale with the min/max saved at training time rather than statistics of this window.
            # normalized_data = self.minmax_scale(combined_data_daytime, train_stats["Min"], train_stats["Max"])
            normalized_data = self.minmax_scale(combined_data, train_stats["Min"], train_stats["Max"])
            print(normalized_data.describe())

            # denormalized_data = minmax_inverse_scale(normalized_data, train_stats['Min'], train_stats['Max'])
            # print(denormalized_data.describe())

            dates_index = self.get_unique_dates_from_hourly_data(normalized_data)
            # print(dates_index)
            day_of_week = self.get_one_hot_day_of_week(dates_index)
            day_of_week = self.add_is_holiday_column_to_one_hot_day_of_week(day_of_week, barbados_holidays_df)

            X, X_LSTM, y_bl, y_cl, cur_dates, prev_dates = self.create_X_y(normalized_data, target_column, hours_in_day, day_of_week)

            # "BL" models predict the load itself, "CL" models the change versus the previous day
            # (see the y_bl / y_cl targets built by create_X_y).
            trained_ann_model_bl = self.get_trained_model(saved_models_path, "ANN_BL", feeder_filesave_name)
            trained_ann_model_cl = self.get_trained_model(saved_models_path, "ANN_CL", feeder_filesave_name)
            trained_lstm_model_bl = self.get_trained_model(saved_models_path, "LSTM_BL", feeder_filesave_name)
            trained_lstm_model_cl = self.get_trained_model(saved_models_path, "LSTM_CL", feeder_filesave_name)

            adapted_rls_combiner_ann = self.get_saved_rls_combiner(saved_models_path, "RLS_ANN", feeder_filesave_name)
            adapted_rls_combiner_lstm = self.get_saved_rls_combiner(saved_models_path, "RLS_LSTM", feeder_filesave_name)
            adapted_rls_combiner_final = self.get_saved_rls_combiner(saved_models_path, "RLS_Final", feeder_filesave_name)

            y_pred_bl = self.predict_model(trained_ann_model_bl, X)
            y_pred_cl = self.predict_model(trained_ann_model_cl, X)
            y_pred_bl_LSTM = self.predict_model(trained_lstm_model_bl, X_LSTM)
            y_pred_cl_LSTM = self.predict_model(trained_lstm_model_cl, X_LSTM)

            # Convert change predictions to load levels by adding the previous-day load held in X.
            y_pred_bl_cl = self.convert_cl_to_bl(y_pred_cl, y_bl, X, hours_in_day)
            y_pred_bl_cl_LSTM = self.convert_cl_to_bl(y_pred_cl_LSTM, y_bl, X, hours_in_day)

            print(X.shape, X_LSTM.shape, y_bl.shape, y_cl.shape, cur_dates.shape, prev_dates.shape)
            print(y_pred_bl.shape, y_pred_cl.shape, y_pred_bl_LSTM.shape, y_pred_cl_LSTM.shape)

            # Two-stage blend: combine BL and CL-derived predictions per model family, then
            # combine the two family outputs. The combiners are adapted with y_bl along the way,
            # but the updated combiners are not saved back in this method.
            y_pred_ann_rls, rls_combiner_ann = self.adapt_and_predict_rls_combiner(y_bl, y_pred_bl, y_pred_bl_cl, hours_in_day, adapted_rls_combiner_ann)
            y_pred_lstm_rls, rls_combiner_lstm = self.adapt_and_predict_rls_combiner(y_bl, y_pred_bl_LSTM, y_pred_bl_cl_LSTM, hours_in_day, adapted_rls_combiner_lstm)
            y_pred_final_rls, rls_combiner_final = self.adapt_and_predict_rls_combiner(y_bl, y_pred_ann_rls, y_pred_lstm_rls, hours_in_day, adapted_rls_combiner_final)

            final_results = pd.DataFrame([y_bl.flatten(), y_pred_ann_rls.flatten(), y_pred_lstm_rls.flatten(), y_pred_final_rls.flatten()]).T
            final_results.columns = ["Actual", "ANN_RLS", "LSTM_RLS", "Final_RLS"]
            final_results.index = cur_dates.flatten()

            # Metrics are computed on the normalised values, before the inverse scaling below.
            final_metrics = self.get_metrics(final_results)

            final_results = final_results.apply(
                lambda x: self.minmax_inverse_scale(x, train_stats.loc[target_column, "Min"], train_stats.loc[target_column, "Max"])
            )

            self.save_results(final_results, self.saved_results_path, feeder_filesave_name, is_validation)
            self.save_metrics(final_metrics, self.saved_metrics_path, feeder_filesave_name, is_validation)
            
    def get_stats(self, data):
        """Return ``(mean, std, min, max)`` of ``data`` using its own pandas reductions."""
        return data.mean(), data.std(), data.min(), data.max()
            
    def build_models_and_train_feeders_data(self, start_date, end_date):
        feeders_metadata = pd.read_csv(self.feeders_metadata_path)
        weather_utils = WeatherUtils()
        feeders_root_path = self.feeders_root_path
        saved_models_path = self.saved_models_path
        train_stats_path = self.train_stats_path
        hours_in_day = self.hours_in_day
        target_column = self.target_column
        barbados_holidays_df = self.barbados_holidays_df
        threshold = self.threshold

        for i in range(feeders_metadata.shape[0]):
            feeder_metadata = feeders_metadata.iloc[i]
            feeder_name = feeder_metadata["FeederName"]
            feeder_data_filename = feeder_metadata["FileName"]
            feeder_filesave_name = feeder_metadata["FileSaveName"]
            feeder_data_path = feeders_root_path + feeder_data_filename
            feeder_data = self.get_raw_data_from_path(feeder_data_path)
            net_load_demand = self.preprocess_raw_data(feeder_data, "NetLoadDemand", threshold)
            latitude = feeder_metadata["Latitude"]
            longitude = feeder_metadata["Longitude"]

            print("========================================")
            print(feeder_name)
            print("========================================")

            historic_weather_data = weather_utils.fetch_historic_data_from_api(latitude, longitude, start_date, end_date)
            forecast_weather_data = weather_utils.fetch_forecast_data_from_api(latitude, longitude, start_date, end_date)

            temperature_historic = historic_weather_data["temperature_2m_historic"]
            temperature_forecast = forecast_weather_data["temperature_2m_forecast"]
            shortwave_radiation_historic = historic_weather_data["shortwave_radiation_historic"]
            shortwave_radiation_forecast = forecast_weather_data["shortwave_radiation_forecast"]

            combined_data = pd.concat(
                [net_load_demand, temperature_historic, temperature_forecast, shortwave_radiation_historic, shortwave_radiation_forecast], axis=1
            ).dropna()
            combined_data = combined_data.loc[start_date:end_date]
            # combined_data_daytime = combined_data.between_time("06:00", "20:00")

            mean, std, min, max = self.get_stats(combined_data)
            # mean, std, min, max = self.get_stats(combined_data_daytime)
            
            train_stats = pd.concat([mean, std, min, max], axis=1)
            train_stats.columns = ['Mean', 'Std', 'Min', 'Max']
            train_stats.to_csv(f"{train_stats_path}/{feeder_filesave_name}_train_stats.csv")

            # normalized_data = self.minmax_scale(combined_data_daytime, train_stats["Min"], train_stats["Max"])
            normalized_data = self.minmax_scale(combined_data, train_stats["Min"], train_stats["Max"])
            print(normalized_data.describe())

            # denormalized_data = minmax_inverse_scale(normalized_data, train_stats['Min'], train_stats['Max'])
            # print(denormalized_data.describe())

            dates_index = self.get_unique_dates_from_hourly_data(normalized_data)
            day_of_week = self.get_one_hot_day_of_week(dates_index)
            day_of_week = self.add_is_holiday_column_to_one_hot_day_of_week(day_of_week, barbados_holidays_df)

            X, X_LSTM, y_bl, y_cl, cur_dates, prev_dates = self.create_X_y(normalized_data, target_column, hours_in_day, day_of_week)
            
            trained_ann_model_bl = self.train_ann_model(X, y_bl)
            trained_ann_model_cl = self.train_ann_model(X, y_cl)
            trained_lstm_model_bl = self.train_lstm_model(X_LSTM, y_bl)
            trained_lstm_model_cl = self.train_lstm_model(X_LSTM, y_cl)

            self.save_trained_model(trained_ann_model_bl, saved_models_path, "ANN_BL", feeder_filesave_name)
            self.save_trained_model(trained_ann_model_cl, saved_models_path, "ANN_CL", feeder_filesave_name)
            self.save_trained_model(trained_lstm_model_bl, saved_models_path, "LSTM_BL", feeder_filesave_name)
            self.save_trained_model(trained_lstm_model_cl, saved_models_path, "LSTM_CL", feeder_filesave_name)
            
            y_pred_bl = self.predict_model(trained_ann_model_bl, X)
            y_pred_cl = self.predict_model(trained_ann_model_cl, X)
            y_pred_bl_LSTM = self.predict_model(trained_lstm_model_bl, X_LSTM)
            y_pred_cl_LSTM = self.predict_model(trained_lstm_model_cl, X_LSTM)

            y_pred_bl_cl = self.convert_cl_to_bl(y_pred_cl, y_bl, X, hours_in_day)
            y_pred_bl_cl_LSTM = self.convert_cl_to_bl(y_pred_cl_LSTM, y_bl, X, hours_in_day)

            print(X.shape, X_LSTM.shape, y_bl.shape, y_cl.shape, cur_dates.shape, prev_dates.shape)
            print(y_pred_bl.shape, y_pred_cl.shape, y_pred_bl_LSTM.shape, y_pred_cl_LSTM.shape)

            y_pred_ann_rls, rls_combiner_ann = self.adapt_and_predict_rls_combiner(y_bl, y_pred_bl, y_pred_bl_cl, hours_in_day)
            y_pred_lstm_rls, rls_combiner_lstm = self.adapt_and_predict_rls_combiner(y_bl, y_pred_bl_LSTM, y_pred_bl_cl_LSTM, hours_in_day)
            y_pred_final_rls, rls_combiner_final = self.adapt_and_predict_rls_combiner(y_bl, y_pred_ann_rls, y_pred_lstm_rls, hours_in_day)
            
            self.save_rls_combiner(rls_combiner_ann, "RLS_ANN", saved_models_path, feeder_filesave_name)
            self.save_rls_combiner(rls_combiner_lstm, "RLS_LSTM", saved_models_path, feeder_filesave_name)
            self.save_rls_combiner(rls_combiner_final, "RLS_Final", saved_models_path, feeder_filesave_name)
            
            

    def __init__(self):
        self.saved_ann_models_path = "./../Saved_Models/ANN_Saved_Models"
        self.saved_lstm_models_path = "./../Saved_Models/LSTM_Saved_Models"
        self.saved_rls_combiner_models_path = "./../Saved_Models/RLS_Combiner_Saved_Models"
        self.feeders_metadata_path = "./../Data/Filtered_Feeders_Metadata/Final_Selected_Feeders_Data_with_Coordinates.csv"
        self.feeders_root_path = "./../Data/Feeder_Data/"
        self.openmeteo_weather_data_path = "./../Data/Feeder_Weather_Combined_Data/"
        self.filtered_feeders_metadata_file_path = "../Data/Filtered_Feeders_Metadata/Final_Selected_Feeders_Data_with_Coordinates.csv"
        self.weather_data_path = "./../Data/Weather_Data/high_resolution_weather_arch_hall.csv"
        self.saved_models_path = "./../Saved_Models"
        self.saved_results_path = "./../Results"
        self.saved_metrics_path = "./../Metrics"
        self.train_stats_path = "./../Data/Filtered_Feeders_Metadata/Train_Stats"
        self.end_train = "2024-06-16 23:59:59"
        self.end_val = "2024-06-30 23:59:59"
        self.end_test = "2024-07-31 23:59:59"
        self.start_train_date = "2024-01-01"
        self.start_val_date = "2024-06-17"
        self.start_test_date = "2024-07-01"
        self.end_train_date = "2024-06-16"
        self.end_val_date = "2024-06-30"
        self.end_test_date = "2024-07-31"
        self.freq = "h"
        self.threshold = 6
        self.holiday_years = [2020, 2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030]
        self.hours_in_day = 24
        self.target_column = "NetLoadDemand"
        self.barbados_holidays_df = self.get_barbados_holidays_list(self.holiday_years)


# Driver for this cell: run the batch forecast from previously trained models,
# first over the validation window and then over the test window.
#
# Training is switched off here (the call below is commented out), so
# `barbados_trainer` is constructed but not otherwise used in this cell.
# NOTE(review): presumably the forecast calls load models written by an earlier
# training run -- confirm they exist on disk before a fresh Restart & Run All.
barbados_trainer = ForecasterUtils()
# barbados_trainer.build_models_and_train_feeders_data(barbados_trainer.start_train_date, barbados_trainer.end_train_date)
forecaster = ForecasterUtils()
# Validation window; `is_validation=True` is passed only for this call (its effect
# is defined in batch_forecast_feeders_data_from_trained_models, not shown here).
forecaster.batch_forecast_feeders_data_from_trained_models(forecaster.start_val_date, forecaster.end_val_date, is_validation=True)
# Test window; relies on the method's default for `is_validation`.
forecaster.batch_forecast_feeders_data_from_trained_models(forecaster.start_test_date, forecaster.end_test_date)
# forecaster.day_ahead_forecast_feeders_data_from_trained_models("2024-03-18", "2024-03-18")

Removing Nan Values - Total Dropped Days:  3
Removing Incomplete Days - Total Dropped Days:  0
Arch Hall
Coordinates 13.181018829345703°N -59.56243896484375°E
Elevation 114.0 m asl
Timezone b'America/Barbados' b'GMT-4'
Timezone difference to GMT+0 -14400 s
<openmeteo_sdk.VariablesWithTime.VariablesWithTime object at 0x000001953A0D46D0>
Coordinates 13.125°N -59.625°E
Elevation 114.0 m asl
Timezone b'America/Barbados' b'GMT-4'
Timezone difference to GMT+0 -14400 s
<openmeteo_sdk.VariablesWithTime.VariablesWithTime object at 0x000001953A0D74C0>
                                Mean     Std      Min      Max
NetLoadDemand                4219.63 2976.71 -4721.06 11074.83
temperature_2m_historic        26.90    2.01    22.30    32.40
temperature_2m_forecast        27.17    1.63    24.03    31.73
shortwave_radiation_historic  250.09  320.07     0.00  1026.00
shortwave_radiation_forecast  252.75  321.05     0.00   987.00
       NetLoadDemand  temperature_2m_historic  temperature_2m_forecast  sh

In [None]:
# forecaster_utils = ForecasterUtils()
# feeders_metadata = pd.read_csv(feeders_metadata_path)
# weather_utils = WeatherUtils()

# for i in range(feeders_metadata.shape[0] - 8):
#     feeder_metadata = feeders_metadata.iloc[i]
#     feeder_name = feeder_metadata['FeederName']
#     feeder_data_filename = feeder_metadata['FileName']
#     feeder_filesave_name = feeder_metadata['FileSaveName']
#     feeder_data_path = feeders_root_path + feeder_data_filename
#     ann_save_path = f"{saved_models_path}/ANN_Saved_Models/{feeder_filesave_name}_ANN_Model.keras"
#     lstm_save_path = f"{saved_models_path}/LSTM_Saved_Models/{feeder_filesave_name}_LSTM_Model.keras"
#     rls_combiner_save_path = f"{saved_models_path}/RLS_Combiner_Saved_Models/{feeder_filesave_name}_RLS_Combiner.pkl"
#     feeder_data = get_raw_data_from_path(feeder_data_path)
#     net_load_demand = preprocess_raw_data(feeder_data, "NetLoadDemand")
#     latitude = feeder_metadata['Latitude']
#     longitude = feeder_metadata['Longitude']
    
    
#     print("========================================")
#     print(feeder_name)
#     print("========================================")
    
#     historic_weather_data = weather_utils.fetch_historic_data_from_api(latitude, longitude, start_test_date, end_test_date)
#     forecast_weather_data = weather_utils.fetch_forecast_data_from_api(latitude, longitude, start_test_date, end_test_date)
    
#     temperature_historic = historic_weather_data["temperature_2m_historic"]
#     temperature_forecast = forecast_weather_data["temperature_2m_forecast"]
#     shortwave_radiation_historic = historic_weather_data["shortwave_radiation_historic"]
#     shortwave_radiation_forecast = forecast_weather_data["shortwave_radiation_forecast"]
    
    
#     combined_data = pd.concat([net_load_demand, temperature_historic, temperature_forecast, shortwave_radiation_historic, shortwave_radiation_forecast], axis=1).dropna()
#     combined_data = combined_data.loc[start_test_date:]
#     combined_data_daytime = combined_data.between_time('06:00', '20:00')
    
#     train_stats = get_train_stats(train_stats_path, feeder_filesave_name)
#     print(train_stats)
    
#     normalized_data = minmax_scale(combined_data_daytime, train_stats['Min'], train_stats['Max'])
#     print(normalized_data.describe())
    
#     denormalized_data = minmax_inverse_scale(normalized_data, train_stats['Min'], train_stats['Max'])
#     # print(denormalized_data.describe())
    
#     dates_index = get_unique_dates_from_hourly_data(normalized_data)
#     day_of_week = get_one_hot_day_of_week(dates_index)
#     day_of_week = add_is_holiday_column_to_one_hot_day_of_week(day_of_week, barbados_holidays_df)
    
#     X_val, X_val_LSTM, y_val_bl, y_val_cl, cur_dates_val, prev_dates_val = create_X_y(normalized_data, target_column, hours_in_day, day_of_week)
    
#     trained_ann_model_bl = get_trained_model(saved_models_path, "ANN_BL", feeder_filesave_name)
#     trained_ann_model_cl = get_trained_model(saved_models_path, "ANN_CL", feeder_filesave_name)
#     trained_lstm_model_bl = get_trained_model(saved_models_path, "LSTM_BL", feeder_filesave_name)
#     trained_lstm_model_cl = get_trained_model(saved_models_path, "LSTM_CL", feeder_filesave_name)
    
    
#     y_pred_val_bl = predict_model(trained_ann_model_bl, X_val)
#     y_pred_val_cl = predict_model(trained_ann_model_cl, X_val)
#     y_pred_val_bl_LSTM = predict_model(trained_lstm_model_bl, X_val_LSTM)
#     y_pred_val_cl_LSTM = predict_model(trained_lstm_model_cl, X_val_LSTM)
    
#     y_pred_val_bl_cl = convert_cl_to_bl(y_pred_val_cl, y_val_bl, X_val)
#     y_pred_val_bl_cl_LSTM = convert_cl_to_bl(y_pred_val_cl_LSTM, y_val_bl, X_val)
    
#     print(X_val.shape, X_val_LSTM.shape, y_val_bl.shape, y_val_cl.shape, cur_dates_val.shape, prev_dates_val.shape)
#     print(y_pred_val_bl.shape, y_pred_val_cl.shape, y_pred_val_bl_LSTM.shape, y_pred_val_cl_LSTM.shape)
    
#     y_pred_ann_rls, rls_combiner_ann = adapt_and_predict_rls_combiner(y_val_bl, y_pred_val_bl, y_pred_val_bl_cl, hours_in_day)
#     y_pred_lstm_rls, rls_combiner_lstm = adapt_and_predict_rls_combiner(y_val_bl, y_pred_val_bl_LSTM, y_pred_val_bl_cl_LSTM, hours_in_day)
#     y_pred_final_rls, rls_combiner_final = adapt_and_predict_rls_combiner(y_val_bl, y_pred_ann_rls, y_pred_lstm_rls, hours_in_day)
    
#     final_results = pd.DataFrame([y_val_bl.flatten(), y_pred_ann_rls.flatten(), y_pred_lstm_rls.flatten(), y_pred_final_rls.flatten()]).T
#     final_results.columns = ["Actual", "ANN_RLS", "LSTM_RLS", "Final_RLS"]
#     final_results.index = cur_dates_val.flatten()
    
#     final_results = final_results.apply(lambda x: minmax_inverse_scale(x, train_stats.loc[target_column, 'Min'], train_stats.loc[target_column, 'Max']))
#     final_metrics = get_metrics(final_results)
    
#     px.line(final_results.loc['2024-07-06':]).show()
#     print(final_metrics)


# # arch_hall_feeder_data_path = feeders_root_path + "Arch_Hall_ST2B13.csv"
# # arch_hall_feeder_data = get_raw_data_from_path(arch_hall_feeder_data_path)
# # arch_hall_feeder_data = add_extra_hour_in_beginning(arch_hall_feeder_data)
# # arch_hall_net_load_demand = preprocess_raw_data(arch_hall_feeder_data, "NetLoadDemand")
# # arch_hall_net_load_demand

In [22]:
def get_one_hot_day_of_week(dates_index):
    """One-hot encode the weekday of each date.

    Parameters
    ----------
    dates_index : list-like
        Dates in any form ``pd.to_datetime`` can parse (e.g. ISO strings or a
        ``DatetimeIndex``). May be empty.

    Returns
    -------
    pd.DataFrame
        Integer frame indexed by the parsed dates, with columns ``Monday``
        through ``Sunday``. Each row holds a single 1 in the column matching
        that date's weekday and 0 everywhere else.
    """
    weekday_names = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

    dates_index = pd.to_datetime(dates_index)
    # pandas numbers weekdays Monday=0 .. Sunday=6, which matches the column order above.
    weekday_codes = np.asarray(dates_index.dayofweek)

    one_hot = np.zeros((len(dates_index), len(weekday_names)), dtype=np.int64)
    # Flag one cell per row in a single vectorised assignment instead of a
    # per-date ``.loc`` write (and without the per-row debug print).
    one_hot[np.arange(len(dates_index)), weekday_codes] = 1

    return pd.DataFrame(one_hot, index=dates_index, columns=weekday_names)


# Manual smoke check of get_one_hot_day_of_week on a single known date.
# 1998-06-20 has dayofweek == 5, so the 1 is expected in the "Saturday" column.
# The commented-out list below is a wider set of sample dates for the same check.
# test_dates = ["1998-06-20", "2020-06-20", "2005-06-20", "2025-06-20"]
test_dates = ["1998-06-20"]

# Left as the cell's last expression so the resulting frame is displayed.
get_one_hot_day_of_week(test_dates)

1998-06-20 00:00:00 5


Unnamed: 0,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
1998-06-20,0,0,0,0,0,1,0


In [12]:
# Scratch check: build an all-zero, single-row frame with the seven weekday columns.
# NOTE(review): `test_date` (singular) is not assigned in any cell visible here -- only
# `test_dates` is -- and this cell's execution count (12) is lower than the cell above (22).
# The saved output suggests it held 2024-06-20; confirm, or assign it in this cell so the
# notebook survives Restart & Run All.
pd.DataFrame(0, index=[pd.to_datetime(test_date)], columns=["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"])

Unnamed: 0,Monday,Tuesday,Wednesday,Thursday,Friday,Saturday,Sunday
2024-06-20,0,0,0,0,0,0,0
