In [2]:
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [6]:
import numpy as np
import pandas as pd

def max_error(y_ref, y_test):
    """
    Largest absolute deviation between two arrays, normalized by the
    mean of the reference array.

    Parameters:
        y_ref (array-like): Reference values; their mean is the normalizer.
        y_test (array-like): Values compared against the reference.

    Returns:
        float: Maximum of |y_test - y_ref| / mean(y_ref).
    """
    reference = np.array(y_ref)
    candidate = np.array(y_test)
    # Every absolute deviation is scaled by the same scalar: the reference mean.
    normalized_deviation = np.abs(candidate - reference) / np.mean(reference)
    return np.max(normalized_deviation)


def median_error(y_ref, y_test):
    """
    Median absolute deviation between two arrays, normalized by the
    mean of the reference array.

    Parameters:
        y_ref (array-like): Reference values; their mean is the normalizer.
        y_test (array-like): Values compared against the reference.

    Returns:
        float: Median of |y_test - y_ref| / mean(y_ref).
    """
    reference = np.array(y_ref)
    candidate = np.array(y_test)
    # Every absolute deviation is scaled by the same scalar: the reference mean.
    normalized_deviation = np.abs(candidate - reference) / np.mean(reference)
    return np.median(normalized_deviation)


def error(y_ref, y_test):
    """
    Element-wise absolute deviation between two arrays, normalized by the
    mean of the reference array.

    Parameters:
        y_ref (array-like): Reference values; their mean is the normalizer.
        y_test (array-like): Values compared against the reference.

    Returns:
        array: |y_test - y_ref| / mean(y_ref), one entry per element.
    """
    reference = np.array(y_ref)
    candidate = np.array(y_test)
    absolute_deviation = np.abs(candidate - reference)
    # The normalizer is a single scalar, so the output keeps the input shape.
    return absolute_deviation / np.mean(reference)


def MBE(y_ref, y_test):
    """
    Calculate the Mean Bias Error (MBE) between two arrays.

    The bias is the signed difference ``y_test - y_ref`` summed over all
    elements and divided by ``len(y_ref)``; a positive result means y_test
    is larger than y_ref on average.

    Parameters:
        y_ref (array-like): Reference array.
        y_test (array-like): Test array.

    Returns:
        float: Mean Bias Error (MBE).
    """
    # Convert to arrays like the other metrics in this file do, so plain
    # lists are accepted and pandas Series are compared by position rather
    # than by index label.
    y_test = np.array(y_test)
    y_ref = np.array(y_ref)
    return np.sum(y_test - y_ref) / len(y_ref)


def invTransform(scaler, data, colName, colNames):
    """
    Inverse transform a single column using a scaler fitted on several columns.

    A zero-filled frame with all of ``colNames`` is built, ``data`` is placed
    in ``colName``, the whole frame is passed to ``scaler.inverse_transform``
    and only the requested column is returned.

    Parameters:
        scaler (object): Scaler object with an inverse_transform method.
        data (array-like): Scaled values of one column; 1-D, or a single
            column vector of shape (n, 1).
        colName (str): Name of the column to transform.
        colNames (list): List of column names the scaler operates on.

    Returns:
        array: Inverse transformed column data.
    """
    # Flatten to a positional 1-D array first. Assigning a pandas Series
    # directly would align on its index labels and turn every row whose
    # label is not in 0..n-1 into NaN; reshape also raises if `data` holds
    # more than one column.
    column_values = np.asarray(data).reshape(len(data))
    dummy = pd.DataFrame(np.zeros((len(column_values), len(colNames))), columns=colNames)
    dummy[colName] = column_values
    restored = pd.DataFrame(scaler.inverse_transform(dummy), columns=colNames)
    return restored[colName].values


def mean_absolute_percentage_error(y_true, y_pred):
    """
    Mean Absolute Percentage Error (MAPE) between two arrays, in percent.

    Points whose true value is zero are left out of the average because the
    percentage error is undefined there.

    Parameters:
        y_true (array-like): True values.
        y_pred (array-like): Predicted values.

    Returns:
        float: Mean Absolute Percentage Error (MAPE).
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)

    # Keep only the positions where dividing by the true value is defined.
    keep = actual != 0
    actual, forecast = actual[keep], forecast[keep]

    relative_errors = np.abs((actual - forecast) / actual)
    return np.mean(relative_errors) * 100.0


In [11]:
# Define directories and constants
data_dir = './GHI_dataset/cleaned_sampled/'
reportPersistence_dir = './reports/persistenceModel/'
samplingFrequencies_eng = ["15_minutes", "30_minutes", "45_minutes", "1_hour", "2_hours",
                            "4_hours", "6_hours", "12_hours", "24_hours", "48_hours",
                            "72_hours", "4_days", "5_days", "6_days", "7_days"]

# Column order of the exported report
resultColumns = ['samplingFrequency', 'accuracy', 'MAPE', 'RMSE',
                 'nRMSE', 'MAE', 'nMAE', 'R2', 'MBE', 'maxNormError']


def _persistence_metrics(test, pred):
    """
    Compute every reported metric for one persistence forecast.

    Parameters:
        test (array): Observed values.
        pred (array): Persistence forecast (the previous observation).

    Returns:
        dict: One value per metric column of the report.
    """
    # RMSE and MAE are each needed twice (raw and normalized): compute once.
    rmse = np.sqrt(mean_squared_error(test, pred))
    mae = mean_absolute_error(test, pred)
    mean_observed = np.mean(test)
    return {
        # NOTE: 'accuracy' is the mean absolute difference, i.e. numerically
        # the same quantity as 'MAE'; the column name is kept so existing
        # consumers of the report keep working.
        'accuracy': np.mean(np.abs(pred - test)),
        'MAPE': mean_absolute_percentage_error(test, pred),
        'RMSE': rmse,
        'nRMSE': rmse / mean_observed,
        'MAE': mae,
        'nMAE': mae / mean_observed,
        'R2': r2_score(test, pred),
        'MBE': MBE(test, pred),
        'maxNormError': max_error(test, pred),  # Maximum normalized error
    }


# Making folder for reports if it doesn't exist
os.makedirs(reportPersistence_dir, exist_ok=True)

# Collect one record per sampling frequency and build the DataFrame once at
# the end; growing a frame cell by cell with .loc leaves every column as
# object dtype.
result_rows = []
result_labels = []

# Loop through different sampling frequencies
for k, samplingFrequency in enumerate(samplingFrequencies_eng):

    # Importing the data
    file_path = os.path.join(data_dir, f'GHI_sampled_{samplingFrequency}.csv')
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue

    data_df = pd.read_csv(file_path, index_col=0)

    # Chronological split (no shuffling): first 80% train, then the
    # remaining 20% halved into validation and test.
    y = data_df['GHI'].values
    y_train, y_test = train_test_split(y, test_size=0.2, shuffle=False)
    y_validation, y_test = train_test_split(y_test, test_size=0.5, shuffle=False)

    # Testing the same 10% of the dataset we're testing the LSTM on.
    # Persistence forecast: the prediction for step t is the observation at
    # step t-1, so the first test sample has no prediction and is dropped.
    pred = y_test[:-1]
    test = y_test[1:]

    # Persistence Performance evaluation
    result_rows.append({'samplingFrequency': samplingFrequency,
                        **_persistence_metrics(test, pred)})
    # Keep the position in samplingFrequencies_eng as the row label, even
    # when some files are missing.
    result_labels.append(k)

# DataFrame storing the results (empty but with all columns if no file was found)
persistenceResult_df = pd.DataFrame(result_rows, index=result_labels, columns=resultColumns)

csv_file_path = os.path.join(reportPersistence_dir, 'persistence_results.csv')
persistenceResult_df.to_csv(csv_file_path, index=False)

print(f"persistenceResult_df exported to {csv_file_path}")

TypeError: join() got an unexpected keyword argument 'index'

Unnamed: 0,samplingFrequency,accuracy,MAPE,RMSE,nRMSE,MAE,nMAE,R2,MBE,maxNormError
0,15_minutes,50.037244,23.114405,79.443056,0.215623,50.037244,0.13581,0.906006,0.004138,1.594096
1,30_minutes,66.647429,34.460131,93.065597,0.257494,66.647429,0.184401,0.868706,0.046301,1.722154
2,45_minutes,83.055855,43.177417,108.982196,0.303121,83.055855,0.23101,0.816728,0.058751,1.468474
3,1_hour,98.329583,54.814131,124.481801,0.356445,98.329583,0.28156,0.759804,0.038327,1.913365
4,2_hours,154.170072,90.499979,186.999271,0.565926,154.170072,0.466574,0.427942,0.012591,1.807656
5,4_hours,208.921731,114.745667,265.923367,0.824139,208.921731,0.647481,-0.401044,-0.299912,2.114744
6,6_hours,202.746162,231.334551,267.326001,0.982484,202.746162,0.745138,-0.766104,-0.295447,2.201131
7,12_hours,89.253696,36.716694,125.85947,0.347621,89.253696,0.246517,0.231103,0.671947,1.346809
8,24_hours,105.453443,46.019015,146.721308,0.404108,105.453443,0.290445,-0.155207,1.674083,1.200097
9,48_hours,89.755542,29.523625,114.719648,0.314817,89.755542,0.24631,-0.0977,2.660777,0.79365
