# Model Evaluation

## Import libraries

In [46]:
import numpy as np
import pandas as pd
from dataclasses import dataclass
from enum import Enum
from typing import List
import os

## Settings

In [47]:
# Directory that read_result scans for one subdirectory per author.
RESULT_PARENT_DIRECTORY_PATH = "Result"

# Cache file for the pivoted test data frame (relative to the working directory).
TEST_DATA_FRAME_PATH = "test_data_frame.csv"

# Long-format revenue source file, located in the sibling "datasets" directory.
REVENUE_TEST_DATA_FRAME_PATH = os.path.join(
    os.path.join(os.pardir, "datasets"),
    "revenue_test_data_by_date_store.csv",
)

## Enums

In [48]:
class Metric(Enum):
    """Identifiers for the evaluation metrics used in this notebook."""

    RMSSE = 0  # Root Mean Squared Scaled Error

## Classes

In [49]:
@dataclass
class Data_Frame:
    """A pandas DataFrame paired with a name that identifies it."""

    # Label for the frame; read_result stores the source CSV file name here.
    name: str
    # The tabular data itself.
    data_frame: pd.DataFrame


@dataclass
class MetricResult:
    """The value of one metric for one named data frame."""

    # Name of the data frame the score refers to
    # (presumably matches Data_Frame.name; confirm against the evaluation code).
    data_frame_name: str
    # Which metric this score was computed with.
    metric: Metric
    # Numeric value of the metric.
    point: float


@dataclass
class Result:
    """Everything collected for one author: loaded data frames and metric scores."""

    # Author identifier; read_result uses the author's subdirectory name.
    author: str
    # Data frames loaded from the CSV files in the author's subdirectory.
    data_frame_list: List[Data_Frame]
    # Metric scores for this author; read_result initializes it to an empty list.
    metric_result_list: List[MetricResult]

## Helper Functions

### Create test data frame

In [50]:
def create_test_data_frame() -> pd.DataFrame:
    """
    Load the wide-format test data frame, building and caching it if needed.

    If TEST_DATA_FRAME_PATH already exists, it is read and returned unchanged.
    Otherwise the revenue data at REVENUE_TEST_DATA_FRAME_PATH is pivoted to
    one row per ("date", "d") pair with one "revenue" column per "store_id",
    a 1-based "id" column is prepended, and the result is written to
    TEST_DATA_FRAME_PATH before being returned.

    Returns:
        pd.DataFrame: The test data frame.

    Raises:
        FileNotFoundError: If the cached file does not exist and the revenue
            source file is missing as well.
    """
    if os.path.isfile(TEST_DATA_FRAME_PATH):
        print(f"Test data frame already exists at {TEST_DATA_FRAME_PATH}")
        # read_csv already returns a fresh frame, so no defensive copy is needed.
        return pd.read_csv(TEST_DATA_FRAME_PATH)

    if not os.path.isfile(REVENUE_TEST_DATA_FRAME_PATH):
        raise FileNotFoundError(
            f"Revenue test data frame not found at {REVENUE_TEST_DATA_FRAME_PATH}."
        )

    revenue_test_df = pd.read_csv(REVENUE_TEST_DATA_FRAME_PATH)

    pivoted_revenue_test_df = revenue_test_df.pivot(
        index=["date", "d"], columns="store_id", values="revenue"
    ).reset_index()

    # Number the rows 1..N from the actual row count. A hard-coded length makes
    # DataFrame.insert fail as soon as the source covers a different number of
    # days.
    row_count = len(pivoted_revenue_test_df)
    pivoted_revenue_test_df.insert(0, "id", list(range(1, row_count + 1)))

    pivoted_revenue_test_df.to_csv(TEST_DATA_FRAME_PATH, index=False)

    return pivoted_revenue_test_df

### Read results

In [51]:
def read_result(parent_directory: str) -> List[Result]:
    """
    Load every author's result CSV files from a parent directory.

    Each immediate subdirectory of ``parent_directory`` is treated as one
    author. Every ``.csv`` file directly inside an author's subdirectory is
    read into a Data_Frame named after the file.

    Parameters:
        parent_directory (str): Directory containing one subdirectory per author

    Returns:
        List[Result]: One Result per author subdirectory, in sorted name order,
            with ``data_frame_list`` filled and ``metric_result_list`` empty

    Raises:
        NotADirectoryError: If ``parent_directory`` is not an existing
            directory, or if it contains no subdirectories.
    """
    # Check if the given path exists and is a directory
    if not os.path.isdir(parent_directory):
        raise NotADirectoryError(f"Directory '{parent_directory}' is not found.")

    # os.listdir order is arbitrary; sort so the output is reproducible.
    member_subdirectories = sorted(
        d
        for d in os.listdir(parent_directory)
        if os.path.isdir(os.path.join(parent_directory, d))
    )

    # Check if there are any subdirectories; if not, raise an error
    if not member_subdirectories:
        raise NotADirectoryError("Author subdirectories are not found.")

    result_list: List[Result] = []

    for member in member_subdirectories:
        member_path = os.path.join(parent_directory, member)

        csv_files = sorted(
            f
            for f in os.listdir(member_path)
            if f.endswith(".csv") and os.path.isfile(os.path.join(member_path, f))
        )

        # Read each CSV through its full path inside the author's directory;
        # the bare file name only resolves relative to the working directory.
        data_frame_list = [
            Data_Frame(
                name=csv_file,
                data_frame=pd.read_csv(os.path.join(member_path, csv_file)),
            )
            for csv_file in csv_files
        ]

        result_list.append(
            Result(
                author=member,
                data_frame_list=data_frame_list,
                metric_result_list=[],
            )
        )

    return result_list

## Main Flow

### Create test data frame

In [52]:
# Load the cached test data frame, or build it from the revenue data on first run.
test_df = create_test_data_frame()

In [53]:
# test_df

### Read results

In [54]:
# Collect each author's CSV results from the result parent directory.
result_list = read_result(RESULT_PARENT_DIRECTORY_PATH)

In [55]:
# read_result never returns an empty list (it raises when there are no author
# subdirectories), so check what the message actually promises: that at least
# one author directory contributed a data frame. An empty result_list is still
# covered, because any() over an empty iterable is False.
if not any(result.data_frame_list for result in result_list):
    raise FileNotFoundError("No data frame found.")

### Evaluate models

#### Metric: Root Mean Squared Scaled Error

In [56]:
def rmsse(train: np.ndarray, actual: np.ndarray, forecast: np.ndarray) -> float:
    """
    Compute the Root Mean Squared Scaled Error (RMSSE).

    RMSSE = sqrt(mean((actual - forecast)^2) / mean(diff(train)^2)),
    i.e. the forecast's mean squared error scaled by the mean squared
    one-step change of the training series.

    Parameters:
        train (np.ndarray): Array of historical (training) data
        actual (np.ndarray): Array of actual values for the test period
        forecast (np.ndarray): Array of forecasted values

    Any array-like (list, tuple, pandas Series, ...) is accepted for all three
    arguments; values are converted to float before the computation.

    Returns:
        float: RMSSE value, or NaN when it is undefined (training series too
            short to difference, constant training series, or no test values)
    """
    # Convert up front: this accepts plain sequences and prevents integer
    # wrap-around in diff/square for narrow or unsigned integer dtypes.
    train_values = np.asarray(train, dtype=float)
    actual_values = np.asarray(actual, dtype=float)
    forecast_values = np.asarray(forecast, dtype=float)

    step_changes = np.diff(train_values)
    errors = actual_values - forecast_values

    # A mean over an empty array is NaN plus a RuntimeWarning; return NaN
    # directly instead.
    if step_changes.size == 0 or errors.size == 0:
        return np.nan

    # Compute scale denominator
    denominator = np.mean(np.square(step_changes))

    if denominator == 0:
        return np.nan  # Avoid division by zero

    # Compute numerator
    numerator = np.mean(np.square(errors))

    return float(np.sqrt(numerator / denominator))