# Model Evaluation

## Import libraries

In [190]:
import pandas as pd
from dataclasses import dataclass
from enum import Enum
from typing import List
import os

## Settings

In [191]:
# Directory handed to read_result(); it is expected to contain sub-directories.
RESULT_PARENT_DIRECTORY_PATH = "Result"

# CSV that create_test_data_frame() reads when present and writes otherwise.
TEST_DATA_FRAME_PATH = "test_data_frame.csv"

# Input CSV that create_test_data_frame() pivots; located in the sibling
# "datasets" directory one level above the working directory.
REVENUE_TEST_DATA_FRAME_PATH = os.path.join(
    os.pardir,
    "datasets",
    "revenue_test_data_by_date_store.csv",
)

## Enums

In [192]:
class Metric(Enum):
    """Identifiers for the metrics a ``MetricResult`` can be tagged with."""

    # Root Mean Squared Scaled Error
    RMSSE = 0

## Classes

In [193]:
@dataclass(eq=False)
class Evaluation:
    """A data frame paired with the name it is referred to by.

    Equality is written by hand instead of being generated by ``@dataclass``.
    The generated ``__eq__`` compares the fields as a tuple, which evaluates
    ``bool(data_frame == other.data_frame)``; pandas refuses to reduce that
    element-wise result to a single truth value and raises ``ValueError``
    whenever two instances share a name but hold distinct frame objects.
    Here the frames are compared with ``DataFrame.equals`` instead, so the
    comparison always yields a plain ``bool``.

    Instances remain unhashable, exactly as with the generated ``__eq__``:
    defining ``__eq__`` without ``__hash__`` sets ``__hash__`` to ``None``.
    """

    name: str
    data_frame: pd.DataFrame

    def __eq__(self, other: object) -> bool:
        """Return whether *other* is the same class with equal name and frame."""
        # Same strict class check the dataclass-generated method performs.
        if other.__class__ is not self.__class__:
            return NotImplemented
        if self.name != other.name:
            return False
        mine, theirs = self.data_frame, other.data_frame
        # Identity short-circuit keeps the cases that already compared equal
        # (same frame object) working without touching the frame contents.
        return mine is theirs or bool(mine.equals(theirs))


@dataclass
class MetricResult:
    """One metric value recorded for one named data frame."""

    # Name of the data frame the value belongs to.
    # NOTE(review): presumably matches an ``Evaluation.name`` -- confirm.
    data_frame_name: str
    # Which metric the value was computed with.
    metric: Metric
    # The metric value itself.
    # NOTE(review): scale/direction (lower is better?) is not shown here.
    point: float


@dataclass
class Result:
    """Everything collected for a single author."""

    # Author identifier.
    # NOTE(review): presumably the author's sub-directory name under the
    # results directory -- confirm against the loader.
    author: str
    # Data frames associated with this author.
    evaluation_list: List[Evaluation]
    # Metric values associated with this author.
    metric_result_list: List[MetricResult]

## Helper Functions

### Create test data frame

In [194]:
def create_test_data_frame() -> pd.DataFrame:
    """Load the test data frame, building and caching it on first use.

    If ``TEST_DATA_FRAME_PATH`` already exists it is read and returned as is.
    Otherwise ``REVENUE_TEST_DATA_FRAME_PATH`` is read, pivoted so that each
    ``store_id`` becomes a column of ``revenue`` values keyed by
    ``(date, d)``, restricted to rows whose ``d`` label (``d_<number>``) has a
    number of at least 1914, written to ``TEST_DATA_FRAME_PATH`` and returned.

    Returns:
        A copy of the wide-format test data frame.

    Raises:
        FileNotFoundError: Neither the cached file nor the revenue source
            file exists.
    """
    # Cached result from an earlier run: reuse it instead of rebuilding.
    if os.path.isfile(TEST_DATA_FRAME_PATH):
        print(f"Test data frame already exists at {TEST_DATA_FRAME_PATH}")
        cached_df = pd.read_csv(TEST_DATA_FRAME_PATH)
        return cached_df.copy()

    if not os.path.isfile(REVENUE_TEST_DATA_FRAME_PATH):
        raise FileNotFoundError(
            f"Revenue test data frame not found at {REVENUE_TEST_DATA_FRAME_PATH}."
        )

    long_df = pd.read_csv(REVENUE_TEST_DATA_FRAME_PATH)

    # Long -> wide: one row per (date, d), one revenue column per store.
    wide_df = long_df.pivot(index=["date", "d"], columns="store_id", values="revenue")
    wide_df = wide_df.reset_index()

    # Temporary helper column holding the integer part of the "d_<n>" label.
    wide_df["d_numeric"] = wide_df["d"].str.extract(r"d_(\d+)").astype(int)

    from_day_1914 = wide_df["d_numeric"] >= 1914
    result_df = wide_df[from_day_1914]
    result_df = result_df.drop(columns=["d_numeric"])
    result_df = result_df.reset_index(drop=True)

    # Persist so that later calls take the cached branch above.
    result_df.to_csv(TEST_DATA_FRAME_PATH, index=False)

    return result_df.copy()

### Read results

In [195]:
def read_result(parent_directory: str):
    """Validate that *parent_directory* holds at least one sub-directory.

    Nothing is loaded or returned; the function only checks the directory
    layout and returns ``None`` when the checks pass.

    Args:
        parent_directory: Path of the directory to inspect.

    Raises:
        NotADirectoryError: *parent_directory* is not an existing directory,
            or it contains no sub-directories.
    """
    # Guard: the path itself must be an existing directory.
    if not os.path.isdir(parent_directory):
        raise NotADirectoryError(f"Directory '{parent_directory}' is not found.")

    # Only entries that are themselves directories count; plain files are ignored.
    has_subdirectory = any(
        os.path.isdir(os.path.join(parent_directory, entry))
        for entry in os.listdir(parent_directory)
    )

    if not has_subdirectory:
        raise NotADirectoryError("Author subdirectories are not found.")

## Main Flow

### Create test data frame

In [196]:
# Load the cached test data frame, or build it from the revenue CSV and cache it.
test_df = create_test_data_frame()

In [197]:
# Bare expression: as the last statement of the cell it renders the frame below.
test_df

store_id,date,d,CA_1,CA_2,CA_3,CA_4,TX_1,TX_2,TX_3,WI_1,WI_2,WI_3
0,2016-04-25,d_1914,13887.08,12858.75,21662.51,9353.61,9354.89,12968.33,12579.32,9498.48,11950.82,9248.18
1,2016-04-26,d_1915,11996.21,11855.37,17924.1,8345.31,8934.54,11595.4,11658.45,9446.94,11942.89,8950.87
2,2016-04-27,d_1916,12037.84,11445.13,16858.21,8180.92,8942.52,10495.51,11325.3,9484.43,10862.16,8152.41
3,2016-04-28,d_1917,11842.57,12615.2,16662.98,7523.25,7901.84,11490.03,12127.21,9651.43,12624.97,8073.34
4,2016-04-29,d_1918,15531.11,13850.23,19288.78,8411.11,11775.6,12775.54,14664.75,12292.07,16617.89,10366.75
5,2016-04-30,d_1919,18653.94,20725.21,24333.26,9719.27,12424.21,15266.48,14068.86,16026.76,15435.87,12722.83
6,2016-05-01,d_1920,20957.14,20896.89,26670.55,11039.87,13884.16,16405.93,16612.23,15987.16,15403.56,13468.86
7,2016-05-02,d_1921,15497.53,14959.28,22582.73,10336.66,9503.97,12288.21,13812.1,10656.36,16007.25,12123.56
8,2016-05-03,d_1922,14949.03,13332.37,20335.42,9478.27,10288.27,13094.97,15545.13,10604.54,18652.08,12606.96
9,2016-05-04,d_1923,13300.04,13149.59,20043.67,8395.94,9370.0,11202.27,11748.55,10455.68,14654.52,10565.6


### Read results

In [198]:
# Checks that the results directory exists and contains at least one
# sub-directory; raises NotADirectoryError otherwise and returns nothing.
read_result(RESULT_PARENT_DIRECTORY_PATH)

### Evaluate models

#### Metric: Root Mean Squared Scaled Error