In [None]:
#| default_exp validation

In [None]:
#| export
import numpy as np

from utilsforecast.compat import DataFrame

In [None]:
from utilsforecast.data import generate_series

In [None]:
#| export
def validate_format(
    df: DataFrame,
    id_col: str = 'unique_id',
    time_col: str = 'ds',
    target_col: str = 'y',
) -> None:
    """Ensure a dataframe has the required columns with suitable data types.

    Parameters
    ----------
    df : DataFrame
        Input data to validate.
    id_col : str (default='unique_id')
        Name of the column that must be present as the series identifier.
    time_col : str (default='ds')
        Name of the column holding the times. Must be datetime or integer typed.
    target_col : str (default='y')
        Name of the column holding the target. Must be numeric.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If any of `id_col`, `time_col` or `target_col` is missing from `df`,
        if the time column is neither datetime nor integer typed, or if the
        target column isn't numeric.
    """
    # required columns
    missing_cols = sorted({id_col, time_col, target_col} - set(df.columns))
    if missing_cols:
        raise ValueError(f"The following columns are missing: {missing_cols}")

    # time col
    # only the first row is converted to numpy, the dtype is all we need
    times_dtype = df[time_col].head(1).to_numpy().dtype
    if not (np.issubdtype(times_dtype, np.datetime64) or np.issubdtype(times_dtype, np.integer)):
        raise ValueError(f"The time column ('{time_col}') should have either datetimes or integers, got '{times_dtype}'.")

    # target col
    target_dtype = df[target_col].head(1).to_numpy().dtype
    if not np.issubdtype(target_dtype, np.number):
        raise ValueError(f"The target column ('{target_col}') should have a numeric data type, got '{target_dtype}'.")

In [None]:
import datetime

import pandas as pd
import polars as pl

from fastcore.test import test_fail

In [None]:
# every failure mode must be reported for both supported dataframe backends
for constructor in (pd.DataFrame, pl.DataFrame):
    # only the id column is present, the other two should be listed as missing
    df = constructor(dict(unique_id=[1]))
    test_fail(
        lambda: validate_format(df),
        contains="missing: ['ds', 'y']",
    )

    # strings aren't a valid dtype for the (renamed) time column
    df = constructor(dict(unique_id=[1], time=['x'], y=[1]))
    test_fail(
        lambda: validate_format(df, time_col='time'),
        contains="('time') should have either datetimes or integers",
    )

    # with a valid time column (integer or datetime) a string target should fail
    for time in (1, datetime.datetime(2000, 1, 1)):
        df = constructor(dict(unique_id=[1], ds=[time], sales=['x']))
        test_fail(
            lambda: validate_format(df, target_col='sales'),
            contains="('sales') should have a numeric data type",
        )