# In [1]:
"""
Data cleaning and preprocessing functions for soybean meal and oil datasets.
"""

import pandas as pd


def load_and_filter_excel(path, current_date=None):
    """
    Load Excel data and filter out future dates.

    The 'Date' column is parsed to datetimes *before* the cutoff comparison,
    so sheets whose dates are stored as text are handled the same way as
    sheets with native date cells.

    Args:
        path (str): Path to Excel file. Must contain a 'Date' column.
        current_date (pd.Timestamp, optional): Latest date to keep
            (inclusive). Anything accepted by ``pd.Timestamp`` works.
            Defaults to pd.Timestamp.now().

    Returns:
        pd.DataFrame: Rows dated on or before ``current_date``, sorted by
        'Date' ascending with a fresh 0..n-1 index; every column other than
        'Date' is cast to float64.

    Raises:
        KeyError: If the sheet has no 'Date' column.
        ValueError: If a non-'Date' column cannot be cast to float64.
    """
    if current_date is None:
        cutoff = pd.Timestamp.now()
    else:
        cutoff = pd.Timestamp(current_date)

    df = pd.read_excel(path)

    # Parse first: comparing a text/object column against a Timestamp raises
    # TypeError, so the conversion has to precede the filter.
    df["Date"] = pd.to_datetime(df["Date"])

    # Rows with a missing date (NaT) compare False and are dropped here too.
    df = df[df["Date"] <= cutoff].copy()
    df.sort_values(by="Date", ascending=True, inplace=True)
    df.reset_index(drop=True, inplace=True)

    # Everything except the date column is expected to be numeric.
    numeric_cols = df.columns.drop("Date")
    df[numeric_cols] = df[numeric_cols].astype("float64")

    return df


def extract_last_n_years(df, years=2):
    """
    Extract the last n years of data.

    The window is anchored on the most recent date present in ``df`` (not on
    today's date) and includes the cutoff date itself.

    Args:
        df (pd.DataFrame): DataFrame with a 'Date' column.
        years (int): Number of years to keep (default=2).

    Returns:
        pd.DataFrame: Independent copy of the rows from the last n years.
        Original index labels are preserved.

    Raises:
        KeyError: If ``df`` has no 'Date' column.
    """
    dates = df["Date"]

    # Nothing to anchor the window on; also avoids subtracting a DateOffset
    # from the NaN that max() yields for an empty non-datetime column.
    if dates.empty:
        return df.copy()

    cutoff = dates.max() - pd.DateOffset(years=years)

    # Return a copy (as the loader does after filtering) so callers can add
    # or modify columns without SettingWithCopyWarning.
    return df[dates >= cutoff].copy()


def save_processed_data(df, path):
    """
    Write a DataFrame to disk as a CSV file.

    The row index is not written; only the columns are exported.

    Args:
        df (pd.DataFrame): DataFrame to export.
        path (str): Destination file path (.csv).
    """
    df.to_csv(path_or_buf=path, index=False)
