In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from pathlib import Path
import joblib
import hashlib
from datetime import datetime
from typing import Tuple, List, Dict, Any, Optional
import os
import glob

# Identifiers of the model types to evaluate; each one is a name that
# get_model() knows how to instantiate.
MODELS = [
    "linear",
    "randomforest",
    "xgboost",
]

def get_model(model_name: str):
    """
    Build an unfitted estimator for the requested model type.

    "linear" yields a plain ``LinearRegression``. "randomforest" and
    "xgboost" yield a ``GridSearchCV`` wrapper (5-fold cross-validation,
    scored by negative mean squared error) around the respective regressor,
    each created with ``random_state=42``.

    Parameters
    ----------
    model_name : str
        One of "linear", "randomforest" or "xgboost".

    Returns
    -------
    model
        A new estimator, or a grid-search wrapper around one.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported names.
    """
    # Library imports stay local to each branch so that only the package
    # needed for the requested model is imported.
    if model_name == "linear":
        from sklearn.linear_model import LinearRegression

        return LinearRegression()

    if model_name == "randomforest":
        from sklearn.ensemble import RandomForestRegressor
        from sklearn.model_selection import GridSearchCV

        search_space = {
            'n_estimators': [50, 100, 200],
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10]
        }
        base_estimator = RandomForestRegressor(random_state=42)
    elif model_name == "xgboost":
        import xgboost as xgb
        from sklearn.model_selection import GridSearchCV

        search_space = {
            'n_estimators': [50, 100, 200],
            'max_depth': [3, 5, 7],
            'learning_rate': [0.01, 0.1, 0.2]
        }
        base_estimator = xgb.XGBRegressor(random_state=42)
    else:
        raise ValueError(f"Unknown model name: {model_name}")

    # Both tuned models share the same search configuration.
    return GridSearchCV(
        base_estimator,
        search_space,
        cv=5,
        scoring='neg_mean_squared_error',
    )

def evaluate_model(model, model_name: str, X_test: pd.DataFrame, y_test: pd.Series) -> Tuple[float, float, float]:
    """
    Score a fitted model on held-out data.

    Parameters
    ----------
    model : estimator
        Fitted model. For any ``model_name`` other than "linear" it must
        expose a ``best_estimator_`` attribute, which is what gets used for
        prediction.
    model_name : str
        Name of the model; only decides whether ``model`` itself or
        ``model.best_estimator_`` produces the predictions.
    X_test : pd.DataFrame
        Test features.
    y_test : pd.Series
        Test target values.

    Returns
    -------
    tuple
        (MAE, RMSE, R2) computed from ``y_test`` and the predictions.
    """
    if model_name == "linear":
        estimator = model
    else:
        # Non-linear models are expected to be search wrappers; predict with
        # the selected estimator.
        estimator = model.best_estimator_

    predictions = estimator.predict(X_test)

    abs_error = mean_absolute_error(y_test, predictions)
    squared_error = mean_squared_error(y_test, predictions)
    root_squared_error = np.sqrt(squared_error)
    determination = r2_score(y_test, predictions)

    return abs_error, root_squared_error, determination

def _data_hash(arrays) -> str:
    """
    Generate a short content-based hash of the input data arrays.

    Non-object data is hashed from its raw element bytes. Object-dtype data
    (e.g. string or mixed-type columns) is hashed from the ``repr`` of each
    element instead, because the raw buffer of an object array holds memory
    addresses rather than content and would yield a different hash for
    identical data.

    Parameters
    ----------
    arrays : iterable
        Items to hash, in order. Each item may be a pandas Series or
        DataFrame, a NumPy array, or anything ``np.asarray`` accepts.

    Returns
    -------
    str
        First 12 hexadecimal characters of the SHA-256 digest.

    Notes
    -----
    Only element content is hashed. Shape, dtype and pandas index/column
    labels do not contribute to the result.
    """
    digest = hashlib.sha256()
    for arr in arrays:
        if isinstance(arr, (pd.Series, pd.DataFrame)):
            values = arr.to_numpy()
        else:
            values = np.asarray(arr)

        if values.dtype.hasobject:
            # tobytes() on an object array serialises pointers; hash a
            # textual form of each element so equal content hashes equally.
            # The unit-separator keeps adjacent elements from running together.
            text = "\x1f".join(repr(item) for item in values.ravel())
            digest.update(text.encode("utf-8", "backslashreplace"))
        else:
            digest.update(values.tobytes())
    return digest.hexdigest()[:12]

def _model_path(model_name: str, data_hash: str, option_type: Optional[str] = None) -> Path:
    """
    Build a timestamped file path under ``models/`` for a model artifact.

    The ``models`` directory (relative to the current working directory) is
    created as a side effect if it does not exist yet.

    Parameters
    ----------
    model_name : str
        Name of the model.
    data_hash : str
        Hash of the training data.
    option_type : str, optional
        Type of option ('call' or 'put'). When falsy it is left out of the
        file name.

    Returns
    -------
    Path
        ``models/<model_name>[_<option_type>]_<data_hash>_<YYYYmmdd_HHMMSS>.joblib``,
        with the timestamp taken from the local clock.
    """
    target_dir = Path("models")
    target_dir.mkdir(parents=True, exist_ok=True)

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # The option type, when given, sits between the model name and the hash.
    option_part = f"{option_type}_" if option_type else ""
    file_name = f"{model_name}_{option_part}{data_hash}_{stamp}.joblib"
    return target_dir / file_name

def _save_model(model: Any, path: Path) -> None:
    """
    Persist a model to disk with joblib and report where it went.

    Parameters
    ----------
    model : Any
        Model to save.
    path : Path
        Destination file. Missing parent directories are created first.
    """
    destination_dir = path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    # Written with xz compression at level 3.
    compression = ("xz", 3)
    joblib.dump(model, path, compress=compression)

    print(f"Saved model to {path}")

def _load_model(path: Path) -> Any:
    """
    Load a model from disk if the file exists.

    Parameters
    ----------
    path : Path
        Path to the model file.

    Returns
    -------
    Any
        The object returned by ``joblib.load``, or ``None`` when ``path``
        does not exist.
    """
    if not path.exists():
        return None

    print(f"Loading model from {path}")
    # NOTE(review): joblib files are pickle-based; presumably only files
    # written by this project are loaded here - confirm.
    loaded = joblib.load(path)
    return loaded

def get_latest_model(model_name: str, option_type: Optional[str] = None) -> Tuple[Any, Optional[Path]]:
    """
    Load the most recently modified saved model matching the given name.

    Candidates are ``.joblib`` files in the ``models`` directory (relative to
    the current working directory) whose names start with
    ``<model_name>_`` or, when ``option_type`` is given,
    ``<model_name>_<option_type>_``. "Most recent" is decided by file
    modification time, not by the timestamp embedded in the file name.

    Parameters
    ----------
    model_name : str
        Name of the model to retrieve.
    option_type : str, optional
        Type of option ('call' or 'put').

    Returns
    -------
    tuple
        (model, path) for the newest matching file, or (None, None) if the
        directory or a matching file does not exist.

    Notes
    -----
    Without ``option_type`` the glob is ``<model_name>_*.joblib``, which also
    matches files that were saved with an option type in their name.
    """
    store = Path("models")
    if not store.exists():
        return None, None

    prefix = f"{model_name}_{option_type}_" if option_type else f"{model_name}_"
    candidates = list(store.glob(prefix + "*.joblib"))
    if not candidates:
        return None, None

    # Newest file by modification time.
    newest = max(candidates, key=os.path.getmtime)
    return _load_model(newest), newest

def fit_model(model_