In [1]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import mean_squared_error, mean_absolute_error

# 1. Naive Forecast for Time Series
class NaiveForecast:
    """
    Baseline forecaster that repeats one previously observed target value.

    With the default lag of 1 the forecast is the final value of the
    training series; a larger lag uses the value that many steps back
    from the end of the series.
    """

    def __init__(self, lag=1):
        """
        Configure the forecaster.

        Parameters:
        - lag: number of time steps to look back from the end of the
          training series (default is 1, i.e. the last observation)
        """
        self.lag = lag
        # Filled in by fit(); stays None until the model has been fitted.
        self.last_value = None

    def fit(self, X, y):
        """
        Remember the value located `lag` steps from the end of y.

        Parameters:
        - X: input features (not used in naive forecast)
        - y: target time series (list, NumPy array, pandas Series, ...)

        Returns:
        - self, so calls can be chained

        Raises:
        - ValueError: if lag is smaller than 1
        - IndexError: if lag is larger than the number of observations in y
        """
        if self.lag < 1:
            # lag=0 would silently pick the FIRST element and a negative lag
            # would index from the front, neither of which is a look-back.
            raise ValueError("lag must be at least 1")

        # Index positionally: on a pandas Series with an integer index,
        # y[-lag] is a label lookup and fails, whereas an array always
        # counts from the end.
        history = np.asarray(y)
        self.last_value = history[-self.lag]
        return self

    def predict(self, X):
        """
        Repeat the remembered value once per requested prediction.

        Parameters:
        - X: input features; only len(X) is used, as the number of
          predictions to make

        Returns:
        - NumPy array of length len(X) filled with the remembered value
        """
        return np.full(len(X), self.last_value)

# 2. Mean/Median Predictor (Scikit-learn style)
class MeanMedianPredictor(BaseEstimator, RegressorMixin):
    """
    Constant regressor that always outputs the mean or the median of the
    targets it was fitted on.
    """

    def __init__(self, strategy='mean'):
        """
        Configure which summary statistic is used as the prediction.

        Parameters:
        - strategy: 'mean' or 'median' (checked when fit() is called)
        """
        self.strategy = strategy
        # Set by fit(); None until then.
        self.prediction_value = None

    def fit(self, X, y):
        """
        Compute the constant prediction from the training targets.

        Parameters:
        - X: input features (ignored)
        - y: target values

        Returns:
        - self

        Raises:
        - ValueError: if strategy is neither 'mean' nor 'median'
        """
        chosen = self.strategy
        if chosen != 'mean' and chosen != 'median':
            raise ValueError("Strategy must be 'mean' or 'median'")

        summarise = np.mean if chosen == 'mean' else np.median
        self.prediction_value = summarise(y)
        return self

    def predict(self, X):
        """
        Return the stored constant once for every row of X.

        Parameters:
        - X: input features; only len(X) is used

        Returns:
        - NumPy array of length len(X) filled with the fitted value
        """
        n_predictions = len(X)
        return np.full(n_predictions, self.prediction_value)

# Example Usage
def demonstrate_baseline_predictors():
    """
    Print a small walkthrough of the two baseline models.

    Fits NaiveForecast on all but the last point of a toy series and prints
    its one-step forecast, then fits MeanMedianPredictor with both
    strategies on the full series and prints the predictions together with
    their MSE and MAE against that same series.
    """
    # Toy series shared by the forecasting part of the demo
    series = [10, 15, 12, 18, 20, 22, 25, 23]
    steps = np.arange(len(series))  # placeholder features; the models ignore them

    # --- Naive forecast: train on everything except the final point ---
    print("Naive Forecast Example:")
    history = series[:-1]
    train_steps, holdout_steps = steps[:-1], steps[-1:]
    forecaster = NaiveForecast(lag=1)
    forecaster.fit(train_steps, history)
    forecast = forecaster.predict(holdout_steps)
    print("Last observed value:", history[-1])
    print("Naive Forecast Prediction:", forecast)

    # --- Constant predictors on a regression-style target ---
    print("\nMean/Median Predictor Example:")
    targets = [10, 15, 12, 18, 20, 22, 25, 23]

    mean_model = MeanMedianPredictor(strategy='mean')
    mean_model.fit(steps, targets)
    mean_preds = mean_model.predict(steps)
    print("Mean:", np.mean(targets))
    print("Mean Predictions:", mean_preds)

    median_model = MeanMedianPredictor(strategy='median')
    median_model.fit(steps, targets)
    median_preds = median_model.predict(steps)
    print("Median:", np.median(targets))
    print("Median Predictions:", median_preds)

    # --- In-sample error of each constant predictor ---
    print("\nPerformance Metrics:")
    report = (
        ("Mean Squared Error (Mean Predictor):", mean_squared_error, mean_preds),
        ("Mean Absolute Error (Mean Predictor):", mean_absolute_error, mean_preds),
        ("Mean Squared Error (Median Predictor):", mean_squared_error, median_preds),
        ("Mean Absolute Error (Median Predictor):", mean_absolute_error, median_preds),
    )
    for label, metric, preds in report:
        print(label, metric(targets, preds))

# Run the demonstration only when executed as a script or notebook cell,
# so that importing this module does not print the demo output.
if __name__ == "__main__":
    demonstrate_baseline_predictors()

Naive Forecast Example:
Last observed value: 25
Naive Forecast Prediction: [25]

Mean/Median Predictor Example:
Mean: 18.125
Mean Predictions: [18.125 18.125 18.125 18.125 18.125 18.125 18.125 18.125]
Median: 19.0
Median Predictions: [19. 19. 19. 19. 19. 19. 19. 19.]

Performance Metrics:
Mean Squared Error (Mean Predictor): 25.359375
Mean Absolute Error (Mean Predictor): 4.375
Mean Squared Error (Median Predictor): 26.125
Mean Absolute Error (Median Predictor): 4.375
