[Reference](https://levelup.gitconnected.com/how-to-mlforecast-your-time-series-data-c583283e0c28)

In [1]:
from mlforecast import MLForecast
from coreforecast.scalers import AutoDifferences
import lightgbm as lgb

# MLForecast target transforms must come from `mlforecast.target_transforms`;
# that module wraps the coreforecast scaler with the interface MLForecast
# calls during fit/predict, so import the wrapper explicitly here.
from mlforecast.target_transforms import AutoDifferences

# Create an MLForecast instance with AutoDifferences
fcst = MLForecast(
    models=[lgb.LGBMRegressor()],
    freq='D',
    lags=[1, 7, 14],
    target_transforms=[AutoDifferences(max_diffs=2)]
)

# Fit the model and make predictions.
# NOTE: `train_data` is not defined in this notebook; it must be supplied
# before running this cell (a long-format frame with id / timestamp / target
# columns, by default named `unique_id`, `ds` and `y`).
fcst.fit(train_data)
# `MLForecast.predict` takes the forecast horizon as `h`; there is no
# `horizon` keyword, so `predict(horizon=30)` raises a TypeError.
predictions = fcst.predict(h=30)

In [2]:
# For Quarterly Data:
from mlforecast import MLForecast
from mlforecast.lag_transforms import RollingMean, ExpandingMean

# Quarterly template: one-quarter and one-year lags plus the quarter number.
fcst = MLForecast(
    # Add your models here
    models=[],
    # Quarterly frequency
    freq='Q',
    # Lag of 1 quarter and 1 year
    lags=[1, 4],
    lag_transforms={
        1: [ExpandingMean()],
        # Rolling mean over the past year
        4: [RollingMean(window_size=4)],
    },
    date_features=['quarter'],
)

In [3]:
# For Weekly Data
# Template: one-week, one-month and one-year lags plus the week number.
fcst = MLForecast(
    # Add your models here
    models=[],
    # Weekly frequency
    freq='W',
    # Lag of 1 week, 1 month, and 1 year
    lags=[1, 4, 52],
    lag_transforms={
        1: [ExpandingMean()],
        # Rolling mean over the past month
        4: [RollingMean(window_size=4)],
        # Rolling mean over the past year
        52: [RollingMean(window_size=52)],
    },
    date_features=['week'],
)

In [4]:
# For Daily Data
# Template: day / week / month / year lags plus day-of-week and month features.
fcst = MLForecast(
    # Add your models here
    models=[],
    # Daily frequency
    freq='D',
    # Lag of 1 day, 1 week, 1 month, and 1 year
    lags=[1, 7, 30, 365],
    lag_transforms={
        1: [ExpandingMean()],
        # Rolling mean over the past week
        7: [RollingMean(window_size=7)],
        # Rolling mean over the past month
        30: [RollingMean(window_size=30)],
        # Rolling mean over the past year
        365: [RollingMean(window_size=365)],
    },
    date_features=['dayofweek', 'month'],
)

In [5]:
# For Hourly Data
# Template: hour / day / week / month lags plus hour and day-of-week features.
fcst = MLForecast(
    # Add your models here
    models=[],
    # Hourly frequency
    freq='H',
    # Lag of 1 hour, 1 day, 1 week, and 1 month
    lags=[1, 24, 168, 720],
    lag_transforms={
        1: [ExpandingMean()],
        # Rolling mean over the past day
        24: [RollingMean(window_size=24)],
        # Rolling mean over the past week
        168: [RollingMean(window_size=168)],
        # Rolling mean over the past month
        720: [RollingMean(window_size=720)],
    },
    date_features=['hour', 'dayofweek'],
)

In [6]:
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

class CustomModel(BaseEstimator, RegressorMixin):
    """Template scikit-learn compatible regressor.

    The fitting and prediction logic below is a runnable placeholder: it
    predicts the mean of the training target for every row. Replace the two
    marked sections with your own model while keeping the `fit` / `predict`
    contract (``fit`` returns ``self``; ``predict`` returns one value per row).

    Parameters
    ----------
    param1, param2 : any, default=1
        Example hyper-parameters. They are stored unchanged on the instance,
        which is what `BaseEstimator.get_params` / `set_params` rely on.
    """

    def __init__(self, param1=1, param2=1):
        self.param1 = param1
        self.param2 = param2

    def fit(self, X, y):
        """Validate the training data and fit the (placeholder) model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Returns
        -------
        self : CustomModel
            The fitted estimator.
        """
        # Check that X and y have correct shape and that y is numeric
        X, y = check_X_y(X, y, y_numeric=True)
        self.n_features_in_ = X.shape[1]

        # Your model fitting logic here.
        # Placeholder: remember the training mean. A trailing-underscore
        # attribute must be set in fit so that `check_is_fitted` succeeds.
        self.mean_ = float(y.mean())

        # Return the fitted estimator
        return self

    def predict(self, X):
        """Predict one value per row of ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If `fit` has not been called yet.
        """
        # Local import keeps this cell runnable on its own.
        import numpy as np

        # Check that fit has been called
        check_is_fitted(self)
        # Input validation
        X = check_array(X)

        # Your prediction logic here.
        # Placeholder: constant prediction equal to the training mean.
        y_pred = np.full(X.shape[0], self.mean_)

        return y_pred

In [7]:
from mlforecast import MLForecast
from your_module import CustomModel

# Plug the custom estimator into MLForecast like any other regressor.
mlf = MLForecast(
    # Use your custom model here
    models=[CustomModel()],
    # Frequency of the data - 'D' for daily frequency
    freq='D',
    # Lag features to use
    lags=[1, 2, 3],
    # Date-based features
    date_features=['dayofweek', 'month'],
)

In [8]:
# Fit the model.
# NOTE: `df` must already hold the training data at this point; no earlier
# cell defines it.
mlf.fit(df)

# Make predictions for the next 7 periods.
# `MLForecast.predict` names its horizon argument `h`; passing `horizon=7`
# raises a TypeError.
predictions = mlf.predict(h=7)

In [11]:
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from coreforecast.scalers import AutoDifferences
from mlforecast import MLForecast
from mlforecast.lag_transforms import (
    RollingMean, RollingStd, RollingMin, RollingMax, RollingQuantile,
    SeasonalRollingMean, SeasonalRollingStd, SeasonalRollingMin,
    SeasonalRollingMax, SeasonalRollingQuantile,
    ExpandingMean
)
from mlforecast.target_transforms import Differences
from numba import njit
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.seasonal import seasonal_decompose

# Load the USD/INR series and put it on a regular month-start grid.
file_path = "USD-INR.csv"
df = (
    pd.read_csv(file_path)
    # Parse the 'Month' column into timestamps
    .assign(Month=lambda frame: pd.to_datetime(frame['Month']))
    .set_index('Month')
    # One row per month start; months with several rows are averaged
    .resample('MS')
    .mean()
    # Interpolate to fill missing values
    .interpolate()
    .reset_index()
)
print(df.head())

# Additive decomposition of the monthly series into trend / seasonal / residual.
# `df` carries a plain integer index at this point, and seasonal_decompose
# cannot determine the seasonal period from that (it raises a ValueError).
# Index the series by month and state the yearly period (12 months) explicitly;
# this also puts real dates on the x-axis of the plots.
result = seasonal_decompose(
    df.set_index('Month')['RUPEES/US$'],
    model='additive',
    period=12,
)

fig, axes = plt.subplots(4, 1, figsize=(10, 12))

# (component series, y-axis label, line colour) for each panel, top to bottom
panels = [
    (result.observed, 'Observed', "#69d"),
    (result.trend, 'Trend', '#ff7f0e'),
    (result.seasonal, 'Seasonal', '#ff7f0e'),
    (result.resid, 'Residual', '#ff7f0e'),
]
for ax, (component, label, color) in zip(axes, panels):
    component.plot(ax=ax, color=color)
    ax.set_ylabel(label)

plt.tight_layout()
plt.show()

# Reshape to the long format used for forecasting: series id, timestamp, target.
df = df[['Month', 'RUPEES/US$']].rename(columns={'Month': 'ds', 'RUPEES/US$': 'y'})
df.insert(0, 'unique_id', 1)  # single series, so one constant id

# Train-Test Split: chronological, first 80% of rows for training
train_size = int(len(df) * 0.8)
train = df.iloc[:train_size]
test = df.iloc[train_size:]

print(f'Train set size: {len(train)}')
print(f'Test set size: {len(test)}')

# Candidate regressors; each one is trained on the same engineered features.
models = [
    # Simple linear regression model
    LinearRegression(),
    # LightGBM regressor with verbosity turned off
    lgb.LGBMRegressor(verbosity=-1),
    # XGBoost regressor with default parameters
    xgb.XGBRegressor(),
    # Random Forest regressor with fixed random state for reproducibility
    RandomForestRegressor(random_state=0),
]

fcst = MLForecast(
    # List of models to be used for forecasting
    models=models,
    # Monthly frequency, starting at the beginning of each month
    freq='MS',
    # Lag features: values from 1, 3, 5, 7, and 12 time steps ago
    lags=[1, 3, 5, 7, 12],
    lag_transforms={
        # Lag 1: rolling statistics over 3 time steps, plus the expanding mean
        1: [
            RollingMean(window_size=3),
            RollingStd(window_size=3),
            RollingMin(window_size=3),
            RollingMax(window_size=3),
            RollingQuantile(p=0.5, window_size=3),  # rolling median
            ExpandingMean(),  # mean of all previous values
        ],
        # Lag 6: rolling statistics over 6 time steps
        6: [
            RollingMean(window_size=6),
            RollingStd(window_size=6),
            RollingMin(window_size=6),
            RollingMax(window_size=6),
            RollingQuantile(p=0.5, window_size=6),  # rolling median
        ],
        # Lag 12 (likely for yearly seasonality): seasonal rolling statistics
        # with a 12-month season length and a window of 3
        12: [
            SeasonalRollingMean(season_length=12, window_size=3),
            SeasonalRollingStd(season_length=12, window_size=3),
            SeasonalRollingMin(season_length=12, window_size=3),
            SeasonalRollingMax(season_length=12, window_size=3),
            SeasonalRollingQuantile(p=0.5, season_length=12, window_size=3),  # seasonal rolling median
        ],
    },
    # Extract year, month, and quarter from the date as features
    date_features=['year', 'month', 'quarter'],
    # Apply first-order differencing to the target variable
    target_transforms=[Differences([1])],
)

# Inspect the engineered feature matrix that the models will be trained on.
preprocessed_df = fcst.preprocess(train)
print(preprocessed_df)

# Fit every configured model on the training split.
# (The original referenced `train_` / `test_`, which are never defined; the
# split variables are named `train` and `test`.)
fcst.fit(train)

# Forecast as many steps ahead as there are rows in the test split.
# The result holds one column per model, named after the model class.
ml_prediction = fcst.predict(len(test))

# Rename the 'ds' timestamp column to 'Month' (returns a new frame, so
# re-running the cell is safe).
ml_prediction = ml_prediction.rename(columns={'ds': 'Month'})

# Build the results frame from the test split. `test` is in long format
# (unique_id / ds / y), so restore the original column names, drop the
# constant series id and index by month. The actual values end up as the
# first column, 'RUPEES/US$', followed by the forecast columns added below.
fcst_result = (
    test.rename(columns={'ds': 'Month', 'y': 'RUPEES/US$'})
    .drop(columns='unique_id')
    .set_index('Month')
)

# The forecasts are attached positionally, so make sure they line up with the
# test months before copying the raw values across.
assert (ml_prediction['Month'].values == fcst_result.index.values).all(), \
    "forecast dates do not match the test period"

# One '<model>_fcst' column per model, holding that model's predictions.
fcst_result["LinearRegression_fcst"] = ml_prediction["LinearRegression"].values
fcst_result["LGBM_fcst"] = ml_prediction["LGBMRegressor"].values
fcst_result["XGB_fcst"] = ml_prediction["XGBRegressor"].values
fcst_result["RandomForest_fcst"] = ml_prediction["RandomForestRegressor"].values

# Preview: actual values next to the predictions from all models.
fcst_result.head()

# Error-metric helper shared by all models
def calculate_error_metrics(actual_values, predicted_values):
    """Return MAE, RMSE and MAPE for a set of predictions.

    Both inputs are converted to NumPy arrays and compared element-wise.
    The result is a two-column DataFrame ('Metric', 'Value') with one row
    per metric, in the order MAE, RMSE, MAPE. MAPE is in percent.
    """
    actual = np.array(actual_values)
    predicted = np.array(predicted_values)
    errors = actual - predicted

    mae = np.mean(np.abs(errors))            # Mean Absolute Error
    rmse = np.sqrt(np.mean(errors ** 2))     # Root Mean Square Error
    mape = np.mean(np.abs(errors / actual)) * 100  # Mean Absolute Percentage Error

    return pd.DataFrame({
        'Metric': ['MAE', 'RMSE', 'MAPE'],
        'Value': [mae, rmse, mape],
    })

# Actual values to score every forecast against
actuals = fcst_result['RUPEES/US$']

# Error metrics per model: every column after the first (the first column is
# expected to hold the actual values) is treated as a forecast.
error_metrics_dict = {
    col: calculate_error_metrics(actuals, fcst_result[col])['Value'].values
    for col in fcst_result.columns[1:]
}

# One row per model, one column per metric
error_metrics_df = pd.DataFrame(error_metrics_dict).T.reset_index()
error_metrics_df.columns = ['Model', 'MAE', 'RMSE', 'MAPE']  # Renaming columns for clarity
print(error_metrics_df)