# Evaluating decomposed predictions by Orbit (**O**bject-**OR**iented **B**ayes**I**an **T**ime Series)

- [Orbit: A Python Package for Bayesian Forecasting](https://github.com/uber/orbit)
- [Orbit’s Documentation](https://uber.github.io/orbit/)
- [Quick Start](https://uber.github.io/orbit/tutorials/quick_start.html)
- [Orbit: Probabilistic Forecast with Exponential Smoothing](https://arxiv.org/abs/2004.08492) Paper


### Implemented Models

- ETS (which stands for Error, Trend, and Seasonality) Model
- Methods of Estimation
    - Maximum a Posteriori (MAP)
    - Full Bayesian Estimation
    - Aggregated Posteriors
- Damped Local Trend (DLT)
    - Global Trend Configurations:
        - Linear Global Trend
        - Log-Linear Global Trend
        - Flat Global Trend
        - Logistic Global Trend
    - Damped Local Trend Full Bayesian Estimation (DLTFull)
- Local Global Trend (LGT)
    - Local Global Trend Maximum a Posteriori (LGTMAP)
    - Local Global Trend for full Bayesian prediction (LGTFull)
    - Local Global Trend for aggregated posterior prediction (LGTAggregated)
- Using Pyro for Estimation
    - MAP Fit and Predict
    - VI Fit and Predict
- Kernel-based Time-varying Regression (KTR)
    - Kernel-based Time-varying Regression Lite (KTRLite)

In [None]:
!pip install awswrangler

In [None]:
!pip install orbit-ml --no-input

In [None]:
import awswrangler as wr
import boto3
from sagemaker import get_execution_role
import pandas as pd
import numpy as np

import orbit
from orbit import *
from orbit.models.dlt import ETSFull, ETSMAP, ETSAggregated, DLTMAP, DLTFull, DLTMAP, DLTAggregated
from orbit.models.lgt import LGTMAP, LGTAggregated, LGTFull
from orbit.models.ktrlite import KTRLiteMAP

from orbit.estimators.pyro_estimator import PyroEstimatorVI, PyroEstimatorMAP

In [None]:
import warnings

# Silence every warning for the rest of the session. A single catch-all
# 'ignore' filter is enough: filterwarnings('ignore') called without a
# message/module pattern installs exactly the same filter entry.
warnings.simplefilter('ignore')

## Loading data

- loading the input data for the **models** from S3

In [None]:
# Execution role reported by the SageMaker SDK.
# NOTE(review): `role` is not referenced again in the code visible here.
role = get_execution_role()

# S3 coordinates of the input CSV (placeholder values).
bucket = '...'
data_key = '...csv'
data_location = 's3://{}/{}'.format(bucket, data_key)

In [None]:
# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap
# is needed around it.
df = pd.read_csv(data_location)

In [None]:
# Name the 'Unnamed: 0' column 'Date' and mirror it into the row index;
# the column itself is kept alongside the index.
df = df.rename(columns={'Unnamed: 0': 'Date'})
df.index = df['Date']

In [None]:
df.shape

In [None]:
df

In [None]:
# New frame without the row labelled 'curve'; df itself is left unchanged.
curve_df = df.drop(index=['curve'])

## Orbit Models

In [None]:
# ETS (which stands for Error, Trend, and Seasonality)

# Methods of Estimations

# Maximum a Posteriori (MAP)

# The advantage of MAP estimation is a faster computational speed.

def ETSMAP_model(date_col, response_col, train_df, test_df):
    '''
    Fit an ETSMAP model and return the head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to ETSMAP
        response_col: name of the response column, forwarded to ETSMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = {
        'date_col': date_col,
        'response_col': response_col,
        'seasonality': 52,
        'seed': 8888,
    }
    model = ETSMAP(**settings)
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]

# Full Bayesian Estimation


def ETSFull_model(date_col, response_col, train_df, test_df):
    '''
    Fit an ETSFull model and return the head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to ETSFull
        response_col: name of the response column, forwarded to ETSFull
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    # Sampler budget: 400 warm-up and 400 sampling iterations.
    sampler_settings = {'num_warmup': 400, 'num_sample': 400}

    model = ETSFull(
        date_col=date_col,
        response_col=response_col,
        seasonality=52,
        seed=8888,
        **sampler_settings,
    )
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]

# Aggregated Posteriors

def ETSAggregated_model(date_col, response_col, train_df, test_df):
    '''
    Fit an ETSAggregated model and return the head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to ETSAggregated
        response_col: name of the response column, forwarded to ETSAggregated
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    model = ETSAggregated(date_col=date_col, response_col=response_col,
                          seasonality=52, seed=8888)
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]


# Damped Local Trend (DLT)

# Global Trend Configurations

# Linear Global Trend

# linear global trend
def DLTMAP_lin(date_col, response_col, train_df, test_df):
    '''
    Fit a DLTMAP model with its default global trend and return the head of
    its predictions.

    global_trend_option is not passed, so the library default applies
    (linear according to the surrounding comments - TODO confirm).

    Parameters:
        date_col: name of the date column, forwarded to DLTMAP
        response_col: name of the response column, forwarded to DLTMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = dict(
        date_col=date_col,
        response_col=response_col,
        seasonality=52,
        seed=8888,
    )
    model = DLTMAP(**settings)
    model.fit(train_df)

    forecast = model.predict(test_df)
    return forecast['prediction'][:11]


# log-linear global trend
def DLTMAP_log_lin(date_col, response_col, train_df, test_df):
    '''
    Fit a DLTMAP model with global_trend_option='loglinear' and return the
    head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to DLTMAP
        response_col: name of the response column, forwarded to DLTMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = dict(
        date_col=date_col,
        response_col=response_col,
        global_trend_option='loglinear',
        seasonality=52,
        seed=8888,
    )
    model = DLTMAP(**settings)
    model.fit(train_df)

    forecast = model.predict(test_df)
    return forecast['prediction'][:11]


# flat global trend
def DLTMAP_flat(date_col, response_col, train_df, test_df):
    '''
    Fit a DLTMAP model with global_trend_option='flat' and return the head
    of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to DLTMAP
        response_col: name of the response column, forwarded to DLTMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = dict(
        date_col=date_col,
        response_col=response_col,
        global_trend_option='flat',
        seasonality=52,
        seed=8888,
    )
    model = DLTMAP(**settings)
    model.fit(train_df)

    forecast = model.predict(test_df)
    return forecast['prediction'][:11]


# logistic global trend
def DLTMAP_logistic(date_col, response_col, train_df, test_df):
    '''
    Fit a DLTMAP model with global_trend_option='logistic' and return the
    head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to DLTMAP
        response_col: name of the response column, forwarded to DLTMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = dict(
        date_col=date_col,
        response_col=response_col,
        global_trend_option='logistic',
        seasonality=52,
        seed=8888,
    )
    model = DLTMAP(**settings)
    model.fit(train_df)

    forecast = model.predict(test_df)
    return forecast['prediction'][:11]


# Damped Local Trend Full Bayesian Estimation (DLTFull)

def DLTFull_model(date_col, response_col, train_df, test_df):
    '''
    Fit a DLTFull model and return the head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to DLTFull
        response_col: name of the response column, forwarded to DLTFull
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    # Sampler budget: 400 warm-up and 400 sampling iterations.
    sampler_settings = {'num_warmup': 400, 'num_sample': 400}

    model = DLTFull(
        date_col=date_col,
        response_col=response_col,
        seasonality=52,
        seed=8888,
        **sampler_settings,
    )
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]


# Damped Local Trend Aggregated Posteriors (DLTAggregated)

def DLTAggregated_model(date_col, response_col, train_df, test_df):
    '''
    Fit a DLTAggregated model and return the head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to DLTAggregated
        response_col: name of the response column, forwarded to DLTAggregated
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    dlt = DLTAggregated(date_col=date_col, response_col=response_col,
                        seasonality=52, seed=8888)
    dlt.fit(df=train_df)

    forecast = dlt.predict(df=test_df)
    return forecast['prediction'][:11]


# Local Global Trend (LGT) Model

# Local Global Trend Maximum a Posteriori (LGTMAP)

def LGTMAP_model(date_col, response_col, train_df, test_df):
    '''
    Fit an LGTMAP model and return the head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to LGTMAP
        response_col: name of the response column, forwarded to LGTMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = {
        'date_col': date_col,
        'response_col': response_col,
        'seasonality': 52,
        'seed': 8888,
    }
    model = LGTMAP(**settings)
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]

# LGTFull

def LGTFull_model(date_col, response_col, train_df, test_df):
    '''
    Fit an LGTFull model and return the head of its predictions.

    No sampler settings are passed, so the library defaults apply.

    Parameters:
        date_col: name of the date column, forwarded to LGTFull
        response_col: name of the response column, forwarded to LGTFull
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    model = LGTFull(date_col=date_col, response_col=response_col,
                    seasonality=52, seed=8888)
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]

# LGTAggregated

def LGTAggregated_model(date_col, response_col, train_df, test_df):
    '''
    Fit an LGTAggregated model and return the head of its predictions.

    Parameters:
        date_col: name of the date column, forwarded to LGTAggregated
        response_col: name of the response column, forwarded to LGTAggregated
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = {
        'date_col': date_col,
        'response_col': response_col,
        'seasonality': 52,
        'seed': 8888,
    }
    model = LGTAggregated(**settings)
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]

# Using Pyro for Estimation

# MAP Fit and Predict

def LGTMAP_PyroEstimatorMAP(date_col, response_col, train_df, test_df):
    '''
    Fit an LGTMAP model through PyroEstimatorMAP and return the head of its
    predictions.

    Parameters:
        date_col: name of the date column, forwarded to LGTMAP
        response_col: name of the response column, forwarded to LGTMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    model = LGTMAP(
        estimator_type=PyroEstimatorMAP,
        date_col=date_col,
        response_col=response_col,
        seasonality=52,
        seed=8888,
    )
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]

# VI Fit and Predict

def LGTFull_pyro(date_col, response_col, train_df, test_df):
    '''
    Fit an LGTFull model through PyroEstimatorVI and return the head of its
    predictions.

    Parameters:
        date_col: name of the date column, forwarded to LGTFull
        response_col: name of the response column, forwarded to LGTFull
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    # Estimator selection and its tuning knobs, kept apart from the
    # model-level settings below.
    estimator_settings = {
        'estimator_type': PyroEstimatorVI,
        'num_steps': 101,
        'num_sample': 100,
        'learning_rate': 0.1,
        'n_bootstrap_draws': -1,
    }

    model = LGTFull(
        date_col=date_col,
        response_col=response_col,
        seasonality=52,
        seed=8888,
        **estimator_settings,
    )
    model.fit(df=train_df)

    forecast = model.predict(df=test_df)
    return forecast['prediction'][:11]


# Kernel-based Time-varying Regression (KTR)

# KTRLite

def ktrlite_MAP(date_col, response_col, train_df, test_df):
    '''
    Fit a KTRLiteMAP model and return the head of its predictions.

    Unlike the other model helpers in this file, no seasonality or seed is
    passed to the model.

    Parameters:
        date_col: name of the date column, forwarded to KTRLiteMAP
        response_col: name of the response column, forwarded to KTRLiteMAP
        train_df: data the model is fitted on
        test_df: data the model predicts for

    Returns:
        the first 11 entries of the 'prediction' column of the predict() output
    '''
    settings = {
        'date_col': date_col,
        'response_col': response_col,
        'level_knot_scale': .1,
        'span_level': .05,
    }
    model = KTRLiteMAP(**settings)
    model.fit(train_df)

    # decompose=True is requested, but only the 'prediction' column of the
    # result is used below.
    forecast = model.predict(df=test_df, decompose=True)
    return forecast['prediction'][:11]

## Root-Mean-Square Deviation (RMSD) or Root-Mean-Square Error (RMSE)

In [None]:
def rmse(actual, pred):
    '''
    Root-mean-square error between two sequences of values.

    Parameters:
        actual: observed values (anything np.array accepts)
        pred: predicted values (anything np.array accepts)

    Returns:
        the square root of the mean of the squared differences
    '''
    observed = np.array(actual)
    estimated = np.array(pred)
    residuals = np.subtract(observed, estimated)
    return np.sqrt(np.mean(np.square(residuals)))

In [None]:
def evaluating_models(index, column):

    '''
    Score every Orbit model helper on one column of curve_df and record the
    resulting RMSE values in models_df.

    Reads the module-level frames curve_df and df and writes into the
    module-level frames tmp_df and models_df.

    Parameters:
        index: row label of models_df that receives the results
        column: name of the curve_df column to model

    Returns:
        models_df: the module-level results frame; row `index` holds the
        item name, one RMSE per model (100 where a model failed) and the
        curve type
    '''

    # Score recorded when a model fails to fit or predict.
    failed_score = 100

    # (models_df column, model helper). The order defines the column order of
    # models_df, which min_value() slices positionally.
    candidates = (
        ('ETSMAP', ETSMAP_model),
        ('ETSFull', ETSFull_model),
        ('ETSAggregated', ETSAggregated_model),
        ('DLTMAP_lin', DLTMAP_lin),
        ('DLTMAP_log_lin', DLTMAP_log_lin),
        ('DLTMAP_flat', DLTMAP_flat),
        ('DLTMAP_logistic', DLTMAP_logistic),
        ('DLTFull', DLTFull_model),
        ('DLTAggregated', DLTAggregated_model),
        ('LGTMAP', LGTMAP_model),
        ('LGTFull', LGTFull_model),
        ('LGTAggregated', LGTAggregated_model),
        # Using Pyro for Estimation
        ('LGTMAP_PyroEstimatorMAP', LGTMAP_PyroEstimatorMAP),
        ('LGTFull_pyro4', LGTFull_pyro),
        # Kernel-based Time-varying Regression (KTR)
        ('KTR_Lite_MAP', ktrlite_MAP),
    )

    tmp_df['Date'] = pd.to_datetime(curve_df['Date'].astype(str))
    tmp_df['Penetration'] = curve_df[column].astype(float)

    date_col = 'Date'
    response_col = 'Penetration'

    # Decompose Prediction
    train_df = tmp_df[tmp_df['Date'] < '2022-01-01']
    # NOTE(review): test_df is a superset of train_df (every date before
    # 2022-01-01 also satisfies <= 2025-01-01), so the first 11 predictions
    # compared below may be in-sample fits rather than forecasts - confirm
    # this is intended.
    test_df = tmp_df[tmp_df['Date'] <= '2025-01-01']

    models_df.at[index, 'Item Name'] = column

    # Every model is scored against the same first 11 observed values.
    actual = tmp_df['Penetration'][:11]

    for label, model_fn in candidates:
        try:
            predicted = model_fn(date_col, response_col, train_df, test_df)
            score = float(rmse(actual, predicted))
        except Exception as exc:
            # Best effort: a failing model must not abort the remaining ones.
            # Exception (not a bare except) keeps Ctrl-C working during long
            # fits, and the message keeps the failure visible, since the
            # sentinel alone cannot be told apart from a genuine RMSE of 100.
            print(f'{label} failed for {column!r}: {exc!r}')
            score = failed_score
        models_df.at[index, label] = score

    # NOTE(review): assumes the last row of df holds the curve type
    # (presumably the row labelled 'curve') - confirm.
    models_df.at[index, 'Curve Type'] = df[column].iloc[-1]

    return models_df

## Calculating minimal RMSE value for each item

In [None]:
def min_value(df):

    '''
    Label every row with the name of the column holding its smallest score.

    Parameters:
        df: input dataframe whose first column is an identifier, whose last
        column is non-score metadata and whose columns in between hold the
        per-model scores; a 'Model' column left by an earlier call is ignored

    Returns:
        df: the same dataframe, with a 'Model' column filled with the name
        of the best-fitted (lowest-score) model for each item
    '''

    # Leave out the result of a previous call, so that re-running does not
    # shift the trailing metadata column into the score slice.
    scores = df.drop(columns=['Model'], errors='ignore').iloc[:, 1:-1]

    # Columns filled cell by cell may not be numeric; convert them and
    # actually use the converted values for the comparison.
    scores = scores.apply(pd.to_numeric)

    df['Model'] = scores.idxmin(axis=1)

    # Return the frame that was passed in, not a module-level global.
    return df

## Evaluating Orbit models for each item

In [None]:
import time


# Module-level frames used by evaluating_models(): tmp_df is its per-column
# scratch frame, models_df collects one result row per evaluated column.
tmp_df = pd.DataFrame()
models_df = pd.DataFrame()

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump when the system clock is adjusted.
start = time.perf_counter()

# NOTE(review): columns[1:2] selects a single column, although the section
# heading says "for each item" - widen the slice (e.g. columns[1:]) if the
# full run is intended.
for index, column in enumerate(curve_df.columns[1:2]):
    evaluating_models(index, column)

end = time.perf_counter()
print(end - start)  # elapsed seconds

In [None]:
models_df

In [None]:
min_value(models_df)