<span style="font-size: 14pt">YEREVAN, May-June 2017</span>

<span style="font-size: 16pt">HW #5: Compositions of time series forecasting algorithms</span>

<span style="color:blue; font-size: 12pt">Alexey Romanenko </span>,
<span style="color:blue; font-size: 12pt; font-family: 'Verdana'">alexromsput@gmail.com</span>


<h1 align="center"> Home Work 5 </h1>
**Implement the following compositions of adaptive algorithms (ESM): Adaptive Selection and Adaptive Composition.**

Start with the code below

In [None]:
import math
import warnings
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

from utils import qualityRMSE, qualityMedianAE, qualityMACAPE
from utils import ExponentialSmoothing as SimpleExponentialSmoothing, AdaptiveExponentialSmoothing #, TheilWageExponentialSmoothing
%matplotlib inline


In [None]:
# Pool of base forecasters used by the compositions. Every entry pairs the
# name of a forecasting function with the parameter dictionary handed to it.
BaseAlgs = [
    dict(
        BaseAlg='SimpleExponentialSmoothing',
        BaseAlgParams=dict(alpha=0.1, AdaptationPeriod=10),
    ),
    dict(
        BaseAlg='AdaptiveExponentialSmoothing',
        BaseAlgParams=dict(alpha=0.2, gamma=0.01, AdaptationPeriod=10),
    ),
    dict(
        BaseAlg='AdaptiveExponentialSmoothing',
        BaseAlgParams=dict(alpha=0.1, gamma=0.01, AdaptationPeriod=10),
    ),
    # Disabled entry, kept for reference:
    # {'BaseAlg':'TheilWageSmoothing', 'BaseAlgParams':{'alpha':0.3, 'beta':0.9,'delta':0.9,  'AdaptationPeriod':10}}
]

In [None]:
def AdaptiveSelection(x, h, Params):
    '''
    Adaptive selection: at every step forecast with the base algorithm(s)
    whose exponentially smoothed absolute error is currently the smallest.

    Parameters
    x <array> - time series (NaN values are replaced by 0)
    h <integer scalar> - forecasting delay
    Params <dict> - dictionary with
        gamma <scalar in [0,1]> - smoothing parameter of error
        eps <scalar >= 0> - bound for best indistinctive models: every model
            whose smoothed error is within eps of the best one is selected and
            the selected forecasts are averaged
        BaseAlgsParams - array of <dict> with params
            BaseAlg <string or callable> - name of base algorithm (looked up
                among the module-level names) or the function itself; it is
                called as BaseAlg(x, h, BaseAlgParams) and must return T+h
                forecasts indexed by target time
            BaseAlgParams <dict> - dictionary of base algorithm's params

    Returns
    <list> of length T+h, indexed by target time. The first h entries are NaN,
    the entries produced before any error could be measured (t < h) are 0.
    If gamma is outside [0,1] a warning is issued and the list is all NaN.

    Raises
    ValueError - if no base algorithm is given
    NameError - if a base algorithm name cannot be resolved
    '''
    x = np.nan_to_num(np.asarray(x, dtype=float))
    T = len(x)
    FORECAST = [np.nan]*(T+h)

    # internal params of composition (validated before any base forecast is computed)
    gamma = Params['gamma']
    eps = Params['eps']
    if gamma > 1:
        warnings.warn('Gamma can not be more than 1')
        return FORECAST
    if gamma < 0:
        warnings.warn('Gamma can not be less than 0')
        return FORECAST

    BaseAlgs = Params['BaseAlgsParams']
    N = len(BaseAlgs)
    if N == 0:
        raise ValueError('At least one base algorithm is required')

    # forecast TS by each base alg; row ba holds the forecasts of algorithm ba
    FORECAST_BA = np.full((N, T+h), np.nan)
    for ba, spec in enumerate(BaseAlgs):
        alg = spec['BaseAlg']
        if not callable(alg):
            # resolve the name directly instead of eval()-ing a built string
            if alg not in globals():
                raise NameError("name '%s' is not defined" % alg)
            alg = globals()[alg]
        FORECAST_BA[ba] = alg(x, h, spec['BaseAlgParams'])

    e1 = np.zeros(N)  # smoothed absolute errors of base algorithms
    y = 0
    for t in range(T):
        if t >= h:
            # column t holds the forecasts that were made for the moment t
            e1 = gamma*np.abs(x[t] - FORECAST_BA[:, t]) + (1-gamma)*e1
            # "<=" keeps the best model itself selected even when eps == 0
            idx_best = np.flatnonzero(e1 - e1.min() <= eps)
            # the value emitted for t+h is built from the base forecasts FOR t+h
            y = float(FORECAST_BA[idx_best, t+h].mean())
        FORECAST[t+h] = y
    return FORECAST

In [None]:
def AdaptiveCombination(x, h, Params):
    '''
    Adaptive composition: forecast with a weighted average of the base
    algorithms, the weights being inversely proportional to their
    exponentially smoothed absolute errors.

    Parameters
    x <array> - time series (NaN values are replaced by 0)
    h <integer scalar> - forecasting delay
    Params <dict> - dictionary with
        gamma <scalar in [0,1]> - smoothing parameter of error
        BaseAlgsParams - array of <dict> with params
            BaseAlg <string or callable> - name of base algorithm (looked up
                among the module-level names) or the function itself; it is
                called as BaseAlg(x, h, BaseAlgParams) and must return T+h
                forecasts indexed by target time
            BaseAlgParams <dict> - dictionary of base algorithm's params

    Returns
    <list> of length T+h, indexed by target time. The first h entries are NaN,
    the entries produced before any error could be measured (t < h) are 0.
    If gamma is outside [0,1] a warning is issued and the list is all NaN.

    Raises
    ValueError - if no base algorithm is given
    NameError - if a base algorithm name cannot be resolved
    '''
    x = np.nan_to_num(np.asarray(x, dtype=float))
    T = len(x)
    FORECAST = [np.nan]*(T+h)

    # internal params of composition (validated before any base forecast is computed)
    gamma = Params['gamma']
    if gamma > 1:
        warnings.warn('Gamma can not be more than 1')
        return FORECAST
    if gamma < 0:
        warnings.warn('Gamma can not be less than 0')
        return FORECAST

    BaseAlgs = Params['BaseAlgsParams']
    N = len(BaseAlgs)
    if N == 0:
        raise ValueError('At least one base algorithm is required')

    # forecast TS by each base alg; row ba holds the forecasts of algorithm ba
    FORECAST_BA = np.full((N, T+h), np.nan)
    for ba, spec in enumerate(BaseAlgs):
        alg = spec['BaseAlg']
        if not callable(alg):
            # resolve the name directly instead of eval()-ing a built string
            if alg not in globals():
                raise NameError("name '%s' is not defined" % alg)
            alg = globals()[alg]
        FORECAST_BA[ba] = alg(x, h, spec['BaseAlgParams'])

    e1 = np.zeros(N)  # smoothed absolute errors of base algorithms
    y = 0
    for t in range(T):
        if t >= h:
            # column t holds the forecasts that were made for the moment t
            e1 = gamma*np.abs(x[t] - FORECAST_BA[:, t]) + (1-gamma)*e1

            exact = (e1 == 0)
            if exact.any():
                # 1/0 is undefined: share all the weight among the error-free models
                weights = exact.astype(float) / exact.sum()
            else:
                inverse = 1.0 / e1
                weights = inverse / inverse.sum()  # normalised, sums to 1

            # the value emitted for t+h is built from the base forecasts FOR t+h
            y = float(np.dot(weights, FORECAST_BA[:, t+h]))
        FORECAST[t+h] = y
    return FORECAST

In [None]:
# Show, for every configured base algorithm, the call it corresponds to:
# <name>(x, h, <its 'BaseAlgParams' dictionary>).
# The key is 'BaseAlgParams' -- that is the key the BaseAlgs entries really have.
for ba, spec in enumerate(BaseAlgs):
    print(spec['BaseAlg'] + "(x,h,BaseAlgs[" + str(ba) + "]['BaseAlgParams'])")

In [None]:
# Read the series (one column per item), index them by the parsed 'Dates'
# column (day-first format), name the index 'Timestamp' and order it.
ts = (
    pd.read_csv(
        './data/TimeSeries_Data10.csv',
        sep=',',
        decimal='.',
        parse_dates=True,
        dayfirst=True,
        index_col='Dates',
    )
    .rename_axis('Timestamp')
    .sort_index()
)

In [None]:
h = 1  # forecasting delay, in days

# Empty forecast frames: same columns as `ts`, index extended by the h days
# that follow the last observation. One frame per composition.
frc_ts_s = pd.DataFrame(
    index=ts.index.append(
        pd.date_range(
            start=ts.index[-1] + timedelta(days=1),
            end=ts.index[-1] + timedelta(days=h),
        )
    ),
    columns=ts.columns,
)
frc_ts_c = frc_ts_s.copy()

# Registry of forecasts: model label -> forecast frame
FRC_TS = {}

In [None]:
# Adaptive Selection: forecast every series of `ts` and register the result
gamma, eps = 0.01, 0.01

for cntr in ts.columns:
    frc_ts_s[cntr] = AdaptiveSelection(
        ts[cntr],
        h,
        {'gamma': gamma, 'eps': eps, 'BaseAlgsParams': BaseAlgs},
    )

# the label records the composition parameters used for this run
FRC_TS['AS gamma %.2f eps %.2f' % (gamma, eps)] = frc_ts_s

In [None]:
# Observed series and its Adaptive Selection forecast, drawn on the same axes
ax = ts['Item: 2765'].plot()
frc_ts_s['Item: 2765'].plot(ax=ax)

In [None]:
# Adaptive Composition: forecast every series of `ts` and register the result
gamma = 0.01

for cntr in ts.columns:
    frc_ts_c[cntr] = AdaptiveCombination(
        ts[cntr],
        h,
        {'gamma': gamma, 'BaseAlgsParams': BaseAlgs},
    )

# the label records the composition parameter used for this run
FRC_TS['AC gamma %.2f' % (gamma)] = frc_ts_c

In [None]:
# Observed series and its Adaptive Composition forecast, drawn on the same axes
ax = ts['Item: 2765'].plot()
frc_ts_c['Item: 2765'].plot(ax=ax)

In [None]:
# Quality table: one row per time series, one column per registered model.
# The scores are collected in plain dictionaries and the frame is built once;
# the former chained assignment QualityStr[model][ts_num] = ... writes through
# an intermediate object and is not guaranteed to update the frame (it is a
# silent no-op when pandas Copy-on-Write is active).
quality_by_model = {}
for model in sorted(FRC_TS.keys()):
    frc_ts = FRC_TS[model]
    scores = {}
    for ts_num in ts.columns:
        # qualityMedianAE returns a pair; only its first element is kept
        scores[ts_num], _ = qualityMedianAE(ts[ts_num], frc_ts[ts_num])
    quality_by_model[model] = scores

QualityStr = pd.DataFrame(quality_by_model, index=ts.columns, columns=sorted(FRC_TS.keys()))

QualityStr.plot(figsize = (15,6))