In [1]:
import numpy as np
from scipy.linalg import expm, sinm, cosm
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import math
from scipy.special import iv
from statsmodels.tsa.arima.model import ARIMA

In [2]:
def mets_filter(ts,rho,alpha):
    """Apply the exponential-of-shift filter to a series and add an offset.

    The series is padded with one extra value (its own mean), multiplied by
    ``expm(rho * S)`` where ``S`` is the upper shift matrix (ones on the first
    superdiagonal), shifted by ``alpha``, and the padded slot is dropped again.
    Since ``S`` is nilpotent, ``expm(rho * S)`` is upper triangular with
    ``rho**k / k!`` on the k-th superdiagonal, so each output is a weighted sum
    of the current and later entries of the padded series.

    Parameters
    ----------
    ts : array-like
        One-dimensional series of numbers.
    rho : float
        Scale applied to the shift matrix before exponentiation.
    alpha : float
        Constant added to every filtered value.

    Returns
    -------
    numpy.ndarray
        Filtered values, same length as ``ts``.
    """
    # Pad with the sample mean so the last real observation has a successor.
    padded = np.append(ts, np.mean(ts))
    n_points = len(padded)

    # Ones on the first superdiagonal; the final row is all zeros by construction.
    shift = np.eye(n_points, k=1)
    weights = expm(rho * shift)

    filtered = weights.dot(padded) + alpha

    # Discard the entry that corresponds to the padding value.
    return filtered[:-1]

def ols_mets(ts,grid_size,rho_bounds=(-2.0, 2.0)):
    """Grid-search the (alpha, rho) pair that minimises the mean squared filter output.

    The objective for a candidate pair is ``mean(mets_filter(ts, rho, alpha) ** 2)``.
    Candidates are ``grid_size`` evenly spaced alphas between ``-mean(ts)`` and
    ``mean(ts)`` crossed with ``grid_size`` evenly spaced rhos across
    ``rho_bounds``. The pair ``(0.0, 0.0)`` is always evaluated first as the
    baseline, whether or not it lies on the grid, and is only replaced by a
    strictly smaller objective.

    Parameters
    ----------
    ts : array-like
        One-dimensional series of numbers.
    grid_size : int
        Number of grid points per parameter.
    rho_bounds : tuple of (float, float), optional
        Lower and upper end of the rho grid. Defaults to ``(-2.0, 2.0)``.

    Returns
    -------
    list
        ``[min_alpha, min_rho, min_obj]`` for the best candidate found.
    """
    ts_mean = np.mean(ts)
    alpha_grid = np.linspace(-ts_mean, ts_mean, grid_size)
    rho_grid = np.linspace(rho_bounds[0], rho_bounds[1], grid_size)

    # mets_filter's matrix exponential depends only on rho, and alpha is a plain
    # additive shift on its output. Filtering once per rho and adding alpha
    # afterwards gives the same values with grid_size (not grid_size**2) expm calls.
    unshifted = [mets_filter(ts, rho, 0.0) for rho in rho_grid]

    min_alpha = 0.0
    min_rho = 0.0
    min_obj = np.square(mets_filter(ts, 0.0, 0.0)).mean()

    # Alpha-major order with a strict '<' keeps the first-found winner on ties.
    for alpha in alpha_grid:
        for rho, filtered in zip(rho_grid, unshifted):
            obj = np.square(filtered + alpha).mean()

            if obj < min_obj:
                min_alpha = alpha
                min_rho = rho
                min_obj = obj

    return [min_alpha, min_rho, min_obj]

In [26]:
# Load one sheet of the M3 workbook into a DataFrame; each row is one series
# with its metadata columns followed by numbered observation columns.
# NOTE(review): the workbook location is an absolute path on one machine --
# point M3_WORKBOOK at your local copy before running.
M3_WORKBOOK = 'C:/Users/michelj8/Documents/GitHub/exp_smooth_lasso/m3_data.xls'

# Sheet to analyse. Earlier revisions of this cell also read 'M3Month' and
# 'M3Quart'; change this one name instead of toggling commented-out lines.
M3_SHEET = 'M3Year'

m3_df = pd.read_excel(M3_WORKBOOK, sheet_name=M3_SHEET)
m3_df.head()

Unnamed: 0,Series,N,NF,Category,Starting Year,Unnamed: 5,1,2,3,4,...,38,39,40,41,42,43,44,45,46,47
0,N 1,20,6,MICRO,1975,1,940.66,1084.86,1244.98,1445.02,...,,,,,,,,,,
1,N 2,20,6,MICRO,1975,1,1991.05,2306.4,2604.0,2992.3,...,,,,,,,,,,
2,N 3,20,6,MICRO,1975,1,1461.57,1692.5,2193.82,2459.68,...,,,,,,,,,,
3,N 4,20,6,MICRO,1975,1,744.54,1105.16,1417.4,1838.04,...,,,,,,,,,,
4,N 5,20,6,MICRO,1975,1,4977.18,5248.0,5370.0,6184.89,...,,,,,,,,,,


In [28]:
# Compare, series by series, the minimum objective found by ols_mets with the
# mean squared error of an ARIMA(1,0,0) fit on the same log-differenced data.
ar_mse = []
mets_mse = []

# Non-observation columns to strip from each row. The last name is specific to
# the sheet loaded into m3_df: 'Unnamed: 5' here; earlier variants of this cell
# used 'Starting Quarter' / 'Starting Month' instead (presumably for the
# quarterly / monthly sheets -- confirm against the workbook).
META_COLUMNS = ['Series','N','NF','Category', 'Starting Year','Unnamed: 5']
N_SERIES = 50     # number of leading rows to evaluate; use len(m3_df) for all
GRID_SIZE = 20    # grid points per parameter passed to ols_mets

# Dropping the metadata columns does not depend on the row, so do it once.
observations = m3_df.drop(columns = META_COLUMNS)

# Slicing the index (rather than range(0, 50)) avoids a KeyError when the frame
# holds fewer than N_SERIES rows.
for index in m3_df.index[:N_SERIES]:
    ts = observations.loc[index].dropna()
    # First difference of the logged series.
    ts = np.log(ts).diff(1).dropna().values
    mets_mse.append( ols_mets(ts, GRID_SIZE)[2])

    ar_model = ARIMA(ts,order = (1,0,0)).fit()
    # Mean squared gap between the data and the fitted model's default predict() output.
    ar_mse.append(np.mean(np.square(ts- ar_model.predict())))

In [29]:
# Collect both error lists side by side, one row per evaluated series.
error_df = pd.DataFrame(dict(mets_mse=mets_mse, ar_mse=ar_mse))
error_df.head()

Unnamed: 0,mets_mse,ar_mse
0,0.000923,0.000856
1,0.019928,0.019733
2,0.027816,0.027321
3,0.029796,0.029826
4,0.014861,0.01489


In [30]:
# Fraction of rows whose mets_mse is strictly below ar_mse.
error_df.query('mets_mse < ar_mse').shape[0] / error_df.shape[0]

0.36

In [22]:
# Number of series in the comparison table.
error_df.shape[0]

50