In [1]:
import numpy as np
from scipy.linalg import expm, sinm, cosm
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
import math
from scipy.special import iv
from statsmodels.tsa.arima.model import ARIMA
import altair

In [2]:
def mets_filter(ts,rho,alpha):
    """Filter a series with the matrix exponential ``expm(rho * L)`` and add ``alpha``.

    The input is first extended by one trailing entry equal to its sample
    mean.  ``L`` is the square matrix with ones on the first sub-diagonal
    (``L @ x`` shifts ``x`` down by one position, putting 0 in the first
    slot).  The trailing entry is removed again before returning.

    Parameters
    ----------
    ts : 1-D array-like of numbers
    rho : scalar multiplying the lag matrix inside the exponential
    alpha : scalar added to every filtered value

    Returns
    -------
    numpy.ndarray with one value per element of ``ts``.
    """
    padded = np.append(ts, np.mean(ts))

    # Ones on the first sub-diagonal: the one-step lag operator.
    lag_matrix = np.eye(len(padded), k=-1)

    filtered = np.dot(expm(rho * lag_matrix), padded) + alpha

    # Drop the value that corresponds to the appended mean.
    return filtered[:-1]


def ols_mets(ts, grid_size):
    """Grid-search the METS parameter ``rho`` that minimises the filtered variance.

    For each candidate ``rho`` the series is filtered with
    ``expm(rho * L)``, where ``L`` has ones on the first sub-diagonal
    (one-step lag).  The objective is the squared standard deviation
    (``np.std`` default, i.e. ddof=0) of the filtered series.  The search
    starts from ``rho = 0`` (no filtering) and then scans ``grid_size``
    evenly spaced values in [-2, 2]; a candidate replaces the incumbent only
    on a strict improvement.

    Parameters
    ----------
    ts : 1-D array-like of numbers
    grid_size : number of points passed to ``np.linspace(-2, 2, ...)``

    Returns
    -------
    list ``[alpha, rho, obj]``: the mean of the filtered series, the selected
    ``rho`` and the objective value at that ``rho``.
    """
    lag_matrix = np.eye(len(ts), k=-1)

    def evaluate(rho):
        # Mean and (population) variance of the series filtered at this rho.
        filtered = np.dot(expm(rho * lag_matrix), ts)
        return np.mean(filtered), np.square(np.std(filtered))

    # Baseline: rho = 0 makes the filter the identity.
    best_rho = 0.0
    best_alpha, best_obj = evaluate(best_rho)

    for candidate in np.linspace(-2, 2, grid_size):
        alpha, obj = evaluate(candidate)
        if obj < best_obj:
            best_alpha, best_rho, best_obj = alpha, candidate, obj

    return [best_alpha, best_rho, best_obj]

# Annual Data

In [3]:
# Yearly M3 sheet: one row per series (metadata columns, then the observations).
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
m3_df = pd.read_excel(
    io='C:/Users/michelj8/Documents/GitHub/exp_smooth_lasso/m3_data.xls',
    sheet_name='M3Year',
)
m3_df.head(n=5)

Unnamed: 0,Series,N,NF,Category,Starting Year,Unnamed: 5,1,2,3,4,...,38,39,40,41,42,43,44,45,46,47
0,N 1,20,6,MICRO,1975,1,940.66,1084.86,1244.98,1445.02,...,,,,,,,,,,
1,N 2,20,6,MICRO,1975,1,1991.05,2306.4,2604.0,2992.3,...,,,,,,,,,,
2,N 3,20,6,MICRO,1975,1,1461.57,1692.5,2193.82,2459.68,...,,,,,,,,,,
3,N 4,20,6,MICRO,1975,1,744.54,1105.16,1417.4,1838.04,...,,,,,,,,,,
4,N 5,20,6,MICRO,1975,1,4977.18,5248.0,5370.0,6184.89,...,,,,,,,,,,


In [4]:
# Per-series results for the yearly data; consumed by the next cell.
ar_mse = []
mets_mse  = []
series_type = []

ar_coef = []
mets_coef = []

# Strip the metadata columns once. Doing this inside the loop copied the
# whole frame again for every single series.
yearly_obs = m3_df.drop(columns = ['Series','N','NF','Category', 'Starting Year','Unnamed: 5'])

for index in m3_df.index:
    series_type.append( m3_df.loc[index,'Category'] )

    # First differences of the non-missing observations of this series.
    ts = yearly_obs.loc[index].dropna().diff(1).dropna().values

    # METS fit: ols_mets returns [alpha, rho, objective].
    mets_model = ols_mets(ts, 50)
    mets_mse.append(mets_model[2] )
    # Stored with the sign of rho flipped (this is what is plotted against the AR coefficient).
    mets_coef.append(-1*mets_model[1])

    # AR(1) benchmark: MSE of the in-sample predictions.
    ar_model = ARIMA(ts,order = (1,0,0)).fit()
    ar_mse.append(np.mean(np.square(ts- ar_model.predict())))
    # params[1] is taken as the AR coefficient — presumably the entry after the constant; verify ordering.
    ar_coef.append(ar_model.params[1])

  warn('Non-stationary starting autoregressive parameters'


In [5]:
# Collect the yearly results and add the comparison columns:
#   MSE_Ratio     - percentage difference of the METS MSE relative to the AR MSE
#   mets_beats_ar - 1 when that difference is negative, else 0
yearly_est_df = (
    pd.DataFrame({
        'mets_mse': mets_mse,
        'ar_mse': ar_mse,
        'series_type': series_type,
        'ar_coef': ar_coef,
        'mets_coef': mets_coef,
    })
    .assign(MSE_Ratio=lambda df: 100*(df['mets_mse']-df['ar_mse'])/df['ar_mse'])
    .assign(mets_beats_ar=lambda df: 1*(df['MSE_Ratio'] < 0))
)
yearly_est_df.head()

Unnamed: 0,mets_mse,ar_mse,series_type,ar_coef,mets_coef,MSE_Ratio,mets_beats_ar
0,31750.155545,28149.95006,MICRO,0.7568,0.612245,12.789385,0
1,394146.637981,393259.580735,MICRO,0.191862,0.204082,0.225565,0
2,333505.484397,332617.127877,MICRO,0.337169,0.367347,0.267081,0
3,368548.64709,368491.054723,MICRO,0.034072,0.040816,0.015629,0
4,480815.530412,480701.033278,MICRO,0.017069,0.040816,0.023819,0


In [6]:
# Overall percentage of yearly series where METS has the lower MSE ...
print(100*yearly_est_df['mets_beats_ar'].mean())
print()
# ... and the same percentage per series type, rounded to one decimal.
(
    yearly_est_df
    .groupby('series_type')[['mets_beats_ar']]
    .mean()
    .mul(100)
    .round(1)
)

60.62015503875969



Unnamed: 0_level_0,mets_beats_ar
series_type,Unnamed: 1_level_1
DEMOGRAPHIC,47.8
FINANCE,65.5
INDUSTRY,67.6
MACRO,68.7
MICRO,67.8
OTHER,100.0


In [7]:
# METS coefficient against AR(1) coefficient for the yearly series, one panel per series type.
# The title previously read 'Quarterly M3 Estimation' although the data is yearly_est_df.
(
    altair.Chart(yearly_est_df, title='Yearly M3 Estimation')
    .mark_point()
    .encode(
        x=altair.X('ar_coef', title='AR(1) Coefficient'),
        y=altair.Y('mets_coef', title='METS Coefficient'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [8]:
# Histogram of the percentage MSE difference (METS vs AR) for yearly series, per series type.
(
    altair.Chart(yearly_est_df)
    .mark_bar()
    .encode(
        x=altair.X('MSE_Ratio', bin=True, title='Normalized difference in MSE'),
        y='count()',
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [9]:
# Percentage MSE difference against the METS coefficient for the yearly series, per series type.
# The title previously read 'Quarterly M3 Estimation' although the data is yearly_est_df.
(
    altair.Chart(yearly_est_df, title='Yearly M3 Estimation')
    .mark_point()
    .encode(
        x=altair.X('mets_coef', title='METS Coefficient'),
        y=altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [10]:
# Percentage MSE difference against the AR coefficient for the yearly series, per series type.
# The title previously read 'Quarterly M3 Estimation' although the data is yearly_est_df.
(
    altair.Chart(yearly_est_df, title='Yearly M3 Estimation')
    .mark_point()
    .encode(
        x=altair.X('ar_coef', title='AR Coefficient'),
        y=altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

# Quarterly

In [11]:
# Quarterly M3 sheet.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
m3_df = pd.read_excel( 'C:/Users/michelj8/Documents/GitHub/exp_smooth_lasso/m3_data.xls', sheet_name = 'M3Quart')

ar_mse = []
mets_mse  = []
series_type = []

ar_coef = []
mets_coef = []

# Strip the metadata columns once. Doing this inside the loop copied the
# whole frame again for every single series.
quarterly_obs = m3_df.drop(columns = ['Series','N','NF','Category', 'Starting Year','Starting Quarter'])

for index in m3_df.index:
    series_type.append( m3_df.loc[index,'Category'] )

    # First differences of the non-missing observations of this series.
    ts = quarterly_obs.loc[index].dropna().diff(1).dropna().values

    # METS fit: ols_mets returns [alpha, rho, objective].
    mets_model = ols_mets(ts, 50)
    mets_mse.append(mets_model[2] )
    # Stored with the sign of rho flipped (this is what is plotted against the AR coefficient).
    mets_coef.append(-1*mets_model[1])

    # AR(1) benchmark: MSE of the in-sample predictions.
    ar_model = ARIMA(ts,order = (1,0,0)).fit()
    ar_mse.append(np.mean(np.square(ts- ar_model.predict())))
    # params[1] is taken as the AR coefficient — presumably the entry after the constant; verify ordering.
    ar_coef.append(ar_model.params[1])


quarterly_est_df = pd.DataFrame({'mets_mse':mets_mse, 
                              'ar_mse':ar_mse,
                              'series_type':series_type,
                             'ar_coef':ar_coef,
                             'mets_coef':mets_coef})

# Percentage difference of the METS MSE relative to the AR MSE; negative means METS is better.
quarterly_est_df['MSE_Ratio'] =  100*(quarterly_est_df['mets_mse']-quarterly_est_df['ar_mse'])/quarterly_est_df['ar_mse']
quarterly_est_df['mets_beats_ar'] = 1*(quarterly_est_df['MSE_Ratio'] < 0)
quarterly_est_df.head()

  warn('Non-stationary starting autoregressive parameters'


Unnamed: 0,mets_mse,ar_mse,series_type,ar_coef,mets_coef,MSE_Ratio,mets_beats_ar
0,64222.365003,64217.156127,MICRO,0.027433,0.040816,0.008111,0
1,4630.886111,4561.08133,MICRO,-0.208103,-0.122449,1.530444,0
2,23181.854953,23178.527449,MICRO,0.012556,-0.0,0.014356,0
3,97187.886713,96721.799399,MICRO,-0.169962,-0.122449,0.481884,0
4,15051.500613,13608.294346,MICRO,0.617972,0.530612,10.605343,0


In [12]:
# Overall percentage of quarterly series where METS has the lower MSE ...
print(100*quarterly_est_df['mets_beats_ar'].mean())
print()
# ... and the same percentage per series type, rounded to one decimal.
(
    quarterly_est_df
    .groupby('series_type')[['mets_beats_ar']]
    .mean()
    .mul(100)
    .round(1)
)

59.25925925925925



Unnamed: 0_level_0,mets_beats_ar
series_type,Unnamed: 1_level_1
DEMOGRAPHIC,63.2
FINANCE,64.5
INDUSTRY,77.1
MACRO,45.8
MICRO,71.1


In [13]:
# METS coefficient against AR(1) coefficient for the quarterly series, one panel per series type.
(
    altair.Chart(quarterly_est_df, title='Quarterly M3 Estimation')
    .mark_point()
    .encode(
        altair.X('ar_coef', title='AR(1) Coefficient'),
        altair.Y('mets_coef', title='METS Coefficient'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [14]:
# Histogram of the percentage MSE difference (METS vs AR) for quarterly series, per series type.
(
    altair.Chart(quarterly_est_df)
    .mark_bar()
    .encode(
        x=altair.X('MSE_Ratio', bin=True, title='Normalized difference in MSE'),
        y='count()',
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [15]:
# Percentage MSE difference against the METS coefficient for the quarterly series, per series type.
(
    altair.Chart(quarterly_est_df, title='Quarterly M3 Estimation')
    .mark_point()
    .encode(
        altair.X('mets_coef', title='METS Coefficient'),
        altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [16]:
# Percentage MSE difference against the AR coefficient for the quarterly series, per series type.
(
    altair.Chart(quarterly_est_df, title='Quarterly M3 Estimation')
    .mark_point()
    .encode(
        altair.X('ar_coef', title='AR Coefficient'),
        altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

# Monthly

In [17]:
# Monthly M3 sheet.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
m3_df = pd.read_excel( 'C:/Users/michelj8/Documents/GitHub/exp_smooth_lasso/m3_data.xls', sheet_name = 'M3Month')

ar_mse = []
mets_mse  = []
series_type = []

ar_coef = []
mets_coef = []

# Strip the metadata columns once. Doing this inside the loop copied the
# whole frame again for every single series.
monthly_obs = m3_df.drop(columns = ['Series','N','NF','Category', 'Starting Year','Starting Month'])

for index in m3_df.index:
    series_type.append( m3_df.loc[index,'Category'] )

    # Log-differences of the non-missing observations of this series.
    # NOTE(review): the yearly and quarterly cells difference the raw levels
    # (no log) — confirm the different transform here is intentional.
    ts = np.log(monthly_obs.loc[index].dropna()).diff(1).dropna().values

    # METS fit: ols_mets returns [alpha, rho, objective].
    mets_model = ols_mets(ts, 50)
    mets_mse.append(mets_model[2] )
    # Stored with the sign of rho flipped (this is what is plotted against the AR coefficient).
    mets_coef.append(-1*mets_model[1])

    # AR(1) benchmark: MSE of the in-sample predictions.
    ar_model = ARIMA(ts,order = (1,0,0)).fit()
    ar_mse.append(np.mean(np.square(ts- ar_model.predict())))
    # params[1] is taken as the AR coefficient — presumably the entry after the constant; verify ordering.
    ar_coef.append(ar_model.params[1])


monthly_est_df = pd.DataFrame({'mets_mse':mets_mse, 
                              'ar_mse':ar_mse,
                              'series_type':series_type,
                             'ar_coef':ar_coef,
                             'mets_coef':mets_coef})

# Percentage difference of the METS MSE relative to the AR MSE; negative means METS is better.
monthly_est_df['MSE_Ratio'] =  100*(monthly_est_df['mets_mse']-monthly_est_df['ar_mse'])/monthly_est_df['ar_mse']
monthly_est_df['mets_beats_ar'] = 1*(monthly_est_df['MSE_Ratio'] < 0)
monthly_est_df.head()

  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("Maximum Likelihood optimization failed to converge. "
  warn("

Unnamed: 0,mets_mse,ar_mse,series_type,ar_coef,mets_coef,MSE_Ratio,mets_beats_ar
0,0.504554,0.651061,MICRO,-0.518105,-1.020408,-22.502801,1
1,0.873952,0.975221,MICRO,-0.50509,-0.77551,-10.384246,1
2,0.239963,0.271305,MICRO,-0.51509,-0.77551,-11.552218,1
3,0.55487,0.626328,MICRO,-0.560277,-0.77551,-11.409018,1
4,0.297176,0.362953,MICRO,-0.529051,-0.938776,-18.12266,1


In [18]:
# Overall percentage of monthly series where METS has the lower MSE ...
print(100*monthly_est_df['mets_beats_ar'].mean())
print()
# ... and the same percentage per series type, rounded to one decimal.
(
    monthly_est_df
    .groupby('series_type')[['mets_beats_ar']]
    .mean()
    .mul(100)
    .round(1)
)

74.36974789915966



Unnamed: 0_level_0,mets_beats_ar
series_type,Unnamed: 1_level_1
DEMOGRAPHIC,36.9
FINANCE,66.9
INDUSTRY,79.3
MACRO,59.9
MICRO,96.6
OTHER,26.9


In [19]:
# METS coefficient against AR(1) coefficient for the monthly series, one panel per series type.
(
    altair.Chart(monthly_est_df, title='Monthly M3 Estimation')
    .mark_point()
    .encode(
        altair.X('ar_coef', title='AR(1) Coefficient'),
        altair.Y('mets_coef', title='METS Coefficient'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [20]:
# Histogram of the percentage MSE difference (METS vs AR) for monthly series, per series type.
(
    altair.Chart(monthly_est_df)
    .mark_bar()
    .encode(
        x=altair.X('MSE_Ratio', bin=True, title='Normalized difference in MSE'),
        y='count()',
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [21]:
# Percentage MSE difference against the METS coefficient for the monthly series, per series type.
(
    altair.Chart(monthly_est_df, title='Monthly M3 Estimation')
    .mark_point()
    .encode(
        altair.X('mets_coef', title='METS Coefficient'),
        altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

In [22]:
# Percentage MSE difference against the AR coefficient for the monthly series, per series type.
(
    altair.Chart(monthly_est_df, title='Monthly M3 Estimation')
    .mark_point()
    .encode(
        altair.X('ar_coef', title='AR Coefficient'),
        altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Series Type'),
        columns=3,
    )
)

# Combination Graphs

In [23]:
# Tag each per-frequency result frame, then stack them into one frame.
yearly_est_df['freq'] = 'yearly'
quarterly_est_df['freq'] = 'quarterly'
monthly_est_df['freq'] = 'monthly'

frames = [yearly_est_df, quarterly_est_df, monthly_est_df]
result = pd.concat(frames)
total_df = result

# Relabel rows carrying the 5th distinct series_type value with the 7th one.
# NOTE(review): the positions 4 and 6 in unique() depend on row order in the
# sheets; presumably this merges two spellings of one category — confirm and
# consider matching on the label text instead.
total_df.loc[
    total_df['series_type'].eq(total_df['series_type'].unique()[4]),
    'series_type',
] = total_df['series_type'].unique()[6]

In [24]:
# Percentage of all series (every frequency) where METS has the lower MSE.
round(total_df['mets_beats_ar'].mean() * 100, 1)

67.2

In [25]:
# METS win percentage by series type, pooled over frequencies.
(
    total_df
    .groupby('series_type')[['mets_beats_ar']]
    .mean()
    .mul(100.0)
    .round(1)
)

Unnamed: 0_level_0,mets_beats_ar
series_type,Unnamed: 1_level_1
DEMOGRAPHIC,47.0
FINANCE,65.9
INDUSTRY,76.7
MACRO,54.4
MICRO,85.2
OTHER,39.7


In [26]:
# METS win percentage by data frequency, pooled over series types.
(
    total_df
    .groupby('freq')[['mets_beats_ar']]
    .mean()
    .mul(100.0)
    .round(1)
)

Unnamed: 0_level_0,mets_beats_ar
freq,Unnamed: 1_level_1
monthly,74.4
quarterly,59.3
yearly,60.6


In [27]:
# METS win percentage for every (series type, frequency) pair: types as rows, frequencies as columns.
(
    total_df[['series_type','mets_beats_ar','freq']]
    .groupby(['series_type','freq'])
    .mean()
    .reset_index()
    .pivot_table(columns='freq', index='series_type', values='mets_beats_ar')
    .mul(100.0)
    .round(1)
)

freq,monthly,quarterly,yearly
series_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
DEMOGRAPHIC,36.9,63.2,47.8
FINANCE,66.9,64.5,65.5
INDUSTRY,79.3,77.1,67.6
MACRO,59.9,45.8,68.7
MICRO,96.6,71.1,67.8
OTHER,26.9,,100.0


In [28]:
# METS vs AR(1) coefficient for all series, coloured by frequency, one panel per series type.
(
    altair.Chart(total_df, title='Coefficients')
    .mark_point()
    .encode(
        altair.X('ar_coef', title='AR(1) Coefficient'),
        altair.Y('mets_coef', title='METS Coefficient'),
        altair.Color('freq', title='Data Frequency'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Estimated coefficients across series and frequency'),
        columns=3,
    )
)

In [29]:
# Histogram of the percentage MSE difference, binned in steps of 10 over [-30, 30],
# coloured by frequency, one panel per series type.
(
    altair.Chart(total_df)
    .mark_bar()
    .encode(
        x=altair.X(
            'MSE_Ratio',
            bin=altair.Bin(extent=[-30, 30], step=10),
            title='Normalized difference in MSE',
        ),
        y='count()',
        color=altair.Color('freq', title='Data Frequency'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Comparison of MSE across series type and frequency'),
        columns=3,
    )
)

In [30]:
# Percentage MSE difference against the METS coefficient (rows with MSE_Ratio >= 100 excluded),
# coloured by frequency, one panel per series type.
# NOTE(review): the facet header says 'Comparison of AR and METS coefficient' although the
# panels plot the MSE difference against the METS coefficient — confirm the wording.
(
    altair.Chart(total_df.query('MSE_Ratio < 100'))
    .mark_point()
    .encode(
        altair.X('mets_coef', title='METS Coefficient'),
        altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
        altair.Color('freq', title='Data Frequency'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Comparison of AR and METS coefficient'),
        columns=3,
    )
)

In [31]:
# Percentage MSE difference against the AR coefficient (rows with MSE_Ratio >= 100 excluded),
# coloured by frequency, one panel per series type.
# NOTE(review): the facet header says 'Comparison of AR and METS coefficient' although the
# panels plot the MSE difference against the AR coefficient — confirm the wording.
(
    altair.Chart(total_df.query('MSE_Ratio < 100'))
    .mark_point()
    .encode(
        altair.X('ar_coef', title='AR Coefficient'),
        altair.Y('MSE_Ratio', title='Normalized difference in MSE'),
        altair.Color('freq', title='Data Frequency'),
    )
    .properties(width=180, height=180)
    .facet(
        facet=altair.Facet('series_type', title='Comparison of AR and METS coefficient'),
        columns=3,
    )
)

In [32]:
# Scratch check: `ts` here is whatever the most recently run estimation loop left behind
# (its last series), so this cell depends on execution order.
pd.DataFrame(data=ts)

Unnamed: 0,0
0,-0.005698
1,-0.019714
2,0.004955
3,-0.014150
4,-0.001659
...,...
65,-0.003347
66,-0.016668
67,0.001664
68,-0.004999


In [33]:
# Scratch check that L_approx acts as a one-step lag: multiplying it with `ts`
# shifts the series down by one position and puts 0 in the first slot.
# `ts` is the leftover series from the most recently run estimation loop.
sample_size = len(ts)

# Ones on the first sub-diagonal, zeros elsewhere.
L_approx = np.eye(sample_size, k=-1)

pd.DataFrame(np.dot(L_approx, ts))

Unnamed: 0,0
0,0.000000
1,-0.005698
2,-0.019714
3,0.004955
4,-0.014150
...,...
65,-0.022818
66,-0.003347
67,-0.016668
68,0.001664
