In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from dateutil.relativedelta import relativedelta
import datetime
import statsmodels.api as sm
from scipy.optimize import brute



# format notebook output
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from IPython.core.display import display, HTML, Javascript
display(HTML("<style>.container { width:90% !important; }</style>"))

# style pandas display
pd.set_option('display.max_columns', None)

# matplotlib magic
%matplotlib inline

  from pandas.core import datetools


In [2]:
# Directory holding the input CSV files, relative to the notebook's working
# directory. Kept in one place so the data location can be changed in a single edit.
DATA_DIR = '../../data_files/'

# Load every input table into its own DataFrame.
air_reserve = pd.read_csv(DATA_DIR + 'air_reserve.csv')
# This is the only file read with an explicit encoding.
air_store_info = pd.read_csv(DATA_DIR + 'air_store_info.csv', encoding='utf-8')
air_visit_data = pd.read_csv(DATA_DIR + 'air_visit_data.csv')
date_info = pd.read_csv(DATA_DIR + 'date_info.csv')
hpg_reserve = pd.read_csv(DATA_DIR + 'hpg_reserve.csv')
hpg_store_info = pd.read_csv(DATA_DIR + 'hpg_store_info.csv')
sample_submission = pd.read_csv(DATA_DIR + 'sample_submission.csv')
store_id_relation = pd.read_csv(DATA_DIR + 'store_id_relation.csv')

In [3]:
# All store ids, in the order they appear in the store-info table.
air_stores = air_store_info['air_store_id'].tolist()
# Visit rows of the first store only; used as the working sample below.
test = air_visit_data.loc[air_visit_data['air_store_id'] == air_stores[0]]
# Index the sample by visit date (the `visit_date` column itself is kept).
test.index = pd.DatetimeIndex(test['visit_date'])
display(test.head())

Unnamed: 0_level_0,air_store_id,visit_date,visitors
visit_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-07-01,air_0f0cdeee6c9bf3d7,2016-07-01,18
2016-07-02,air_0f0cdeee6c9bf3d7,2016-07-02,37
2016-07-03,air_0f0cdeee6c9bf3d7,2016-07-03,20
2016-07-04,air_0f0cdeee6c9bf3d7,2016-07-04,16
2016-07-05,air_0f0cdeee6c9bf3d7,2016-07-05,15


In [23]:
data_col = 'visitors'

def _as_int_tuple(param):
  """Turn a numpy array into a tuple of plain ints; return anything else unchanged."""
  if isinstance(param, np.ndarray):
    return tuple(int(v) for v in param)
  return param

def build_model(iter_param, series, params_list, static_param=None):
  """Fit one SARIMAX model and record its AIC.

  Parameters
  ----------
  iter_param : sequence
      The parameter set being searched. When `static_param` is None it is used
      as the non-seasonal `order`; otherwise it is used as the `seasonal_order`.
  series : array-like
      Data handed to `SARIMAX` as-is.
  params_list : list
      Mutated in place: on a successful fit the tuple
      `(order, seasonal_order, aic)` is appended.
  static_param : sequence, optional
      Fixed non-seasonal `order`. When omitted, the seasonal order is fixed to
      `(1,0,0,0)` and `iter_param` is the order.

  Returns
  -------
  float
      The AIC of the fitted model, or `inf` if construction or fitting failed.
      Returning a scalar lets this function be used directly as the objective
      of `scipy.optimize.brute`.
  """
  # `brute` passes grid points as numpy arrays; normalise them so that the
  # recorded entries are plain int tuples and the `None` check below is safe.
  iter_param = _as_int_tuple(iter_param)
  static_param = _as_int_tuple(static_param)

  # Identity check: `== None` on a numpy array compares elementwise and the
  # resulting array cannot be used as an `if` condition.
  if static_param is None:
    ords = iter_param
    sords = (1,0,0,0)
  else:
    ords = static_param
    sords = iter_param
  try:
    mod = sm.tsa.statespace.SARIMAX(
        series, trend='n', order=ords, seasonal_order=sords
      )
    aic = mod.fit(disp=0).aic
  except Exception:
    # Best-effort search: a parameter combination that cannot be built or
    # fitted is skipped. Only `Exception` is caught so that interrupting the
    # kernel (KeyboardInterrupt) still stops a long-running search.
    return np.inf
  params_list.append((ords, sords, aic))
  return aic

# Disabled scratch code, kept as a bare triple-quoted string instead of comments.
# Because InteractiveShell.ast_node_interactivity is set to "all", this string
# expression is echoed as the cell's output.
# NOTE(review): if re-enabled, `res.aic` will fail as written -- the snippet treats
# build_model's return value as a fitted results object, which build_model does
# not return.
'''m = []
res = build_model((1,0,0), test[data_col], m)
print('AIC: %s' % res.aic)
begin = len(test)-1
end = (datetime.datetime(2017,5,30).date()-test.index[-1].date()).days+len(test)
#res.predict(start=begin, end=end, dynamic=True)'''

"m = []\nres = build_model((1,0,0), test[data_col], m)\nprint('AIC: %s' % res.aic)\nbegin = len(test)-1\nend = (datetime.datetime(2017,5,30).date()-test.index[-1].date()).days+len(test)\n#res.predict(start=begin, end=end, dynamic=True)"

In [21]:
def parameter_search2(series, num_models, grid_diameter=3, seasonal_periods=None):
  """Exhaustively fit every (order, seasonal_order) pair on an integer grid.

  Parameters
  ----------
  series : array-like
      Data forwarded to `build_model` for every candidate.
  num_models : int
      How many of the lowest-AIC models to return.
  grid_diameter : int, default 3
      Each of p, d, q and P, D, Q takes the values `range(grid_diameter)`.
  seasonal_periods : iterable of int, optional
      Candidate values for the fourth element of the seasonal order. Defaults
      to `range(grid_diameter)`, i.e. the same values as the other parameters.
      Pass e.g. `(7,)` to pin the seasonal period.

  Returns
  -------
  list of tuple
      Up to `num_models` entries `(order, seasonal_order, aic)` as recorded by
      `build_model`, sorted by ascending AIC.
  """
  # Local import: `product` is not brought into scope by the notebook's
  # import cell, so the function imports what it needs itself.
  from itertools import product

  param_vals = range(grid_diameter)
  if seasonal_periods is None:
    seasonal_periods = param_vals

  # The seasonal grid does not depend on the outer loop; build it once.
  seasonal_grid = list(product(param_vals, param_vals, param_vals, seasonal_periods))

  models = []
  for ord_param in product(param_vals, param_vals, param_vals):
    for sord_param in seasonal_grid:
      # build_model appends to `models` only when the fit succeeds.
      build_model(sord_param, series, models, ord_param)
  return sorted(models, key=lambda x: x[2])[:num_models]

In [16]:
def parameter_search(series, num_models, grid_diameter=3, seasonal_period=7):
  """Two-stage grid search: non-seasonal orders first, then seasonal orders.

  Stage 1 fits every non-seasonal order on the grid (with `build_model`'s
  default seasonal order). Stage 2 takes the `num_models` lowest-AIC orders
  from stage 1 and, for each, fits every seasonal (P, D, Q) on the grid with
  the seasonal period fixed to `seasonal_period`.

  Parameters
  ----------
  series : array-like
      Data forwarded to `build_model` for every candidate.
  num_models : int
      Number of best stage-1 orders carried into stage 2.
  grid_diameter : int, default 3
      Each searched parameter takes the values 0 .. grid_diameter-1.
  seasonal_period : int, default 7
      Fourth element of every seasonal order tried in stage 2.

  Returns
  -------
  list of tuple
      Every successful stage-2 fit as `(order, seasonal_order, aic)`, in the
      order the fits were run (not sorted, not truncated to `num_models`).
  """
  def _objective(point, series, found, static_order=None):
    # `brute` passes each grid point as a numpy array and expects a scalar
    # back. Convert to plain ints for build_model, then report the AIC it
    # recorded, or inf when it recorded nothing (the fit failed).
    candidate = tuple(int(v) for v in point)
    n_before = len(found)
    build_model(candidate, series, found, static_order)
    return found[-1][2] if len(found) > n_before else float('inf')

  axis = slice(0, grid_diameter, 1)
  order_grid = (axis, axis, axis)
  # Single-point slice pins the seasonal period; the grid is loop-invariant.
  seasonal_grid = (axis, axis, axis, slice(seasonal_period, seasonal_period + 1, 1))

  ord_models = []
  sord_models = []
  # finish=None: only the grid evaluation is wanted, no polishing step. The
  # value returned by brute is ignored; results are collected via the lists.
  brute(_objective, order_grid, args=(series, ord_models), finish=None)
  for model in sorted(ord_models, key=lambda x: x[2])[:num_models]:
    brute(_objective, seasonal_grid, args=(series, sord_models, model[0]), finish=None)
  return sord_models

In [None]:
mods = parameter_search2(test['visitors'], 5)

In [24]:
mods

[((0, 0, 1), (2, 1, 2, 2), 2132.4585647911099),
 ((0, 0, 0), (2, 1, 2, 2), 2132.6585989226774),
 ((0, 0, 1), (0, 1, 1, 2), 2135.7014635751048),
 ((0, 0, 0), (2, 1, 1, 2), 2135.7374456644493),
 ((0, 0, 1), (2, 1, 0, 1), 2137.1573214801319)]