In [1]:
# load libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn import linear_model, model_selection, preprocessing

import datetime
from itertools import product
import statsmodels.api as sm

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline

In [2]:
def regression(res, alpha = 0.1, plot = False,verbose = False, searchBestFit = False):
    X = res.drop('trip_count',axis = 1)        
    y = res.loc[:,'trip_count'];
    
    if searchBestFit:
        # создать словарь параметров
        param_grid = {'alpha': [x for x in np.linspace(1,100,10)]} #,'l1_ratio': [0.1, 0.25, 0.4, 0.5, 0.6, 0.75, 1]}
        # создать кросс-валидацию для временных рядов
        tscv = model_selection.TimeSeriesSplit()
        
        # запустить поиск оптимальных параметров
        regressor = linear_model.Ridge()
        clf = model_selection.GridSearchCV(regressor, param_grid, n_jobs=4, cv=tscv, verbose=1)
        clf.fit(X,y)
        regressor = clf.best_estimator_        
        print 'Best params is', clf.best_params_ 
        
        
    else:
        regressor = linear_model.Lasso(alpha = alpha, max_iter = 1e5,fit_intercept = True,random_state = 0);
        regressor.fit(X,y)
        
    y_pr = pd.Series(data = regressor.predict(X), index = res.index)
    R = regressor.score(X,y);
    print 'R factor is ', R
    
    if plot:
        plt.figure(figsize = [15,10])
        plt.subplot(211)
        plt.plot(y)
        plt.plot(y_pr)
        plt.legend(['Original data','Predicted'])

        plt.subplot(212)
        plt.plot(y-y_pr)
        plt.legend(['Residuals'])
        
    return [y_pr, y-y_pr, regressor]

In [3]:
def getRegressor(regressor, start_date = '2016-05-15 00:00:00', end_date = '2016-05-20 23:00:00'):
    predictionStart = datetime.datetime.strptime(start_date,'%Y-%m-%d %H:%M:%S')
    predictionEnd = datetime.datetime.strptime(end_date,'%Y-%m-%d %H:%M:%S')
    date_index = pd.date_range(predictionStart, predictionEnd, freq='H')
   
    #какой-то пипец. Должен быть способ сделать это проще.
    features = date_index.to_series().to_frame()
    features = addFeatures(features,verbose = True)
    features = features.drop(0,axis = 1)
    exog = regressor.predict(features)
    return pd.Series(exog,index = date_index)

In [4]:
def addFeatures(res, Kw = 6, Ka = 3,verbose = False):    
    # add linear feature
    res = res.assign(hours = (res.index - datetime.datetime(2014,1,1,0,0,0))/np.timedelta64(1, 'h'))
    
    # добавляем гармонические фичи
    for ind in range(1,Kw+1):
        res['weekCos'+str(ind)]= np.cos(np.pi*res.hours*ind/168);
        res['weekSin'+str(ind)]= np.sin(np.pi*res.hours*ind/168);
    for ind in range(1,Ka+1):
        res['yearCos'+str(ind)]= np.cos(2*np.pi*res.hours*ind/8766);
        res['yearSin'+str(ind)]= np.sin(2*np.pi*res.hours*ind/8766);
        
    # добавляем dummy variables для дней недели
    lbDays = preprocessing.LabelBinarizer()
    lbDays.fit(list(np.arange(6)))
    DoW = pd.DataFrame(lbDays.transform(res.index.dayofweek),columns = ['DayOfWeek_'+str(x) for x in np.arange(6)],
                       index = res.index)      
    res = res.merge(DoW,left_index=True,right_index=True)
 
    # добавляем dummy variables для месяца
    lbMonths = preprocessing.LabelBinarizer()
    lbMonths.fit(list(np.arange(12)))
    Months = pd.DataFrame(lbMonths.transform(res.index.month),columns = ['Month_'+str(x) for x in np.arange(12)],index = res.index)      
    res = res.merge(Months,left_index=True,right_index=True);
    return res

In [22]:
def findHyperParams(ts,pList = []):
    
    if (len(pList) ==0):
        # create list of parameters
        ps = range(2, 7)
        d  = 1
        qs = range(2, 7)

        Ps = range(1, 3)
        D  = 1
        Qs = range(1, 3)

        pList = list(product(ps, qs, Ps, Qs))
    
    results = []
    best_aic = float("inf")
    
    # add features
    ts = addFeatures(ts, Kw = 6, Ka = 3)
    # regression
    [s, r, lasso] = regression(ts,verbose = True, searchBestFit = True)
    
    # loop over parameters' list
    for param in pList:
        #try except нужен, потому что на некоторых наборах параметров модель не обучается
        print('Parameters:', param)
        try:
            mSARIMA=sm.tsa.statespace.SARIMAX(ts.loc[:,'trip_count'], order=[param[0], 1, param[1]],
                                          seasonal_order=(param[2], 1, param[3], 24),exog = r).fit(disp=1);
        #выводим параметры, на которых модель не обучается и переходим к следующему набору
        except Exception as inst:
            print inst           # __str__ allows args to be printed directly
            continue
        else:     
            aic = mSARIMA.aic
            print('AIC',aic) 
            #сохраняем лучшую модель, aic, параметры
            if aic < best_aic:
                best_model = mSARIMA
                best_aic = aic
                best_param = param
            results.append([param, mSARIMA.aic])
    return [best_aic,best_param, best_model]

In [6]:
# id нужных регионов
regsDf = pd.read_csv('../crowdRegs.csv',names=['id','regId']);  

# временные ряды для этих регионов
df = pd.read_pickle('../loadData/crowdRegs3.pcl')
df.columns = regsDf.regId.values.astype('str')

# словарь с группировкой рядов
tsGroups = np.load('tsGroups.npy').item()

# словарь с оптимальными параметрами для каждой группы
paramsGroups = np.load('paramsGroups.npy').item()

*Логика скрипта:*
<ol>
<li> Выбираем одну группу
<li> В группе выбираем один ряд
<li> По номеру группы подгружаем оптимальные параметры
<li> Обучаем регрессор
<li> Обучаем SARIMAX модель
<li> Сохраняем модель (??? Может быть без данных, чтобы сэкономить место).
<li> Делаем предсказание
<li> Сохраняем предсказение
<li> Идём на второй или первый шаг
<ol>
    

In [8]:
# выберу настроечные ряды руками
fitSeries = {'gr18':'1274','gr19':'1684','gr10':'1333','gr11':'1075','gr12':'2118','gr13':'1387','gr14':'1384','gr15':'1174'}
fitSeries.update({'gr16':'1483','gr17':'1282','gr21':'1184','gr20':'1131','gr23':'1332','gr22':'1580','gr6':'1177','gr7':'1388'})
fitSeries.update({'gr4':'1128','gr2':'1234','gr3':'1231','gr0':'1286','gr1':'1125','gr8':'1181','gr9':'1532'})

In [25]:
def saveResults(df, fName):
    """
    Save dataframe df to file fName
    """
    f = open(fName, 'w')
    for ts in df.index.levels[0]:
        for lag in df.index.levels[1][6:-5]:
            for i in np.arange(6):
                try:
                    historyStart = lag - datetime.timedelta(hours = 1)
                    res = df.loc[ts,lag].y[i]
                    if res<0:
                        res = 0
                    s =  str(ts)+'_'+datetime.datetime.strftime(historyStart, "%Y-%m-%d") +'_'+ str(historyStart.hour)+'_'+str(i+1)+','+str(res)+'\n'
                except Exception as ins:
                    print lag, ts, i
                    print ins
                else:
                    f.write(s)

    f.close()

In [23]:
# диапазон дат для обучения
startFit = '2015-01-01 0:0:0'
endFit = '2016-05-31 23:00:00'

err = 0

# диапазон дат для предсказания
startPrediction = '2016-05-31 18:00:00'
endPrediction   = '2016-06-30 23:00:00'
predictionRange = pd.date_range(startPrediction, endPrediction, freq='H')

# словарь с оптимальными параметрами для каждой группы
#paramsGroups = np.load('paramsGroups.npy').item()

# create array to save prediction results
mIndex = pd.MultiIndex.from_product([df.columns.values, predictionRange])
#resDf = pd.DataFrame(index = mIndex, columns = ['y','err'])
# load data
resDf = pd.read_pickle('predictionResults6.pcl')
#recalcRegions = [1272,1377]

dp = 0
dq = 0
dp2 = 0
dq2 = 0

for grId, ts in tsGroups.iteritems(): #{k: v for k, v in tsGroups.iteritems() if k in recalcGroups}.iteritems():
    
    print 'Group ID is', grId
    
    # получаем параметры SARIMAX модели
    params = paramsGroups.get(grId)[1] 
        
    for tsId in ts:
        
        print 'Regions is ', tsId
        # получаем временной ряд
        ts = df.loc[startFit:endFit,tsId] #
        
        if ~np.isnan(resDf.loc[tsId,'2016-06-15'].err):
            print 'Already done!'
            continue

        # обучаем регрессор
        ts = ts.to_frame(name = 'trip_count')
        
        
        [best_aic,best_params, best_model] = findHyperParams(ts)
        
        print best_params
        params = best_params
        
        [r_pr, res, regressor] = regression(addFeatures(ts), verbose = True, searchBestFit = True)
        exog = getRegressor(regressor,startFit,endFit)
        
        # обучаем SARIMAX модель
        print 'Teach SARIMAX'
        try:
            mSARIMA = best_model
            #mSARIMA=sm.tsa.statespace.SARIMAX(ts, order=[params[0]+dp, 2, params[1]+dq],
            #                                  seasonal_order=(params[2]+dp2, 1, params[3]+dq2, 24),
            #                                  exog = exog).fit(disp=1);
            
            # получаем предсказания регрессора на весь диапазон дат (обучение+предсказание)
            exog = getRegressor(regressor,startFit,endPrediction)
            # получаем данные о поездкахы на весь диапазон дат
            endog = df.loc[startFit:endPrediction,tsId]
       
            # создаём новую модель, которую будет использовать для предсказания
            model_fitted = sm.tsa.statespace.SARIMAX(endog, order=[params[0]+dp, 2, params[1]+dq],
                                                 seasonal_order=(params[2]+dp2, 1, params[3]+dq2, 24),
                                                 exog = exog).filter(mSARIMA.params)
        except Exception as inst:
            print 'Can not teach or create the model'
            print inst
            continue
        else:    
            # проходим по всему диапазону дат предсказаний
            print 'Make prediction'
            for firstLag in predictionRange:
                lastLag = firstLag+datetime.timedelta(hours = 5)
                # prediction
                try:
                    predicted_data = model_fitted.predict(firstLag, lastLag, dynamic=True, exog = exog[firstLag:lastLag])
                except Exception as inst:
                    print 'Prediction error'
                    print inst
                else:
                    # save results
                    resDf.loc[tsId,firstLag].y = predicted_data
                    err += (df.loc[startPrediction:endPrediction,tsId]-predicted_data).abs().sum()
                    resDf.loc[tsId,firstLag].err = (df.loc[startPrediction:endPrediction,tsId]-predicted_data).abs().mean()

            # save results
            resDf.to_pickle('predictionResults7.pcl')
    
print 'Total error is', err    

Group ID is gr18
Regions is  1273
Already done!
Regions is  1274
Already done!
Group ID is gr19
Regions is  1434
Already done!
Regions is  1435
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.9s finished


Best params is {'alpha': 78.0}
R factor is  0.142808766501
('Parameters:', (2, 2, 1, 1))
('AIC', -10292.911330850204)
('Parameters:', (2, 2, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 2, 2, 1))
('AIC', -17658.322827811033)
('Parameters:', (2, 2, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', -10302.820495658529)
('Parameters:', (2, 3, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 2, 1))
('AIC', -17645.794259244169)
('Parameters:', (2, 3, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', -10159.391948171753)
('Parameters:', (2, 4, 1, 2))
non-invertible starting seasonal moving average parameters found wit

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.9s finished


Best params is {'alpha': 78.0}
R factor is  0.142808766501
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1437
Already done!
Regions is  1438
Already done!
Regions is  1630
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.7s finished


Best params is {'alpha': 100.0}
R factor is  0.0976775147896
('Parameters:', (2, 2, 1, 1))
('AIC', -10276.026703035284)
('Parameters:', (2, 2, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 2, 2, 1))
('AIC', -12375.591642885331)
('Parameters:', (2, 2, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', -10271.545214848089)
('Parameters:', (2, 3, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 2, 1))
('AIC', -12299.553641574375)
('Parameters:', (2, 3, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', -10271.261563304932)
('Parameters:', (2, 4, 1, 2))
non-invertible starting seasonal moving average parameters found w

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    4.0s finished


Best params is {'alpha': 100.0}
R factor is  0.0976775147896
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1684
Already done!
Group ID is gr10
Regions is  1333
Already done!
Regions is  1337
Already done!
Regions is  1338
Already done!
Regions is  1339
Already done!
Regions is  1783
Already done!
Group ID is gr11
Regions is  1075
Already done!
Regions is  1733
Already done!
Regions is  1734
Already done!
Group ID is gr12
Regions is  2069
Already done!
Regions is  2118
Alread

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.4s finished


Best params is {'alpha': 12.0}
R factor is  0.147069344536
('Parameters:', (2, 2, 1, 1))
('AIC', 36283.906613691397)
('Parameters:', (2, 2, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 2, 2, 1))
('AIC', 30097.863768842115)
('Parameters:', (2, 2, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', 36366.089032023556)
('Parameters:', (2, 3, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 2, 1))
('AIC', 30153.236672762763)
('Parameters:', (2, 3, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 4, 1, 1))
Singular matrix
('Parameters:', (2, 4, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    9.4s finished


Best params is {'alpha': 12.0}
R factor is  0.147069344536
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1223
Already done!
Regions is  1224
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    6.3s finished


Best params is {'alpha': 12.0}
R factor is  0.135518011847
('Parameters:', (2, 2, 1, 1))
('AIC', 26345.196720321906)
('Parameters:', (2, 2, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 2, 2, 1))
('AIC', 24872.614426791239)
('Parameters:', (2, 2, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', 26405.042763994767)
('Parameters:', (2, 3, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 2, 1))
('AIC', 24812.492877302328)
('Parameters:', (2, 3, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', 26318.07883286065)
('Parameters:', (2, 4, 1, 2))
non-invertible starting seasonal moving average parameters found with `enf

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    9.2s finished


Best params is {'alpha': 12.0}
R factor is  0.135518011847
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1225
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    5.5s finished


Best params is {'alpha': 12.0}
R factor is  0.0545344964305
('Parameters:', (2, 2, 1, 1))
('AIC', -12046.701591499706)
('Parameters:', (2, 2, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 2, 2, 1))
('AIC', -18662.463315539335)
('Parameters:', (2, 2, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', -12052.647455393546)
('Parameters:', (2, 3, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 2, 1))
('AIC', -18662.020494334647)
('Parameters:', (2, 3, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', -12060.376336596706)
('Parameters:', (2, 4, 1, 2))
non-invertible starting seasonal moving average parameters found wi

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.7s finished


Best params is {'alpha': 12.0}
R factor is  0.0545344964305
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1227
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.5s finished


Best params is {'alpha': 12.0}
R factor is  0.298965049515
('Parameters:', (2, 2, 1, 1))
Singular matrix
('Parameters:', (2, 2, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 2, 2, 1))
array must not contain infs or NaNs
('Parameters:', (2, 2, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 1, 1))
array must not contain infs or NaNs
('Parameters:', (2, 3, 1, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 2, 1))
array must not contain infs or NaNs
('Parameters:', (2, 3, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', 114857.00209963399)
('Parameters:', (2, 4, 1, 2))
non-invertible starting seasonal moving average parameters fo

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.9s finished


Best params is {'alpha': 12.0}
R factor is  0.298965049515
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1385
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.4s finished


Best params is {'alpha': 100.0}
R factor is  0.0528866381658
('Parameters:', (2, 2, 1, 1))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 2, 1, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 2, 2, 1))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 2, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 1, 1))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 1, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 2, 1))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 2, 2))
Non-stationary start

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.4s finished


Best params is {'alpha': 100.0}
R factor is  0.0528866381658
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1386
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.4s finished


Best params is {'alpha': 34.0}
R factor is  0.0757655314732
('Parameters:', (2, 2, 1, 1))
('AIC', 23344.042846019722)
('Parameters:', (2, 2, 1, 2))
('AIC', 17964.209225447768)
('Parameters:', (2, 2, 2, 1))
('AIC', 23346.779428868162)
('Parameters:', (2, 2, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', 23340.757634092137)
('Parameters:', (2, 3, 1, 2))
('AIC', 17927.513315220604)
('Parameters:', (2, 3, 2, 1))
('AIC', 23339.963307417773)
('Parameters:', (2, 3, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', 23342.549077272619)
('Parameters:', (2, 4, 1, 2))
('AIC', 18152.963261100744)
('Parameters:', (2, 4, 2, 1))
('AIC', 23344.200971106278)
('Parameters:', (2, 4, 2, 2))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Param

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.7s finished


Best params is {'alpha': 34.0}
R factor is  0.0757655314732
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1387
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.6s finished


Best params is {'alpha': 45.0}
R factor is  0.142740107512
('Parameters:', (2, 2, 1, 1))
('AIC', 15159.428536318461)
('Parameters:', (2, 2, 1, 2))
('AIC', 15128.784961646161)
('Parameters:', (2, 2, 2, 1))
('AIC', 15181.748912217281)
('Parameters:', (2, 2, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', 15154.673193360064)
('Parameters:', (2, 3, 1, 2))
('AIC', 15125.093835980408)
('Parameters:', (2, 3, 2, 1))
('AIC', 15149.971214136907)
('Parameters:', (2, 3, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', 15155.57351828914)
('Parameters:', (2, 4, 1, 2))
('AIC', 15114.240816377291)
('Parameters:', (2, 4, 2, 1))
('AIC', 15141.723937074337)
('Parameters:', (2, 4, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 5, 1, 1))
('AIC', 1

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.6s finished


Best params is {'alpha': 45.0}
R factor is  0.142740107512
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Regions is  1390
Fitting 3 folds for each of 10 candidates, totalling 30 fits


[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.5s finished


Best params is {'alpha': 45.0}
R factor is  0.134819603506
('Parameters:', (2, 2, 1, 1))
('AIC', -538.06548171449504)
('Parameters:', (2, 2, 1, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 2, 2, 1))
('AIC', -601.41269028763327)
('Parameters:', (2, 2, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 1, 1))
('AIC', -549.81658546836093)
('Parameters:', (2, 3, 1, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 3, 2, 1))
('AIC', -613.63938230582244)
('Parameters:', (2, 3, 2, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameters:', (2, 4, 1, 1))
('AIC', -565.32500698817978)
('Parameters:', (2, 4, 1, 2))
Non-stationary starting autoregressive parameters found with `enforce_stationarity` set to True.
('Parameter

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.7s finished


Best params is {'alpha': 45.0}
R factor is  0.134819603506
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Group ID is gr14
Regions is  1380
Already done!
Regions is  1382
Already done!
Regions is  1383
Already done!
Regions is  1384
Already done!
Group ID is gr15
Regions is  1132
Already done!
Regions is  1172
Already done!
Regions is  1173
Already done!
Regions is  1174
Already done!
Group ID is gr16
Regions is  1480
Already done!
Regions is  1482
Already done!
Regions is  1483
Already 

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:    3.4s finished


Best params is {'alpha': 23.0}
R factor is  0.165993587824
('Parameters:', (2, 2, 1, 1))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 2, 1, 2))
Singular matrix
('Parameters:', (2, 2, 2, 1))
('AIC', -9899.4314797508287)
('Parameters:', (2, 2, 2, 2))
Singular matrix
('Parameters:', (2, 3, 1, 1))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 3, 1, 2))
array must not contain infs or NaNs
('Parameters:', (2, 3, 2, 1))
('AIC', -10237.731932728004)
('Parameters:', (2, 3, 2, 2))
array must not contain infs or NaNs
('Parameters:', (2, 4, 1, 1))
non-invertible starting seasonal moving average parameters found with `enforce_invertibility` set to True.
('Parameters:', (2, 4, 1, 2))
Singular matrix
('Parameters:', (2, 4, 2, 1))
('AIC', -10060.520241353617)
('Parameters:', (2, 4, 2, 2))
Singular matrix
('Parameters:', (2, 5, 1, 1))
non-inv

[Parallel(n_jobs=4)]: Done  30 out of  30 | elapsed:   10.4s finished


Best params is {'alpha': 23.0}
R factor is  0.165993587824
Teach SARIMAX
Make prediction
Prediction error
Provided exogenous values are not of the appropriate shape. Required (1, 1), got (5L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (2, 1), got (4L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (3, 1), got (3L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (4, 1), got (2L,).
Prediction error
Provided exogenous values are not of the appropriate shape. Required (5, 1), got (1L,).
Group ID is gr4
Regions is  1127
Already done!
Regions is  1128
Already done!
Regions is  1129
Already done!
Regions is  1130
Already done!
Group ID is gr2
Regions is  1180
Already done!
Regions is  1232
Already done!
Regions is  1233
Already done!
Regions is  1234
Already done!
Regions is  1235
Already done!
Group ID is gr3
Regions is  1183
Already done!
Regions is  1229
Already don

In [28]:
# load data
#resDf = pd.read_pickle('predictionResults6.pcl')
saveResults(resDf,'m11.csv')

2016-06-01 16:00:00 1175 0
'float' object has no attribute '__getitem__'
2016-06-01 16:00:00 1175 1
'float' object has no attribute '__getitem__'
2016-06-01 16:00:00 1175 2
'float' object has no attribute '__getitem__'
2016-06-01 16:00:00 1175 3
'float' object has no attribute '__getitem__'
2016-06-01 16:00:00 1175 4
'float' object has no attribute '__getitem__'
2016-06-01 16:00:00 1175 5
'float' object has no attribute '__getitem__'
2016-06-01 17:00:00 1175 0
'float' object has no attribute '__getitem__'
2016-06-01 17:00:00 1175 1
'float' object has no attribute '__getitem__'
2016-06-01 17:00:00 1175 2
'float' object has no attribute '__getitem__'
2016-06-01 17:00:00 1175 3
'float' object has no attribute '__getitem__'
2016-06-01 17:00:00 1175 4
'float' object has no attribute '__getitem__'
2016-06-01 17:00:00 1175 5
'float' object has no attribute '__getitem__'
2016-06-11 20:00:00 1442 0
'float' object has no attribute '__getitem__'
2016-06-11 20:00:00 1442 1
'float' object has no at