In [None]:
# import modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose

In [None]:
# Load the training data, parsing `periode` as datetimes and using it as the index.
time = pd.read_csv('../input/bri-data-hackathon-cr-optimization/train.csv',
                   parse_dates=['periode'], index_col='periode')

# DataFrame.info() writes its report to stdout and returns None, so call it
# directly; wrapping it in print() only appended a stray "None" line.
time.info()
time

In [None]:
# Line chart of the kas_kantor column against the frame's index.
fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=time, x=time.index, y='kas_kantor', ax=ax)
plt.show()

In [None]:
# Working frame for the kas_kantor analysis: every row, columns from position 2
# up to (but excluding) the last one.
time_Kantor = time.iloc[:, 2:-1]
time_Kantor.head()
time_Kantor.columns[:-1]
# Only this last expression is rendered: a preview without the final column.
time_Kantor.loc[:, time_Kantor.columns[:-1]].head()

In [None]:
# Additive decomposition of kas_kantor with a seasonal cycle of 30 observations.
# statsmodels renamed the `freq` keyword to `period`; newer releases no longer
# accept `freq`.
decom_kantor = seasonal_decompose(time_Kantor['kas_kantor'], period=30)

# Plot the decomposed data. DecomposeResult.plot() creates and returns its own
# figure, so resize that figure; calling plt.figure() afterwards only opened a
# second, empty figure and left the decomposition at the default size.
decom_kantor_fig = decom_kantor.plot()
decom_kantor_fig.set_size_inches(16, 5)
plt.show()

In [None]:
# Multiplicative decomposition of kas_kantor with a seasonal cycle of 30 observations.
# statsmodels renamed the `freq` keyword to `period`; newer releases no longer
# accept `freq`.
decom_kantor_mult = seasonal_decompose(time_Kantor['kas_kantor'], period=30, model='multiplicative')

# Plot the decomposed data. DecomposeResult.plot() creates and returns its own
# figure, so resize that figure; calling plt.figure() afterwards only opened a
# second, empty figure and left the decomposition at the default size.
decom_kantor_mult_fig = decom_kantor_mult.plot()
decom_kantor_mult_fig.set_size_inches(16, 5)
plt.show()

In [None]:
# Correlograms for kas_kantor: ACF on the top panel, PACF on the bottom one,
# both over 20 lags and without the lag-0 bar.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(8, 8))

kantor_series = time['kas_kantor']
for draw_correlogram, panel in ((plot_acf, ax1), (plot_pacf, ax2)):
    draw_correlogram(kantor_series, lags=20, zero=False, ax=panel)
plt.show()

In [None]:
# searching for AIC and BIC
#order_aic_bic = []

#for d in range(3):
#    for p in range(5):
#        for q in range(5):
#            ts_Model = SARIMAX(endog=time_Kantor['kas_kantor'], order=(p, d, q))
#            ts_result = ts_Model.fit()
#            order_aic_bic.append((p,d,q,ts_result.aic,ts_result.bic))

In [None]:
#order_aic_bic = pd.DataFrame(order_aic_bic, columns=['p','d','q','aic','bic'])
#print(order_aic_bic.sort_values('aic').head())
#print('')
#print(order_aic_bic.sort_values('bic').head())

In [None]:
#order_aic_bic2 = []
#SS = [7,30]

#for S in range(2):
#    for D in range(3):
#        for P in range(5):
#            for Q in range(5):
#                ts_Model2 = SARIMAX(endog=time_Kantor['kas_kantor'], order=(0, 2, 4), seasonal_order=(P,D,Q,SS[S]))
#                ts_result2 = ts_Model2.fit()
#                order_aic_bic2.append((P,D,Q, SS[S], ts_result2.aic, ts_result2.bic))
#                print(str((P,D,Q,SS[S])) + ' is done')

In [None]:
#order_aic_bic2 = pd.DataFrame(order_aic_bic2, columns=['P','D','Q','S','aic','bic'])
#print(order_aic_bic2.sort_values('aic').head())
#print('')
#print(order_aic_bic2.sort_values('bic').head())

In [None]:
# SARIMAX model for kas_kantor: order (p, d, q) = (0, 2, 4) with
# seasonal order (P, D, Q, s) = (0, 2, 2, 30).
ts_Model2 = SARIMAX(
    time_Kantor['kas_kantor'],
    order=(0, 2, 4),
    seasonal_order=(0, 2, 2, 30),
)  # trend='t'
ts_result2 = ts_Model2.fit()

In [None]:
# In-sample prediction starting 30 observations before the end of the fitted
# series; keep the point predictions and their confidence bounds.
forecasting2 = ts_result2.get_prediction(start=-30)
forecast2_mean, forecast2_ci = forecasting2.predicted_mean, forecasting2.conf_int()

In [None]:
# Whole kas_kantor series with the in-sample prediction for its last 30 observations.
plt.figure(figsize=(16,8))
plt.plot(time_Kantor.index, time_Kantor['kas_kantor'], color='blue', label='real')
plt.plot(time_Kantor.index[-30:], forecast2_mean.values, color='red', label='forecast')
plt.fill_between(time_Kantor.index[-30:], forecast2_ci['lower kas_kantor'], forecast2_ci['upper kas_kantor'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Last 90 observations of kas_kantor with the in-sample prediction for the final 30.
plt.figure(figsize=(16,5))
plt.plot(time_Kantor.index[-90:], time_Kantor['kas_kantor'][-90:], color='blue', label='real')
plt.plot(time_Kantor.index[-30:], forecast2_mean.values, color='red', label='forecast')
plt.fill_between(time_Kantor.index[-30:], forecast2_ci['lower kas_kantor'], forecast2_ci['upper kas_kantor'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Out-of-sample forecast: 31 steps past the end of the fitted kas_kantor series,
# keeping the point forecasts and their confidence bounds.
forecast_out2 = ts_result2.get_forecast(steps=31)
forecast_out2_mean, forecast_out2_ci = forecast_out2.predicted_mean, forecast_out2.conf_int()

In [None]:
# Wrap the point forecasts in a DataFrame and label both the forecasts and their
# confidence bounds with the 31 dates from 2020-10-01 to 2020-10-31.
forecast_out2_mean = pd.DataFrame(forecast_out2_mean)

for forecast_frame in (forecast_out2_mean, forecast_out2_ci):
    forecast_frame.index = pd.date_range(start='2020-10-01', end='2020-10-31', periods=31)

In [None]:
# Whole kas_kantor series followed by the out-of-sample forecast.
plt.figure(figsize=(16,4))
plt.plot(time_Kantor.index, time_Kantor['kas_kantor'], color='blue', label='real')
plt.plot(forecast_out2_mean.index, forecast_out2_mean.values, color='red', label='forecast')
plt.fill_between(forecast_out2_ci.index, forecast_out2_ci['lower kas_kantor'], forecast_out2_ci['upper kas_kantor'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Last 90 observations of kas_kantor followed by the out-of-sample forecast.
plt.figure(figsize=(16,4))
plt.plot(time_Kantor.index[-90:], time_Kantor['kas_kantor'][-90:], color='blue', label='real')
plt.plot(forecast_out2_mean.index, forecast_out2_mean.values, color='red', label='forecast')
plt.fill_between(forecast_out2_ci.index, forecast_out2_ci['lower kas_kantor'], forecast_out2_ci['upper kas_kantor'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Diagnostic: mean absolute residual of the fitted kas_kantor model.
np.abs(ts_result2.resid).mean()

In [None]:
# Text summary of the fitted kas_kantor model.
fit_summary2 = ts_result2.summary()
print(fit_summary2)

In [None]:
# Residual diagnostics for the kas_kantor model.
residuals = pd.DataFrame(ts_result2.resid)

# Residuals plotted as a line
residuals.plot.line()
plt.show()

# Kernel density estimate of the residuals
residuals.plot.kde()
plt.show()

# Descriptive statistics of the residuals
residual_stats = residuals.describe()
print(residual_stats)

# kas echannel

In [None]:
# Line chart of the kas_echannel column against the frame's index.
fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=time, x=time.index, y='kas_echannel', ax=ax)
plt.show()

In [None]:
# Working frame for the kas_echannel analysis: all rows and all columns of `time`.
time_echannel = time.loc[:, :]
time_echannel.head()
time_echannel.columns[:-1]
# Only this last expression is rendered: a preview without the final column.
time_echannel.loc[:, time_echannel.columns[:-1]].head()

In [None]:
# Additive decomposition of kas_echannel with a seasonal cycle of 30 observations.
# statsmodels renamed the `freq` keyword to `period`; newer releases no longer
# accept `freq`.
decom_echannel = seasonal_decompose(time_echannel['kas_echannel'], period=30)

# Plot the decomposed data. DecomposeResult.plot() creates and returns its own
# figure, so resize that figure; calling plt.figure() afterwards only opened a
# second, empty figure and left the decomposition at the default size.
decom_echannel_fig = decom_echannel.plot()
decom_echannel_fig.set_size_inches(16, 5)
plt.show()

In [None]:
# Multiplicative decomposition of kas_echannel with a seasonal cycle of 30 observations.
# statsmodels renamed the `freq` keyword to `period`; newer releases no longer
# accept `freq`.
decom_echannel_mult = seasonal_decompose(time_echannel['kas_echannel'], period=30, model='multiplicative')

# Plot the decomposed data. DecomposeResult.plot() creates and returns its own
# figure, so resize that figure; calling plt.figure() afterwards only opened a
# second, empty figure and left the decomposition at the default size.
decom_echannel_mult_fig = decom_echannel_mult.plot()
decom_echannel_mult_fig.set_size_inches(16, 5)
plt.show()

In [None]:
# Correlograms for kas_echannel: ACF on the top panel, PACF on the bottom one,
# both over 20 lags and without the lag-0 bar.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(8, 8))

echannel_series = time['kas_echannel']
for draw_correlogram, panel in ((plot_acf, ax1), (plot_pacf, ax2)):
    draw_correlogram(echannel_series, lags=20, zero=False, ax=panel)
plt.show()

In [None]:
# Grid search over the non-seasonal orders (p, d, q) for kas_echannel,
# recording the AIC and BIC of every fitted model.
order_aic_bic3 = []

for d in range(3):
    for p in range(5):
        for q in range(5):
            ts_Model3 = SARIMAX(endog=time_echannel['kas_echannel'], order=(p, d, q))
            # Fit the model built in this iteration. The original called
            # `ts_Model.fit()`, a name that is never defined in this notebook
            # (its only assignment is commented out).
            ts_result3 = ts_Model3.fit()
            order_aic_bic3.append((p, d, q, ts_result3.aic, ts_result3.bic))

In [None]:
# Tabulate the grid-search results and print the five best orders by AIC,
# a blank line, then the five best by BIC.
order_aic_bic3 = pd.DataFrame(order_aic_bic3, columns=['p', 'd', 'q', 'aic', 'bic'])
best_by_aic = order_aic_bic3.sort_values('aic').head()
best_by_bic = order_aic_bic3.sort_values('bic').head()
print(best_by_aic, '', best_by_bic, sep='\n')

In [None]:
#order_aic_bic4 = []

#for S in range(2):
#    for D in range(3):
#        for P in range(4):
#            for Q in range(4):
#                ts_Model4 = SARIMAX(endog=time_echannel['kas_echannel'], order=(0, 2, 3), seasonal_order=(P,D,Q,30))
#                ts_result4 = ts_Model4.fit()
#                order_aic_bic4.append((P,D,Q, 30, ts_result4.aic, ts_result4.bic))
#                print(str((P,D,Q,30)) + ' is done')

In [None]:
# Summary of the seasonal grid search for kas_echannel. Disabled together with
# the search itself: `order_aic_bic4` is only created by the commented-out loop
# in the previous cell, so running this cell on a fresh kernel raised NameError.
# Re-enable both cells together to reproduce the search.
#order_aic_bic4 = pd.DataFrame(order_aic_bic4, columns=['P','D','Q','S','aic','bic'])
#print(order_aic_bic4.sort_values('aic').head())
#print('')
#print(order_aic_bic4.sort_values('bic').head())

In [None]:
# Compare seasonal differencing orders D = 1 and D = 2 for kas_echannel while
# keeping order (0, 2, 3) and the seasonal P, Q and period fixed.
order_aic_bic4b = []
DD = [1, 2]
# Seasonal AR/MA orders, taken from the seasonal_order (2, 2, 1, 30) used here;
# the original referenced P and Q without ever defining them.
P, Q = 2, 1

for D in DD:
    # Use the loop's D in the model; the original hard-coded D=2 here, so both
    # iterations fitted the same model while logging different D values.
    ts_Model4 = SARIMAX(endog=time_echannel['kas_echannel'], order=(0, 2, 3), seasonal_order=(P, D, Q, 30))
    ts_result4 = ts_Model4.fit()
    # Record into the list created above (the original appended to `order_aic_bic4`).
    order_aic_bic4b.append((P, D, Q, 30, ts_result4.aic, ts_result4.bic))
    print(str((P, D, Q, 30)) + ' is done')

In [None]:
# SARIMAX model for kas_echannel: order (p, d, q) = (0, 2, 3) with
# seasonal order (P, D, Q, s) = (2, 2, 1, 30).
ts_Model4 = SARIMAX(
    time_echannel['kas_echannel'],
    order=(0, 2, 3),
    seasonal_order=(2, 2, 1, 30),
)
ts_result4 = ts_Model4.fit()

In [None]:
# In-sample prediction starting 30 observations before the end of the fitted
# series; keep the point predictions and their confidence bounds.
forecasting4 = ts_result4.get_prediction(start=-30)
forecast4_mean, forecast4_ci = forecasting4.predicted_mean, forecasting4.conf_int()

In [None]:
# Whole kas_echannel series with the in-sample prediction for its last 30 observations.
plt.figure(figsize=(16,8))
plt.plot(time_echannel.index, time_echannel['kas_echannel'], color='blue', label='real')
plt.plot(time_echannel.index[-30:], forecast4_mean.values, color='red', label='forecast')
plt.fill_between(time_echannel.index[-30:], forecast4_ci['lower kas_echannel'], forecast4_ci['upper kas_echannel'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Last 90 observations of kas_echannel with the in-sample prediction for the final 30.
plt.figure(figsize=(16,5))
plt.plot(time_echannel.index[-90:], time_echannel['kas_echannel'][-90:], color='blue', label='real')
plt.plot(time_echannel.index[-30:], forecast4_mean.values, color='red', label='forecast')
plt.fill_between(time_echannel.index[-30:], forecast4_ci['lower kas_echannel'], forecast4_ci['upper kas_echannel'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Out-of-sample forecast: 31 steps past the end of the fitted kas_echannel series,
# keeping the point forecasts and their confidence bounds.
forecast_out4 = ts_result4.get_forecast(steps=31)
forecast_out4_mean, forecast_out4_ci = forecast_out4.predicted_mean, forecast_out4.conf_int()

In [None]:
# Sanity check: number of forecasted steps.
forecast_out4_mean.shape[0]

In [None]:
# Wrap the point forecasts in a DataFrame and label both the forecasts and their
# confidence bounds with the 31 dates from 2020-10-01 to 2020-10-31.
forecast_out4_mean = pd.DataFrame(forecast_out4_mean)

october_days = pd.date_range(start='2020-10-01', end='2020-10-31', periods=31)

# Assign to .index, as the kas_kantor section does. The original assigned to
# `.periode`, which only sets a Python attribute on the frame and leaves the
# index untouched, so the plots below did not use these dates.
forecast_out4_mean.index = october_days
forecast_out4_ci.index = october_days

In [None]:
# Whole kas_echannel series followed by the out-of-sample forecast.
plt.figure(figsize=(16,4))
plt.plot(time_echannel.index, time_echannel['kas_echannel'], color='blue', label='real')
plt.plot(forecast_out4_mean.index, forecast_out4_mean.values, color='red', label='forecast')
plt.fill_between(forecast_out4_ci.index, forecast_out4_ci['lower kas_echannel'], forecast_out4_ci['upper kas_echannel'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Last 90 observations of kas_echannel followed by the out-of-sample forecast.
plt.figure(figsize=(16,4))
plt.plot(time_echannel.index[-90:], time_echannel['kas_echannel'][-90:], color='blue', label='real')
plt.plot(forecast_out4_mean.index, forecast_out4_mean.values, color='red', label='forecast')
plt.fill_between(forecast_out4_ci.index, forecast_out4_ci['lower kas_echannel'], forecast_out4_ci['upper kas_echannel'],
                 color='pink', label='confidence interval')
# The label= arguments are only rendered once a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Diagnostic: mean absolute residual of the fitted kas_echannel model.
np.abs(ts_result4.resid).mean()

In [None]:
# Text summary of the fitted kas_echannel model.
fit_summary4 = ts_result4.summary()
print(fit_summary4)

In [None]:
# Residual diagnostics for the kas_echannel model.
residuals4 = pd.DataFrame(ts_result4.resid)

# Residuals plotted as a line
residuals4.plot.line()
plt.show()

# Kernel density estimate of the residuals
residuals4.plot.kde()
plt.show()

# Descriptive statistics of the residuals
residual_stats4 = residuals4.describe()
print(residual_stats4)

In [None]:
# Sanity check before building the submission: number of kas_echannel forecast rows.
forecast_out4_mean.shape[0]

In [None]:
# Stack the kas_kantor forecasts on top of the kas_echannel forecasts, discard
# their original index, and number the rows from 0 for the submission file.
stacked_forecasts = pd.concat(
    [forecast_out2_mean['predicted_mean'], forecast_out4_mean],
    axis=0,
)
result = stacked_forecasts.reset_index()
result.columns = ['aaa', 'value']  # 'aaa' holds the old index and is dropped next
result = result[['value']].reset_index()
result

In [None]:
# Write the submission file without the DataFrame's own index column.
result.to_csv(path_or_buf='submit_TS.csv', index=False)