In [23]:
import pandas as pd
import numpy as np
from datetime import datetime
from pandas import to_datetime
import matplotlib.pyplot as plt
import time
import seaborn as sns
import pmdarima as pm
from pmdarima.model_selection import train_test_split


from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.api import ExponentialSmoothing, SimpleExpSmoothing, Holt
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn import linear_model

import warnings
warnings.filterwarnings('ignore')
from tqdm import tqdm
plt.rcParams['font.family'] ='Malgun Gothic'
plt.rcParams['axes.unicode_minus'] =False

In [2]:
# Bounds of the generated time index. pd.date_range must yield exactly one
# timestamp per CSV row, otherwise attaching it as a column raises.
start_date = '2004-01'
end_date = '2020-03'

# Read the cp949-encoded KOSPI sector table, then swap the file's own 'date'
# column for the generated range and use that range as the index.
df = pd.read_csv("C:/Users/User/github/Quant/data/kospi_category.csv", encoding='cp949')
df = (
    df.assign(time=pd.date_range(start_date, end_date, freq='m'))
      .drop(columns='date')
      .set_index('time')
)

In [3]:
# Split into training and validation sets: rows strictly before 2019-01-01
# are used for training, rows on or after that date for validation.
split_date = '2019-01-01'
X_train = df.loc[df.index < split_date]
X_valid = df.loc[df.index >= split_date]

# Report the size of each split.
for label, frame in (('X_train Shape', X_train), ('X_Valid Shape', X_valid)):
    print(label, frame.shape)

X_train Shape (180, 22)
X_Valid Shape (14, 22)


import itertools

# Brute-force search over every ARIMA (p, d, q) order with each component in
# 0..4 (125 combinations), printing the AIC of every order that can be fitted
# on the second column of df.
# NOTE(review): this fits on the full df, i.e. including the rows that later
# become X_valid -- confirm whether the order search should use X_train only.
p = d = q = range(0, 5)
pdq = list(itertools.product(p, d, q))

failed_orders = []  # (order, exception) for every combination that raised
for param in pdq:
    try:
        model_arima = ARIMA(df.iloc[:, 1], order=param)
        model_arima_fit = model_arima.fit()
    except Exception as err:
        # Many orders legitimately fail to fit, so keep going -- but catch
        # Exception rather than everything so Ctrl-C / kernel interrupt still
        # stops this long loop, and remember the failure instead of hiding it.
        failed_orders.append((param, err))
        continue
    print(param, model_arima_fit.aic)

# A short summary makes a systematic failure (e.g. every single fit raising
# because the ARIMA class itself is unusable) visible instead of the cell
# silently printing nothing.
if failed_orders:
    first_order, first_err = failed_orders[0]
    print(len(failed_orders), 'of', len(pdq), 'orders could not be fitted;',
          'first failure', first_order, repr(first_err))

# Without a moving window: fit auto-ARIMA once per column on the training
# window and forecast the whole validation window in one shot.
# Seasonal period m: 1 for daily, 12 for monthly, 52 for weekly observations.
# Use seasonal=True when the data is seasonal; the differencing order d is
# selected automatically.
# NOTE(review): `start_P` (capital P) sits next to `max_p`/`start_q`/`max_q`;
# if the lower-case `start_p` was intended, the search space differs from what
# this call suggests -- confirm against the pmdarima docs before changing it.
arima_result = []
n_forecast = len(X_valid)  # forecast horizon follows the validation window
for i in tqdm(range(len(df.columns))):
    col_name = df.columns[i]
    train_series = X_train.iloc[:, i]
    valid_series = X_valid.iloc[:, i]

    auto_arima_model = pm.auto_arima(y=train_series,
                                     start_P=0,
                                     max_p=3,
                                     start_q=0,
                                     max_q=3,
                                     m=12,
                                     seasonal=True,
                                     stepwise=False,
                                     trace=True)

    # Strip any index the forecast may carry before attaching the validation
    # dates: building a Series from a Series with a different index reindexes
    # by label and would silently turn the forecast into NaN.
    fcast2 = pd.Series(np.asarray(auto_arima_model.predict(n_periods=n_forecast)),
                       index=X_valid.index,
                       name="Auto Arima")

    # Training history, forecast (red) and actual validation values (blue).
    fig, ax = plt.subplots(figsize=(15, 5))
    sns.lineplot(data=pd.DataFrame(train_series), ax=ax)
    fcast2.plot(ax=ax, color='red', marker="o")
    ax.plot(valid_series, color='blue', marker='o')
    ax.set_title(col_name)
    ax.legend()
    fig.savefig("C:/Users/User/github/Quant/data/"+col_name+" AutoArima_seasonal_O.png")

    # Compute the validation error once and reuse it for the log and the list.
    mse = mean_squared_error(valid_series.values, fcast2.values)
    print(col_name+' The MSE of auto-arima is:', mse)
    arima_result.append(mse)

    # Show the figure, then release it so one figure per column does not pile
    # up in memory over the loop.
    plt.show()
    plt.close(fig)
    

In [None]:
# With a moving (expanding) window: for every validation step, refit
# auto-ARIMA on all observations seen so far, forecast one step ahead, then
# append the actual observation to the history.
# Seasonal period m: 1 for daily, 12 for monthly, 52 for weekly observations.
# Use seasonal=True when the data is seasonal; the differencing order d is
# selected automatically.
# NOTE(review): `start_P` (capital P) sits next to `max_p`/`start_q`/`max_q`;
# if the lower-case `start_p` was intended, the search space differs from what
# this call suggests -- confirm against the pmdarima docs before changing it.
arima_result = []
for i in tqdm(range(len(df.columns))):
    col_name = df.columns[i]
    train_series = X_train.iloc[:, i]
    valid_series = X_valid.iloc[:, i]

    predictions = []
    history = list(train_series)
    # Iterate over the validation rows of the current column; the step count
    # must not depend on the not-yet-defined inner loop variable.
    for t in range(len(valid_series)):
        model = pm.auto_arima(y=history,
                              start_P=0,
                              max_p=3,
                              start_q=0,
                              max_q=3,
                              m=12,
                              seasonal=True,
                              stepwise=False,
                              trace=True)
        # Only the next step is used, so request exactly one period.
        output = model.predict(n_periods=1)
        yhat = np.asarray(output)[0]
        predictions.append(yhat)
        # Positional access: the series is indexed by timestamps, not integers.
        obs = valid_series.iloc[t]
        history.append(obs)

    predictions = pd.Series(predictions, index=X_valid.index)

    # Training history, one-step-ahead forecasts (red) and actual validation
    # values (blue), drawn on a dedicated figure per column.
    fig, ax = plt.subplots()
    sns.lineplot(data=pd.DataFrame(train_series), ax=ax)
    ax.plot(train_series, color='blue')
    ax.plot(predictions, color='red', marker='o')
    ax.plot(valid_series, color='blue', marker='o')
    ax.set_title(col_name)
    ax.legend()
    fig.savefig("C:/Users/User/github/Quant/data/"+col_name+" moving_window.png")

    # Compute the validation error once and reuse it for the log and the list.
    mse = mean_squared_error(valid_series.values, predictions)
    print(col_name+' The MSE of auto-arima is:', mse)
    arima_result.append(mse)

    # Release the figure before moving on to the next column.
    plt.close(fig)
    

#### 계절성 O : 제조, 섬유의복, 종이목재, 화학, 비금속광물, 철강금속, 전기전자, 운수장비, 전기가스업, 건설업, 운수창고업, 금융업, 은행, 보험
#### 계절성 X : 음식료품, 의약품, 기계, 의료정밀, 유통업, 통신업, 증권, 서비스업