<a href="https://colab.research.google.com/github/Sweetmonee/DataAnalytics/blob/main/%EC%A0%9C%EC%A3%BC%EC%9E%85%EB%8F%84%EA%B4%80%EA%B4%91%EA%B0%9D_%EC%8B%9C%EA%B3%84%EC%97%B4%EC%98%88%EC%B8%A1_ARIMA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller

# Load the time-series data; the 'Year' column is parsed as dates
data = pd.read_csv(
    'df.csv',
    parse_dates=['Year'],
)
# Work on a separate copy so the freshly loaded frame stays untouched
df = data.copy(deep=True)

# Augmented Dickey-Fuller test used to check stationarity
def test_stationarity(timeseries):
    """Run the Augmented Dickey-Fuller test on *timeseries* and print the outcome.

    The lag length is chosen with ``autolag='AIC'``. The test statistic,
    the p-value and the critical values are printed; nothing is returned.
    """
    outcome = adfuller(timeseries, autolag='AIC')
    # Positions 0, 1 and 4 of the result hold the statistic, the p-value
    # and the critical values respectively; the rest is not reported.
    adf_stat, p_value, _used_lag, _n_obs, critical_values = outcome[:5]
    print(f'ADF Statistic: {adf_stat}')
    print(f'p-value: {p_value}')
    print(f'Critical Values: {critical_values}')

# Check stationarity of the raw series
test_stationarity(df['Domestic Tourist'])

# First-difference the series to obtain stationarity.
# The differenced values are stored as a column (its first row is NaN) and a
# NaN-free copy is kept separately, so `df` itself keeps every observation.
# Dropping the first row from `df` would remove one data point from the ARIMA
# fit below and place the first forecast on the last observed x-position.
df['diff'] = df['Domestic Tourist'].diff()
diff_series = df['diff'].dropna()

# Re-check stationarity on the differenced series
test_stationarity(diff_series)

# ACF and PACF plots for choosing the model orders.
# plot_pacf only accepts lags below 50% of the sample size, so the requested
# 20 lags are capped for short series.
max_lags = max(1, min(20, len(diff_series) // 2 - 1))
plot_acf(diff_series, lags=max_lags)
plot_pacf(diff_series, lags=max_lags)
plt.show()

# Build the ARIMA model.
# The model is fitted on the undifferenced series because order d=1 already
# applies the first difference internally.
p, d, q = 1, 1, 0
model = ARIMA(df['Domestic Tourist'], order=(p, d, q))
results = model.fit()

# Forecast
forecast_steps = 10
forecast = results.get_forecast(steps=forecast_steps)
# Compute the confidence interval once and reuse it for both bounds
forecast_ci = forecast.conf_int()
# x-positions continuing right after the last observation
# NOTE(review): assumes `df` has the default 0..N-1 integer index from
# read_csv -- confirm if an index column is ever set.
forecast_index = np.arange(len(df), len(df) + forecast_steps)

# Plot the observed series, the forecast and its confidence band
plt.plot(df['Domestic Tourist'], label='Observed')
plt.plot(forecast_index, forecast.predicted_mean, color='red', label='Forecast')
plt.fill_between(forecast_index, forecast_ci.iloc[:, 0], forecast_ci.iloc[:, 1], color='red', alpha=0.3)
plt.legend()
plt.show()