In [None]:
# ทำ ETL และ ปรับข้อมูลให้เป็น stationary สำหรับใช้ในการทำโมเดล ARIMA
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose

# STEP 1: Data collection -- download Gold Futures (GC=F) price history.
df = yf.download('GC=F', start='2021-01-01', end='2025-07-01')

print("Original DataFrame shape:", df.shape)
print(df.head())

# STEP 2: Data cleaning & feature engineering
# 1. Drop every row that contains a missing value.
df = df.dropna()
print("After dropna:", df.shape)
if df.empty:
    # Fail here with a clear message instead of deep inside statsmodels;
    # an empty frame usually means the download itself returned nothing.
    raise ValueError("No price data available for 'GC=F' after cleaning")

# STEP 3: Keep only the 'Close' prices for the stationarity checks and the
# decomposition below.
series = df['Close']
if isinstance(series, pd.DataFrame):
    # yfinance may return (Price, Ticker) MultiIndex columns, in which case
    # 'Close' selects a one-column DataFrame. Reduce it to a real 1-D Series
    # so every downstream call receives the same shape.
    series = series.iloc[:, 0]

# A single period shared by the decomposition and the seasonal differencing,
# so the two steps cannot drift apart.
# NOTE(review): 30 treats a month as 30 observations; if the data holds only
# trading days, a month is closer to 21 rows -- confirm the intended period.
seasonal_period = 30

# 1. Decompose the series to inspect trend and seasonality.
decomposition = seasonal_decompose(series, model='additive', period=seasonal_period)
decomposition.plot()
plt.show()

# 2. First-order differencing to remove the trend.
series_diff = series.diff().dropna()

# 3. Seasonal differencing (lag = seasonal_period) to remove seasonality.
series_diff_seasonal = series_diff.diff(seasonal_period).dropna()

# 4. Augmented Dickey-Fuller test on the differenced series; a p-value below
#    0.05 is reported as stationary.
adf_result = adfuller(series_diff_seasonal)
print('ADF Statistic:', adf_result[0])
print('p-value:', adf_result[1])
if adf_result[1] < 0.05:
    print("Stationary แล้ว")
else:
    print("ยังไม่ stationary")

# 5. Plot the original prices against the differenced series.
plt.figure(figsize=(12,4))
plt.plot(series, label='Original')
plt.plot(series_diff_seasonal, label='After Diff + Seasonal Diff')
plt.legend()
plt.show()


In [None]:
# Model: ARIMA (auto_arima) for Gold Futures Price Prediction
import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pmdarima import auto_arima
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# 1. Load data (Gold Futures).
df = yf.download('GC=F', start='2021-01-01', end='2025-07-01').dropna()
if df.empty:
    # Stop early with a clear message; fitting on an empty frame would fail
    # later with a much less obvious error.
    raise ValueError("No price data available for 'GC=F' after cleaning")

# yfinance may return (Price, Ticker) MultiIndex columns, in which case
# df['Close'] is a one-column DataFrame rather than a Series. Extract a real
# 1-D Series once and derive everything else from it.
close = df['Close']
if isinstance(close, pd.DataFrame):
    close = close.iloc[:, 0]

# 2. (Optional) Feature engineering: previous-row close as 'lag1'.
#    The dropna() removes the first row, whose lag is undefined.
df['lag1'] = close.shift(1)
df = df.dropna()
close = close.loc[df.index]  # keep the price series aligned with the cleaned frame

# 3. Chronological train/test split (first 80% of rows / last 20%).
split_ratio = 0.8
split_index = int(len(df) * split_ratio)
train = df.iloc[:split_index]
test = df.iloc[split_index:]

# 4. ARIMA is fitted on the 'Close' series only.
train_arima = close.iloc[:split_index]
test_arima = close.iloc[split_index:]

# Inspect ACF/PACF of the first-differenced training series to help judge
# plausible p and q orders.
train_diff = train_arima.diff().dropna()
plt.figure(figsize=(14, 5))
plt.subplot(1, 2, 1)
plot_acf(train_diff, lags=30, ax=plt.gca())
plt.title('ACF (After Differencing)')

plt.subplot(1, 2, 2)
plot_pacf(train_diff, lags=30, ax=plt.gca(), method="ywm")
plt.title('PACF (After Differencing)')
plt.tight_layout()
plt.show()

# 5. Search for the best non-seasonal ARIMA order automatically (stepwise).
stepwise_fit = auto_arima(
    train_arima,
    start_p=0, start_q=0,
    max_p=7, max_q=7,
    seasonal=False,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True
)

print(stepwise_fit.summary())

# 6. Forecast the whole test window in a single multi-step prediction.
n_periods = len(test_arima)
forecast, conf_int = stepwise_fit.predict(n_periods=n_periods, return_conf_int=True)
# The forecast may come back with its own (non-date) index, so re-label the
# raw values with the test dates before comparing against the actuals.
forecast_1d = pd.Series(np.ravel(forecast), index=test_arima.index)
test_arima_1d = pd.Series(np.ravel(test_arima), index=test_arima.index)
# Guarantee positional [:, 0] / [:, 1] indexing works for the plot below.
conf_int = np.asarray(conf_int)

# 7. Evaluate: MAE and RMSE over the rows where both values are present.
results = pd.DataFrame({'actual': test_arima_1d, 'forecast': forecast_1d})
results = results.dropna()

mae = mean_absolute_error(results['actual'], results['forecast'])
rmse = np.sqrt(mean_squared_error(results['actual'], results['forecast']))
print(f'auto_arima MAE: {mae:.2f}')
print(f'auto_arima RMSE: {rmse:.2f}')

# 8. Plot train, actual and forecast values with the confidence band.
plt.figure(figsize=(12,5))
plt.plot(train_arima.index, train_arima, label='Train')
plt.plot(test_arima.index, test_arima, label='Actual', color='black')
plt.plot(test_arima.index, forecast_1d, label='auto_arima Forecast', color='green')
plt.fill_between(test_arima.index, conf_int[:,0], conf_int[:,1], color='lightgreen', alpha=0.3)
plt.legend()
plt.title('auto_arima Forecast vs Actual')
plt.show()
