# In [None]:
import pandas as pd
import yfinance as yf
import numpy as np
import ta
from xgboost import XGBRegressor, plot_importance
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

# 1. Load Gold Futures
df = yf.download('GC=F', start='2025-01-01', end='2025-07-01').dropna()

# An empty frame would otherwise only surface later as an IndexError on
# df.index[0]; fail here with a message that names the cause.
if df.empty:
    raise ValueError("No rows downloaded for 'GC=F'")

# --- Remove ALL MultiIndex (columns as well as index), force DatetimeIndex ---
# Recent yfinance releases label the columns with a (Price, Ticker)
# MultiIndex even for a single ticker. Keep only the price-field level so
# that df['Close'] is a Series and the later merge on 'Date' sees flat labels.
if isinstance(df.columns, pd.MultiIndex):
    _level_names = list(df.columns.names)
    _price_level = _level_names.index('Price') if 'Price' in _level_names else 0
    df = df.set_axis(
        df.columns.get_level_values(_price_level).rename(None), axis=1
    )

if not isinstance(df.index, pd.DatetimeIndex):
    # A single reset also covers a MultiIndex index: every level becomes a
    # regular column, and the date column is then promoted back to the index.
    df = df.reset_index()
    for _date_col in ('Date', 'index'):
        if _date_col in df.columns:
            df[_date_col] = pd.to_datetime(df[_date_col])
            df = df.set_index(_date_col)
            break

# A 'Ticker' column can appear when a (Date, Ticker) index was reset above.
if 'Ticker' in df.columns:
    df = df.drop(columns='Ticker')

# Load DXY/BTC and normalise their index the same way as for gold
def fix_index(_df):
    """Return ``_df`` with flat column labels and, if possible, a DatetimeIndex.

    Parameters
    ----------
    _df : pandas.DataFrame
        Frame to normalise. It is not modified; a new frame is returned
        whenever a change is needed.

    Returns
    -------
    pandas.DataFrame
        * MultiIndex columns are reduced to a single level: the level named
          ``'Price'`` when present, otherwise the first level. This is meant
          for single-ticker frames; with several tickers the flattened labels
          would repeat.
        * If the index is not already a ``DatetimeIndex`` it is reset, and the
          first of the columns ``'Date'`` / ``'index'`` that exists is parsed
          with ``pd.to_datetime`` and set as the index. When neither column
          exists the reset frame is returned as-is.
    """
    # Flatten the columns first so that the 'Date' / 'index' lookups below
    # operate on plain string labels.
    if isinstance(_df.columns, pd.MultiIndex):
        level_names = list(_df.columns.names)
        price_level = level_names.index('Price') if 'Price' in level_names else 0
        flat_labels = _df.columns.get_level_values(price_level).rename(None)
        _df = _df.set_axis(flat_labels, axis=1)

    if isinstance(_df.index, pd.DatetimeIndex):
        return _df

    # One reset is enough for a MultiIndex index too; resetting twice would
    # only add a meaningless integer 'index' column.
    _df = _df.reset_index()
    for date_col in ('Date', 'index'):
        if date_col in _df.columns:
            _df[date_col] = pd.to_datetime(_df[date_col])
            return _df.set_index(date_col)
    return _df

# Download DXY and BTC over the same window as the gold data.
# yfinance treats `end` as exclusive, so push it one day past gold's last
# session; otherwise that session has no DXY/BTC row of its own and would
# silently receive the previous day's value from the forward-fill below.
dl_start = df.index[0]
dl_end = df.index[-1] + pd.Timedelta(days=1)
df_dxy = yf.download('DX-Y.NYB', start=dl_start, end=dl_end)
df_btc = yf.download('BTC-USD', start=dl_start, end=dl_end)
df_dxy = fix_index(df_dxy)
df_btc = fix_index(df_btc)
df_dxy = df_dxy.rename(columns=lambda x: f'DXY_{x}')
df_btc = df_btc.rename(columns=lambda x: f'BTC_{x}')

# Join!
# Left-join on gold's dates so only gold sessions are kept; days on which
# DXY or BTC has no row come through as NaN.
df_merge = df.reset_index()
df_dxy_merge = df_dxy[['DXY_Close']].reset_index()
df_btc_merge = df_btc[['BTC_Close']].reset_index()

df_merge = df_merge.merge(df_dxy_merge, on='Date', how='left')
df_merge = df_merge.merge(df_btc_merge, on='Date', how='left')
df_merge = df_merge.set_index('Date')

# Carry the last known value forward over those gaps. `.ffill()` replaces the
# deprecated `fillna(method='ffill')` spelling.
df_merge['DXY_Close'] = df_merge['DXY_Close'].ffill()
df_merge['BTC_Close'] = df_merge['BTC_Close'].ffill()

df = df_merge

# 2. Feature Engineering
# The model's target is the same-day 'Close'. Every price-derived feature is
# therefore computed on the close series and then shifted one session back,
# so a row only contains information available before that day's close.
# Without the shift the target leaks into its own features (for example
# Close == lag1 * (1 + return1) exactly).
# NOTE(review): 'DXY_Close' and 'BTC_Close' are same-day values and are used
# as features unchanged; confirm whether they should be lagged as well.
close_series = df['Close']
if isinstance(close_series, pd.DataFrame):
    # df['Close'] is a one-column frame when the column labels are multi-level.
    close_series = close_series.squeeze()

# Plain lags of the close.
df['lag1'] = close_series.shift(1)
df['lag2'] = close_series.shift(2)
df['lag3'] = close_series.shift(3)
df['lag5'] = close_series.shift(5)
df['lag7'] = close_series.shift(7)

# Rolling statistics and returns, as of the previous session.
df['rolling3'] = close_series.rolling(window=3).mean().shift(1)
df['rolling7'] = close_series.rolling(window=7).mean().shift(1)
df['rolling14'] = close_series.rolling(window=14).mean().shift(1)
df['rolling21'] = close_series.rolling(window=21).mean().shift(1)
df['std7'] = close_series.rolling(window=7).std().shift(1)
df['return1'] = close_series.pct_change(1).shift(1)
df['return5'] = close_series.pct_change(5).shift(1)

# Calendar feature: taken from the row's own date, no shift needed.
df['dayofweek'] = df.index.dayofweek

# Technical indicators, likewise shifted to the previous session.
df['rsi14'] = ta.momentum.RSIIndicator(close_series, window=14).rsi().shift(1)
df['ema10'] = ta.trend.EMAIndicator(close_series, window=10).ema_indicator().shift(1)
df['ema21'] = ta.trend.EMAIndicator(close_series, window=21).ema_indicator().shift(1)
macd = ta.trend.MACD(close_series)
df['macd'] = macd.macd().shift(1)
df['macd_signal'] = macd.macd_signal().shift(1)
df['macd_diff'] = macd.macd_diff().shift(1)

# 3. Train/Test Split (performed before dropping NaNs)
# The split point is derived from the full row count, so it is not moved by
# rows that are dropped afterwards.
split_ratio = 0.8
split_index = int(len(df) * split_ratio)

# Drop NaN rows inside each partition separately (very important)
train = df.iloc[:split_index].dropna()
test = df.iloc[split_index:].dropna()

# Model inputs, grouped by kind; the resulting order is the column order of
# X_train / X_test.
feature_cols = (
    [f'lag{k}' for k in (1, 2, 3, 5, 7)]
    + [f'rolling{w}' for w in (3, 7, 14, 21)]
    + ['std7', 'return1', 'return5', 'dayofweek']
    + ['rsi14', 'ema10', 'ema21', 'macd', 'macd_signal', 'macd_diff']
    + ['DXY_Close', 'BTC_Close']
)

# The target is the 'Close' column of the same row.
X_train, y_train = train[feature_cols], train['Close']
X_test, y_test = test[feature_cols], test['Close']

print(f"X_train shape: {X_train.shape}, X_test shape: {X_test.shape}")

# 4. Train XGBoost
# Hyper-parameters are collected in one mapping and unpacked into the
# regressor; random_state is fixed for repeatable runs.
xgb_params = {
    'n_estimators': 500,
    'learning_rate': 0.05,
    'max_depth': 5,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'random_state': 42,
}
model = XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

# 5. Predict on the held-out rows
y_pred = model.predict(X_test)

# 6. Evaluate: mean absolute error and root mean squared error
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
for metric_name, metric_value in (('MAE', mae), ('RMSE', rmse)):
    print(f'XGBoost {metric_name}: {metric_value:.2f}')

# 7. Plot: training closes, actual test closes and the model's predictions
plt.figure(figsize=(12,5))
plt.plot(train.index, train['Close'], label='Train', color='skyblue')
plt.plot(test.index, y_test, label='Actual', color='black')
plt.plot(test.index, y_pred, label='XGBoost Predicted', color='blue')
plt.legend()
plt.title('XGBoost Forecast vs Actual')
plt.show()

# 8. Feature Importance
# Pass an explicit Axes: when `ax` is omitted, plot_importance() creates its
# own figure, so a figure created beforehand with plt.figure() would be shown
# blank and its figsize would never apply to the importance chart.
fig_imp, ax_imp = plt.subplots(figsize=(10, 5))
plot_importance(
    model,
    ax=ax_imp,
    importance_type="gain",
    max_num_features=15,
    height=0.5,
)
ax_imp.set_title('Feature Importance (Gain)')
plt.show()


# 9. Print DataFrame Descriptions (summary statistics of the test features)
print(X_test.describe())

