In [None]:
import xgboost as xgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')

# 1. Load the data: parse the '날짜' column as dates and use it as the index.
df = pd.read_csv('통합된_수출_데이터셋.csv', parse_dates=['날짜'])
df.set_index('날짜', inplace=True)

# 2. Build lag and derived features from the target column '수출량'.
#    Every feature is shifted by at least one row, so the features of a row
#    are computed only from target values of earlier rows.
#    NOTE(review): shift/rolling/diff operate on row order, so this assumes
#    the CSV rows are already in chronological order -- confirm.
df['lag_1'] = df['수출량'].shift(1)                               # previous value
df['lag_2'] = df['수출량'].shift(2)                               # value two rows back
df['rolling_3'] = df['수출량'].rolling(window=3).mean().shift(1)  # mean of the 3 previous values
df['diff_1'] = df['수출량'].diff(1).shift(1)                      # previous one-step change

# 3. Drop every row that contains a missing value (this includes the leading
#    rows whose lag features cannot be computed).
df = df.dropna()

# 4. Separate the features from the target.
#    NOTE(review): every remaining column other than the target is used as a
#    feature; presumably those values are known at prediction time -- verify.
X = df.drop(columns=['수출량'])
y = df['수출량']

# 5. Split by position (first 80% / last 20%) without shuffling, so the row
#    order of the series is preserved.
split_idx = int(len(df) * 0.8)
X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

# 6. Convert both splits to XGBoost DMatrix objects.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

# 7. Model parameters.
params = {
    'max_depth': 3,
    'eta': 0.1,
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse'
}
num_rounds = 300  # upper bound on boosting rounds; early stopping may end sooner
watchlist = [(dtrain, 'train'), (dtest, 'eval')]

# 8. Train the model.
#    Training stops once the metric on the last entry of `evals` has not
#    improved for 30 consecutive rounds.
#    NOTE(review): that last entry is the test set, so the stopping point is
#    tuned on the same data the metrics are later reported on, which makes
#    them optimistic. Consider monitoring a separate validation slice taken
#    from the end of the training window instead.
model = xgb.train(params, dtrain, num_boost_round=num_rounds,
                  early_stopping_rounds=30, evals=watchlist)

# 9. Predict on the test split.
#    xgb.train returns the model from the LAST round, not the best one, and
#    on XGBoost 1.x the native Booster.predict uses every tree by default.
#    Restricting the iteration range explicitly makes the prediction come from
#    the best iteration regardless of the installed XGBoost version.
preds = model.predict(dtest, iteration_range=(0, model.best_iteration + 1))

# 10. Evaluation
def evaluate(y_true, y_pred):
    """Print MAE, RMSE and R2 for a set of regression predictions.

    Args:
        y_true: Observed target values (anything the scikit-learn metric
            functions accept).
        y_pred: Predicted values aligned element-wise with ``y_true``.

    Returns:
        None. The three metrics are written to stdout on a single line,
        MAE and RMSE with two decimals and R2 with four.
    """
    mae = mean_absolute_error(y_true, y_pred)
    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    r2 = r2_score(y_true, y_pred)
    print(f'MAE: {mae:.2f}, RMSE: {rmse:.2f}, R2: {r2:.4f}')

# Print the error metrics of the predictions against the test-split targets.
evaluate(y_test, preds)
