In [1]:
%%capture
!pip install koreanize-matplotlib
import koreanize_matplotlib

In [4]:
import pandas as pd
exchange = pd.read_csv('/content/exchange_final.csv')
m2 = pd.read_csv('/content/M2_2000_2024.CSV' , encoding='cp949')

In [5]:
# 1. 날짜 포맷 맞추기
exchange['TIME'] = pd.to_datetime(exchange['TIME']).dt.strftime('%Y-%m')

# 2. m2 인코딩해서 불러오기
m2 = pd.read_csv('/content/M2_2000_2024.CSV', encoding='cp949')  # 또는 euc-kr

# 3. 조인 (왼쪽 기준: exchange 기준으로 붙이기)
df = pd.merge(exchange, m2, on='TIME', how='left')

In [6]:
import random
import numpy as np
import tensorflow as tf
import os

def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

In [7]:
# 변수 정의
features = ['경제 심리 지수', '한국 외환 보유액', 'vix', 'WTI 유가', '한국 실업률', '두바이 유가',
                        'Brent 유가', '천연가스 가격', '유연탄 가격', '미국 주가지수',
                        '한국 인플레이션율', '인플레이션 격차', '실질GDP성장률','M2',
                        '금 가격', '구리 가격', '아연 가격', '알루미늄 가격', '니켈 가격',
                        '외국인 투자 금액']

In [8]:
target = ['원/달러환율']

In [11]:
df_filled = df.interpolate(method='linear')
df_filled = df_filled.fillna(method='ffill').fillna(method='bfill')

  df_filled = df.interpolate(method='linear')
  df_filled = df_filled.fillna(method='ffill').fillna(method='bfill')


In [9]:
def create_sequences(X, y, n_steps):
    X_seq, y_seq = [], []
    for i in range(len(X) - n_steps):
        X_seq.append(X[i:i+n_steps])
        y_seq.append(y[i+n_steps])  # 타깃은 n_steps 뒤
    return np.array(X_seq), np.array(y_seq)

결측치 처리 + VIF 검사

In [15]:
import pandas as pd
from statsmodels.stats.outliers_influence import variance_inflation_factor

# 결측치 처리 (보간 + ffill + bfill)
df_filled = df.interpolate(method='linear')
df_filled = df_filled.fillna(method='ffill').fillna(method='bfill')

# VIF 계산 함수
def calculate_vif(df, thresh=20.0):
    variables = df.columns.tolist()
    while True:
        vif = pd.Series(
            [variance_inflation_factor(df[variables].values, i) for i in range(len(variables))],
            index=variables
        )
        max_vif = vif.max()
        if max_vif > thresh:
            drop_feature = vif.idxmax()
            print(f"❌ Removing '{drop_feature}' with VIF={max_vif:.2f}")
            variables.remove(drop_feature)
        else:
            break
    return df[variables]

# VIF 필터링
X_vif = calculate_vif(df_filled[features])

  df_filled = df.interpolate(method='linear')
  df_filled = df_filled.fillna(method='ffill').fillna(method='bfill')


❌ Removing 'Brent 유가' with VIF=3259.78
❌ Removing 'WTI 유가' with VIF=504.36
❌ Removing 'M2' with VIF=393.74
❌ Removing '알루미늄 가격' with VIF=304.36
❌ Removing '한국 외환 보유액' with VIF=220.92
❌ Removing '경제 심리 지수' with VIF=152.08
❌ Removing '구리 가격' with VIF=142.76
❌ Removing '미국 주가지수' with VIF=84.02
❌ Removing '아연 가격' with VIF=37.42
❌ Removing '금 가격' with VIF=29.76
❌ Removing '한국 실업률' with VIF=23.58


피처 선택법 3가지 적용

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SequentialFeatureSelector
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.model_selection import train_test_split

# 타깃 준비
y = df_filled[target].values.flatten()

# 피처 준비
X_for_select = X_vif.copy()

# 임시 분할 (선택법용)
X_train_sel, _, y_train_sel, _ = train_test_split(X_for_select, y, test_size=0.2, shuffle=False)
model = LinearRegression()

# 전진 선택
sfs_forward = SequentialFeatureSelector(model, direction='forward', cv=5, scoring='neg_mean_squared_error')
sfs_forward.fit(X_train_sel, y_train_sel)
selected_forward_features = X_for_select.columns[sfs_forward.get_support()].tolist()

# 후진 제거
sfs_backward = SequentialFeatureSelector(model, direction='backward', cv=5, scoring='neg_mean_squared_error')
sfs_backward.fit(X_train_sel, y_train_sel)
selected_backward_features = X_for_select.columns[sfs_backward.get_support()].tolist()

# 단계 선택 (SFFS 방식)
sfs_stepwise = SFS(model, k_features='best', forward=True, floating=True, scoring='neg_mean_squared_error', cv=5)
sfs_stepwise.fit(X_train_sel, y_train_sel)
selected_stepwise_features = list(sfs_stepwise.k_feature_names_)

In [19]:
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def run_gru_with_selected_features(name, selected_features, df, target_col='원/달러환율', n_steps=12):
    print(f"\n🔷 [START] GRU for: {name.upper()} Features")

    # 1. 입력/타깃 설정
    X_raw = df[selected_features].values
    y_raw = df[[target_col]].values  # shape: (n, 1)

    # 2. 시퀀스 생성
    X_seq, y_seq = create_sequences(X_raw, y_raw, n_steps)

    # 3. 시계열 기반 분할
    split_index = int(len(X_seq) * 0.8)
    X_train, X_test = X_seq[:split_index], X_seq[split_index:]
    y_train, y_test = y_seq[:split_index], y_seq[split_index:]

    # 4. 정규화 (X)
    scaler_x = MinMaxScaler()
    X_train_2d = X_train.reshape(-1, X_train.shape[2])
    X_test_2d = X_test.reshape(-1, X_test.shape[2])

    X_train_scaled = scaler_x.fit_transform(X_train_2d).reshape(X_train.shape)
    X_test_scaled = scaler_x.transform(X_test_2d).reshape(X_test.shape)

    # 5. 정규화 (y)
    scaler_y = MinMaxScaler()
    y_train_scaled = scaler_y.fit_transform(y_train)
    y_test_scaled = scaler_y.transform(y_test)

    # 6. GRU 모델 구성
    model = Sequential([
        GRU(64, input_shape=(n_steps, X_train.shape[2])),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    model.fit(
        X_train_scaled, y_train_scaled,
        validation_split=0.2,
        epochs=100,
        batch_size=16,
        callbacks=[early_stop],
        verbose=0
    )

    # 7. 예측 및 역변환
    y_pred_scaled = model.predict(X_test_scaled)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)
    y_true = scaler_y.inverse_transform(y_test_scaled)

    # 8. 평가
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print(f"✅ {name.upper()} 결과 | RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")
    return {'features': selected_features, 'rmse': rmse, 'mae': mae, 'r2': r2}

In [20]:
results = []

results.append(run_gru_with_selected_features('forward', selected_forward_features, df_filled))
results.append(run_gru_with_selected_features('backward', selected_backward_features, df_filled))
results.append(run_gru_with_selected_features('stepwise', selected_stepwise_features, df_filled))


🔷 [START] GRU for: FORWARD Features


  super().__init__(**kwargs)


[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 210ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step
✅ FORWARD 결과 | RMSE: 193.2498, MAE: 167.5234, R²: -3.2892

🔷 [START] GRU for: BACKWARD Features


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 221ms/step
✅ BACKWARD 결과 | RMSE: 153.0789, MAE: 129.7754, R²: -1.6913

🔷 [START] GRU for: STEPWISE Features


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 218ms/step
✅ STEPWISE 결과 | RMSE: 135.9214, MAE: 106.3702, R²: -1.1218
