In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
import matplotlib.pyplot as plt

# Configuration: window length and the inclusive date range to keep
LOOKBACK = 30
START_DATE = "2005-01-01"
END_DATE = "2025-02-28"

# Load the CSV, parse DATE, keep rows inside [START_DATE, END_DATE],
# drop rows with any missing value, and order chronologically.
df_all = (
    pd.read_csv("/content/new_0418.csv")
    .assign(DATE=lambda frame: pd.to_datetime(frame["DATE"]))
    .loc[lambda frame: frame["DATE"].between(START_DATE, END_DATE)]
    .dropna()
    .sort_values("DATE")
    .reset_index(drop=True)
)

# Target: log(1 + x) of the exchange-rate column
df_all["Target"] = np.log1p(df_all["원/미국달러(매매기준율)"])

# 📌 시퀀스 생성 함수
def create_sequences(data, target, lookback=30):
    """Build sliding-window samples for a sequence model.

    Sample ``k`` is the window ``data[k : k + lookback]`` and its label is
    ``target[k + lookback]``, i.e. the target value right after the window.

    Args:
        data: Array-like of feature rows (anything ``np.asarray`` accepts).
        target: Array-like of target values aligned row-for-row with ``data``.
            It is read by position, so a pandas Series with a non-default
            index is handled correctly.
        lookback: Number of consecutive rows in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with one entry per window. Both are
        empty when ``len(data) <= lookback``.
    """
    # Convert up front: indexing a pandas Series with an integer is a label
    # lookup, which raises KeyError (or picks the wrong row) when the index
    # is not 0..n-1. NumPy arrays are always indexed by position.
    data = np.asarray(data)
    target = np.asarray(target)

    n_windows = len(data) - lookback
    X = [data[start:start + lookback] for start in range(n_windows)]
    y = [target[start + lookback] for start in range(n_windows)]
    return np.array(X), np.array(y)

In [None]:
# Convert every feature column to float, removing thousands separators first.
# The column list is derived here so this cell runs on a fresh kernel instead
# of relying on a variable that is only assigned further down the notebook.
features_all = df_all.columns.drop(["DATE", "Target"])

for col in features_all:
    df_all[col] = (
        df_all[col]
        .astype(str)                          # work on the text form
        .str.replace(",", "", regex=False)    # strip literal commas
        .astype(float)                        # parse as float
    )

In [None]:
# Feature set: every column except the date and the log-transformed target
features_all = df_all.columns.drop(["DATE", "Target"])

# Scaling: fit the scaler on training-period rows only, then apply it to the
# whole frame. Fitting on all rows would leak test-period min/max values into
# the training inputs.
train_rows_all = df_all["DATE"] <= "2021-12-31"
scaler_all = MinMaxScaler()
scaler_all.fit(df_all.loc[train_rows_all, features_all])
scaled_all = scaler_all.transform(df_all[features_all])

# Build sliding windows; dates_all[k] is the date of the k-th label
X_all, y_all = create_sequences(scaled_all, df_all["Target"], LOOKBACK)
dates_all = df_all["DATE"][LOOKBACK:].reset_index(drop=True)
train_idx_all = dates_all <= "2021-12-31"

# Chronological train/test split on the label date
X_train_all, X_test_all = X_all[train_idx_all], X_all[~train_idx_all]
y_train_all, y_test_all = y_all[train_idx_all], y_all[~train_idx_all]

# Model definition and training. The input shape is declared with an explicit
# Input layer rather than `input_shape=` on the LSTM, which Keras warns about.
model_all = Sequential([
    Input(shape=(LOOKBACK, X_all.shape[2])),
    LSTM(64),
    Dense(1),
])
model_all.compile(optimizer='adam', loss='mse')
model_all.fit(X_train_all, y_train_all, epochs=30, batch_size=32, validation_split=0.1, verbose=0)

# Predict in log space and invert the log1p transform
y_pred_all_log = model_all.predict(X_test_all)
y_pred_all = np.expm1(y_pred_all_log.flatten())
y_true_all = np.expm1(y_test_all)

# Evaluation metrics on the original (un-logged) scale
rmse_all = np.sqrt(mean_squared_error(y_true_all, y_pred_all))
mae_all = mean_absolute_error(y_true_all, y_pred_all)
r2_all = r2_score(y_true_all, y_pred_all)

  super().__init__(**kwargs)


[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step


In [None]:
# Hand-picked feature subset
selected_features = [
    'KOSPI','기준금리','수출물가지수(원화기준)','수입물가지수(원화기준)','PPI',
    '한국실업률','금','은','원유','위안화/달러','NASDAQ','외환보유액(천달러)',
    '미국PPI','뉴스심리지수','대외채무','본원 통화(달러 발행량)',
    'CPI','장단기금리차','GDP_GROWTH(%)','원/미국달러(매매기준율)'
]

# Indexing df_all with a name that is not a column raises KeyError (seen for
# '장단기금리차'). Report such names and continue with the columns that exist.
# TODO(review): confirm the intended column name(s) in the CSV and restore them.
missing_sel = [col for col in selected_features if col not in df_all.columns]
if missing_sel:
    print(f"Skipping selected features missing from df_all: {missing_sel}")
    selected_features = [col for col in selected_features if col in df_all.columns]

# Fit the scaler on training-period rows only so test-period min/max values
# do not leak into the scaled training inputs; then transform every row.
train_rows_sel = df_all["DATE"] <= "2021-12-31"
scaler_sel = MinMaxScaler()
scaler_sel.fit(df_all.loc[train_rows_sel, selected_features])
scaled_sel = scaler_sel.transform(df_all[selected_features])

# Sliding windows and chronological split on the label date
X_sel, y_sel = create_sequences(scaled_sel, df_all["Target"], LOOKBACK)
dates_sel = df_all["DATE"][LOOKBACK:].reset_index(drop=True)
train_idx_sel = dates_sel <= "2021-12-31"

X_train_sel, X_test_sel = X_sel[train_idx_sel], X_sel[~train_idx_sel]
y_train_sel, y_test_sel = y_sel[train_idx_sel], y_sel[~train_idx_sel]

# Explicit Input layer instead of `input_shape=` on the LSTM (avoids the Keras warning)
model_sel = Sequential([
    Input(shape=(LOOKBACK, X_sel.shape[2])),
    LSTM(64),
    Dense(1),
])
model_sel.compile(optimizer='adam', loss='mse')
model_sel.fit(X_train_sel, y_train_sel, epochs=30, batch_size=32, validation_split=0.1, verbose=0)

# Invert the log1p transform, then score on the original scale
y_pred_sel = np.expm1(model_sel.predict(X_test_sel).flatten())
y_true_sel = np.expm1(y_test_sel)
rmse_sel = np.sqrt(mean_squared_error(y_true_sel, y_pred_sel))
mae_sel = mean_absolute_error(y_true_sel, y_pred_sel)
r2_sel = r2_score(y_true_sel, y_pred_sel)

KeyError: "['장단기금리차'] not in index"

In [None]:
# Feature subset for the third model
# NOTE(review): presumably chosen from SHAP importances — confirm the source.
shap_features = [
    'PPI','본원 통화(달러 발행량)','수출물가지수(원화기준)','NASDAQ','미국PPI',
    'CPI','KOSPI','외환보유액(천달러)','원유','대외채무','원/미국달러(매매기준율)'
]

# Fit the scaler on training-period rows only so test-period min/max values
# do not leak into the scaled training inputs; then transform every row.
train_rows_shap = df_all["DATE"] <= "2021-12-31"
scaler_shap = MinMaxScaler()
scaler_shap.fit(df_all.loc[train_rows_shap, shap_features])
scaled_shap = scaler_shap.transform(df_all[shap_features])

# Sliding windows and chronological split on the label date
X_shap, y_shap = create_sequences(scaled_shap, df_all["Target"], LOOKBACK)
dates_shap = df_all["DATE"][LOOKBACK:].reset_index(drop=True)
train_idx_shap = dates_shap <= "2021-12-31"

X_train_shap, X_test_shap = X_shap[train_idx_shap], X_shap[~train_idx_shap]
y_train_shap, y_test_shap = y_shap[train_idx_shap], y_shap[~train_idx_shap]

# Explicit Input layer instead of `input_shape=` on the LSTM (avoids the Keras warning)
model_shap = Sequential([
    Input(shape=(LOOKBACK, X_shap.shape[2])),
    LSTM(64),
    Dense(1),
])
model_shap.compile(optimizer='adam', loss='mse')
model_shap.fit(X_train_shap, y_train_shap, epochs=30, batch_size=32, validation_split=0.1, verbose=0)

# Invert the log1p transform, then score on the original scale
y_pred_shap = np.expm1(model_shap.predict(X_test_shap).flatten())
y_true_shap = np.expm1(y_test_shap)
rmse_shap = np.sqrt(mean_squared_error(y_true_shap, y_pred_shap))
mae_shap = mean_absolute_error(y_true_shap, y_pred_shap)
r2_shap = r2_score(y_true_shap, y_pred_shap)

In [None]:
# Grouped bar chart comparing the three models.
labels = ['전체 피처', '선택 피처', 'SHAP 피처']
rmses = [rmse_all, rmse_sel, rmse_shap]
maes = [mae_all, mae_sel, mae_shap]
r2s = [r2_all, r2_sel, r2_shap]

x = np.arange(len(labels))
width = 0.25

# RMSE/MAE are on the scale of the exchange rate while R² is at most 1, so
# R² gets its own y-axis; on a shared axis its bars would be unreadable.
fig, ax_err = plt.subplots(figsize=(12, 6))
ax_r2 = ax_err.twinx()

bars_rmse = ax_err.bar(x - width, rmses, width, label='RMSE', color='red')
bars_mae = ax_err.bar(x, maes, width, label='MAE', color='blue')
bars_r2 = ax_r2.bar(x + width, r2s, width, label='R²', color='green')

# Value labels just above each bar
ax_err.bar_label(bars_rmse, fmt='%.1f', padding=3)
ax_err.bar_label(bars_mae, fmt='%.1f', padding=3)
ax_r2.bar_label(bars_r2, fmt='%.2f', padding=3)

ax_err.set_xticks(x)
ax_err.set_xticklabels(labels)
ax_err.set_title("📊 LSTM 모델 성능 비교")
ax_err.set_ylabel("RMSE / MAE")
ax_r2.set_ylabel("R²")
ax_err.grid(axis='y', linestyle='--', alpha=0.5)

# One legend for both axes, attached to the top-most axes so bars do not cover it
ax_r2.legend(handles=[bars_rmse, bars_mae, bars_r2])

fig.tight_layout()
plt.show()