In [None]:
# lstm과 lstm + prophet을 사용한 예측 결과 비교를 위한 코드
# 임의 생성 시계열 데이터 기준

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

# (Prophet이 없다면 아래처럼 sklearn을 사용해 추세+계절성 모델 대체)
try:
    from prophet import Prophet
    use_prophet = True
except ImportError:
    from sklearn.linear_model import LinearRegression
    use_prophet = False

In [2]:
# 1) Synthetic series 생성 (change-points + monthly seasonality)
np.random.seed(42)
length = 200
dates = pd.date_range('2020-01-01', periods=length, freq='D')
t = np.arange(length)
trend = np.piecewise(
    t,
    [t < 60, (t >= 60)&(t<130), t>=130],
    [lambda x: 0.2*x,
     lambda x: 0.2*60 + 0.5*(x-60),
     lambda x: 0.2*60 + 0.5*70 - 0.1*(x-130)]
)
season = 5 * np.sin(2*np.pi*t/30)
noise = np.random.normal(0,1,length)
y = trend + season + noise

In [3]:
# 학습/테스트 분할
train_len = 150
y_train, y_test = y[:train_len], y[train_len:]
dates_test = dates[train_len:]

In [28]:
# 2) Prophet or 대체 모델로 기본예측(base_pred) 생성
# if use_prophet:
#     df = pd.DataFrame({'ds': dates, 'y': y})
#     m = Prophet(weekly_seasonality=False, yearly_seasonality=False, daily_seasonality=False)
#     m.add_seasonality(name='monthly', period=30, fourier_order=5)
#     m.fit(df.iloc[:train_len])
#     future = m.make_future_dataframe(periods=length-train_len)
#     fc = m.predict(future)
#     base_pred = fc['yhat'].values[train_len:]
# else:
# 선형회귀 + sin/cos 대체
from sklearn.linear_model import LinearRegression
X = np.column_stack([t, np.sin(2*np.pi*t/30), np.cos(2*np.pi*t/30)])
lr = LinearRegression().fit(X[:train_len], y_train)
base_pred = lr.predict(X)[:train_len]

In [30]:
# 3) LSTM 학습 준비 함수
def create_dataset(series, window):
    X, Y = [], []
    for i in range(len(series)-window):
        X.append(series[i:i+window])
        Y.append(series[i+window])
    return np.array(X), np.array(Y)

window = 30
# 3a) 단순 LSTM on raw y_train
X_raw, Y_raw = create_dataset(y_train, window)
# 3b) 하이브리드용: 잔차 학습
residual = y_train - base_pred
# residual = y_train - (base_pred if use_prophet else lr.predict(np.column_stack([t, np.sin(2*np.pi*t/30), np.cos(2*np.pi*t/30)]))[:train_len])
X_res, Y_res   = create_dataset(residual, window)

def to_loader(X, Y):
    X_t = torch.tensor(X, dtype=torch.float32).unsqueeze(-1)
    Y_t = torch.tensor(Y, dtype=torch.float32).unsqueeze(-1)
    return DataLoader(TensorDataset(X_t, Y_t), batch_size=16, shuffle=True)

loader_raw = to_loader(X_raw, Y_raw)
loader_res = to_loader(X_res, Y_res) 

# 4) LSTM 모델 정의
class LSTMModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(1, 16, batch_first=True)
        self.fc   = nn.Linear(16, 1)
    def forward(self, x):
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

In [None]:
base_pred의 의미가 정확히 뭔가?

In [10]:
y_train.shape

(150,)

In [8]:
(base_pred if use_prophet else lr.predict(np.column_stack([t, np.sin(2*np.pi*t/30), np.cos(2*np.pi*t/30)]))[:train_len]).shape

(50,)

In [31]:
# 5) 단순 LSTM 학습
model_raw = LSTMModel()
opt_raw = torch.optim.Adam(model_raw.parameters(), lr=0.01)
loss_fn = nn.MSELoss()
for _ in range(100):
    for xb, yb in loader_raw:
        opt_raw.zero_grad()
        loss_fn(model_raw(xb), yb).backward()
        opt_raw.step()

In [32]:
# 6) 잔차 LSTM 학습
model_res = LSTMModel()
opt_res  = torch.optim.Adam(model_res.parameters(), lr=0.01)
for _ in range(100):
    for xb, yb in loader_res:
        opt_res.zero_grad()
        loss_fn(model_res(xb), yb).backward()
        opt_res.step()

In [33]:
# 7) 예측: 단순 LSTM - test 값을 알고 싶은거고
simple_preds = []
seq = y_train.copy()
for _ in range(len(y_test)):
    xin = torch.tensor(seq[-window:], dtype=torch.float32).unsqueeze(0).unsqueeze(-1)
    with torch.no_grad():
        p = model_raw(xin).item()
    simple_preds.append(p)
    seq = np.append(seq, p)

In [34]:
# 8) 예측: 하이브리드 LSTM+base
res_preds = []
seq_res   = residual.copy()
for _ in range(len(y_test)):
    xin = torch.tensor(seq_res[-window:], dtype=torch.float32).unsqueeze(0).unsqueeze(-1)
    with torch.no_grad():
        p = model_res(xin).item()
    res_preds.append(p)
    seq_res = np.append(seq_res, p)
hybrid_preds = base_pred + np.array(res_preds)

ValueError: operands could not be broadcast together with shapes (150,) (50,) 

In [None]:
# 9) MSE 비교
mse_raw    = np.mean((y_test - simple_preds)**2)
mse_hybrid = np.mean((y_test - hybrid_preds)**2)

In [None]:
# 10) 결과 플롯
plt.figure()
plt.plot(dates_test, y_test,      label='Actual')
plt.plot(dates_test, simple_preds, label='Simple LSTM')
plt.plot(dates_test, hybrid_preds, label='Hybrid LSTM+Base')
plt.legend()
plt.show()

print(f'MSE Simple LSTM   : {mse_raw:.3f}')
print(f'MSE Hybrid (L+P)   : {mse_hybrid:.3f}')