# In [None]:  (Jupyter cell marker left over from the notebook export)
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, LSTM, Dense
import plotly.graph_objects as go
from datetime import timedelta

# === Step 1: Load and prepare the data ===
df = pd.read_csv("sales_data.csv")
# Truncate every timestamp to midnight. A bare date string such as
# '2023-12-31' is compared as 2023-12-31 00:00:00, so without this step rows
# stamped later on the last day of a range would be excluded, and rows with
# different times on the same day would land in separate groups below.
# For date-only input this is a no-op.
df['Date'] = pd.to_datetime(df['Date']).dt.normalize()

# Chronological split: 2022-2023 for training, 2024 for testing.
train_df = df[(df['Date'] >= '2022-01-01') & (df['Date'] <= '2023-12-31')]
test_df = df[(df['Date'] >= '2024-01-01') & (df['Date'] <= '2024-12-31')]

# Total demand per calendar day (groupby sorts the dates in ascending order).
train_demand = train_df.groupby('Date')['Demand'].sum().reset_index()
test_demand = test_df.groupby('Date')['Demand'].sum().reset_index()

# === Step 2: Scale the series ===
# The scaler is fitted on the training series only and then reused for the
# test series, so no test-set statistics influence the scaling.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_demand['Demand'].values.reshape(-1, 1))
test_scaled = scaler.transform(test_demand['Demand'].values.reshape(-1, 1))

# === Step 3: Build the LSTM dataset ===
def create_dataset(data, time_step=30):
    """Slice a single-feature series into sliding-window samples.

    Args:
        data: 2-D array-like with one row per observation; only column 0
            is read.
        time_step: Number of consecutive observations in each input window.

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(n - time_step, time_step)`` and
        ``y`` has shape ``(n - time_step,)``, where ``n`` is the number of
        rows in ``data`` and ``y[i]`` is the observation that immediately
        follows window ``X[i]``. When ``data`` has no more than ``time_step``
        rows the result is empty but keeps the shapes ``(0, time_step)`` and
        ``(0,)``.

    Raises:
        ValueError: If ``time_step`` is smaller than 1.
    """
    if time_step < 1:
        raise ValueError(f"time_step must be >= 1, got {time_step}")

    series = np.asarray(data)[:, 0]
    n_samples = len(series) - time_step
    if n_samples <= 0:
        # Keep the rank of X stable so callers can reshape it unconditionally.
        return (
            np.empty((0, time_step), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    X = np.array([series[start:start + time_step] for start in range(n_samples)])
    # The target of window i is the element right after it, i.e. series[i + time_step].
    y = series[time_step:].copy()
    return X, y

time_step = 30

# Sliding-window samples for training.
X_train, y_train = create_dataset(train_scaled, time_step)

# Prefix the test series with the last `time_step` training points so that
# the first test target already has a full input window.
combined = np.vstack((train_scaled[-time_step:], test_scaled))
X_test, y_test = create_dataset(combined, time_step)

# The LSTM layer takes input shaped (samples, timesteps, features).
X_train = np.reshape(X_train, (-1, time_step, 1))
X_test = np.reshape(X_test, (-1, time_step, 1))

# === Step 4: Build and train the model ===
model = Sequential()
model.add(Input(shape=(time_step, 1)))
model.add(LSTM(64))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')
model.fit(x=X_train, y=y_train, batch_size=32, epochs=20, verbose=1)

# === Step 5: Predict on both splits ===
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Map targets and predictions back to the original demand scale.
y_train_inv = scaler.inverse_transform(y_train[:, np.newaxis])
train_predict_inv = scaler.inverse_transform(train_predict)
y_test_inv = scaler.inverse_transform(y_test[:, np.newaxis])
test_predict_inv = scaler.inverse_transform(test_predict)

# === Step 6: Rolling forecast of the next 30 days ===
def predict_next_days(model, last_sequence, days=30):
    """Forecast ``days`` future values by feeding predictions back as input.

    Each step predicts one value from the current window, then slides the
    window forward by dropping its oldest entry and appending the prediction.

    Args:
        model: Object exposing ``predict(batch, verbose=0)`` whose result can
            be indexed with ``[0, 0]`` for a batch of shape
            ``(1, window, 1)``.
        last_sequence: The most recent observations, 1-D or of shape
            ``(window, 1)``. It is copied, never modified.
        days: Number of future steps to generate.

    Returns:
        Array of shape ``(days, 1)`` with the predictions in chronological
        order, on the same scale as ``last_sequence``.

    Raises:
        ValueError: If ``last_sequence`` is empty.
    """
    # np.array copies, so the caller's array is left untouched.
    window = np.array(last_sequence).reshape(-1, 1)
    # Take the window length from the input itself rather than from a
    # module-level constant, so the function works for any window size.
    window_len = window.shape[0]
    if window_len == 0:
        raise ValueError("last_sequence must contain at least one value")

    forecasts = []
    for _ in range(days):
        next_value = model.predict(window.reshape(1, window_len, 1), verbose=0)[0, 0]
        forecasts.append(next_value)
        # Slide the window: drop the oldest value, append the new prediction.
        window = np.concatenate((window[1:], [[next_value]]), axis=0)
    return np.array(forecasts).reshape(-1, 1)

# Seed the rolling forecast with the most recent `time_step` scaled values.
last_input = combined[-time_step:].reshape(-1, 1)
future_preds = predict_next_days(model, last_input, days=30)
future_preds_inv = scaler.inverse_transform(future_preds)

# === Step 7: Build the date axes ===
# Training targets start after the first full window, so skip `time_step` dates.
train_dates = train_demand['Date'][time_step:].reset_index(drop=True)
# The test windows are built from `combined`, which is prefixed with the last
# training window, so the test targets end on the last test date and normally
# cover every test date. Taking the trailing len(y_test_inv) dates keeps the
# x axis the same length as the plotted values and aligned with them.
test_dates = test_demand['Date'].tail(len(y_test_inv)).reset_index(drop=True)

# Future dates continue day by day after the last test date, one per forecast value.
last_date = test_demand['Date'].max()
future_dates = [
    last_date + timedelta(days=offset)
    for offset in range(1, len(future_preds_inv) + 1)
]

# === Step 8: Visualise with Plotly ===
# All traces are passed to the figure at construction time, in drawing order.
fig = go.Figure(data=[
    # Training set: actual values and model fit (orange dashed line).
    go.Scatter(
        x=train_dates,
        y=y_train_inv.ravel(),
        name="Train Actual",
        line={"color": "blue"},
    ),
    go.Scatter(
        x=train_dates,
        y=train_predict_inv.ravel(),
        name="Train Predicted",
        line={"color": "orange", "dash": "dash"},
    ),
    # Test set: actual values and predictions (red dashed line).
    go.Scatter(
        x=test_dates,
        y=y_test_inv.ravel(),
        name="Test Actual",
        line={"color": "green"},
    ),
    go.Scatter(
        x=test_dates,
        y=test_predict_inv.ravel(),
        name="Test Predicted",
        line={"color": "red", "dash": "dash"},
    ),
    # Future forecast (dark red dotted line).
    go.Scatter(
        x=future_dates,
        y=future_preds_inv.ravel(),
        name="Future Forecast (30d)",
        line={"color": "darkred", "dash": "dot"},
    ),
])

# Layout: titles, unified hover, legend in the top-left corner, light theme.
fig.update_layout(
    title="LSTM Demand Forecasting: Train (2022-23), Test (2024), and Future (30d)",
    xaxis_title="Date",
    yaxis_title="Demand",
    hovermode="x unified",
    legend={"x": 0.01, "y": 0.99},
    template="plotly_white",
)

fig.show()
