In [5]:
# 0. Import all libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt

# 1. Import data
file_path = './data/merged_data.csv'
data = pd.read_csv(file_path)

data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values(by='Date', ascending=True)

# train_set = data[(data['Date'] >= '2014-06-24') & (data['Date'] < '2023-06-24')].copy()
# valid_set = data[(data['Date'] >= '2022-06-25') & (data['Date'] <= '2024-06-21')].copy()


total_length = len(data)
train_length = int(total_length * 0.8)
train_set = data.iloc[:train_length].copy()
valid_set = data.iloc[train_length:].copy()


# print("Training set date range:", train_set['Date'].min(), "to", train_set['Date'].max())
# print("Validation set date range:", valid_set['Date'].min(), "to", valid_set['Date'].max())



# 2. Setup Variables
time_steps = 90
future_steps = 10  # Number of future time steps to predict

# 3. Data pre-processing
scaler = MinMaxScaler(feature_range=(0, 1))
train_data = scaler.fit_transform(train_set['Close/Last'].values.reshape(-1, 1))
valid_data = scaler.transform(valid_set['Close/Last'].values.reshape(-1, 1))

# Create training and testing dataset
def create_dataset(data, time_steps, future_steps):
    x, y = [], []
    for i in range(time_steps, len(data) - future_steps + 1):
        x.append(data[i - time_steps:i, 0])
        y.append(data[i:i + future_steps, 0])
    return np.array(x), np.array(y)

x_train, y_train = create_dataset(train_data, time_steps, future_steps)
x_validation, y_validation = create_dataset(valid_data, time_steps, future_steps)

# Reshape for LSTM input
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
x_validation = np.reshape(x_validation, (x_validation.shape[0], x_validation.shape[1], 1))


# 调整y_train的形状以匹配模型输出
y_train = np.reshape(y_train, (y_train.shape[0], future_steps, 1))



# 4. Build Model
def build_model():
    model = Sequential()
    model.add(LSTM(units=50, return_sequences=True, input_shape=(time_steps, 1)))
    model.add(Dropout(0.2))
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    # model.add(LSTM(units=50, return_sequences=False))
    # model.add(Dropout(0.2))
    model.add(Dense(units=future_steps))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# 5. Training
def train_model(model, x_train, y_train):
    model.fit(x_train, y_train, epochs=25, batch_size=32)

# 6. Make Multi-Step Prediction
def make_multi_step_prediction(model, x_valid, scaler):
    # last_data = data[-time_steps:]
    # last_data = np.reshape(last_data, (1, last_data.shape[0], 1))
    # 
    predicted_y_valid_normalized = model.predict(x_valid)
    predicted_y_valid = scaler.inverse_transform(predicted_y_valid_normalized.reshape(-1, 1))
    return predicted_y_valid

# Execute steps 4, 5, 6
model = build_model()
train_model(model, x_train, y_train)

# Predict future 10 days' Close/Last prices
predicted_y_valid = make_multi_step_prediction(model, x_validation, scaler)

# Find the last date in the training set
last_date = train_set['Date'].max()
print(f"last_date:{last_date}")

future_dates = []
print(f"Predicted Close Prices for the next {future_steps} days:")
for i, price in enumerate(predicted_y_valid, 1):
    future_date = (last_date + pd.offsets.BDay(i+1)).strftime('%Y-%m-%d')
    future_date = pd.to_datetime(future_date)
    future_dates.append(future_date)
    print(f"Day {i+1} ({future_date}): {predicted_y_valid[0][i]}")


# Plotting function
def plot_historical_and_predicted(train_set, predicted_prices, future_dates, future_steps):
    plt.figure(figsize=(14, 7))
    plt.plot(train_set['Date'], train_set['Close/Last'], label='Historical Close Price')
    plt.plot(future_dates, predicted_prices, color='red', label='Predicted Prices')
    plt.title(f'Historical Close Price with Predicted Prices for next {future_steps} days')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.legend()
    plt.grid(True)
    plt.show()

plot_historical_and_predicted(train_set, predicted_y_valid, future_dates, future_steps=future_steps)


Epoch 1/25


  super().__init__(**kwargs)


ValueError: Dimensions must be equal, but are 10 and 90 for '{{node compile_loss/mean_squared_error/sub}} = Sub[T=DT_FLOAT](data_1, sequential_4_1/dense_4_1/Add)' with input shapes: [?,10,1], [?,90,10].

In [None]:
data