In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

# Load the pre-split training and test tables.
train_data = pd.read_csv('train_data.csv')
test_data = pd.read_csv('test_data.csv')

# Columns used by the model. The last entry, 'cnt', is the column that
# later cells use as the prediction target.
features = [
    'season', 'yr', 'mnth', 'hr', 'holiday', 'weekday', 'workingday',
    'weathersit', 'temp', 'atemp', 'hum', 'windspeed', 'cnt',
]

from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training columns only and then reused
# unchanged on the test columns.
scaler = MinMaxScaler()
train_matrix = scaler.fit_transform(train_data[features])
test_matrix = scaler.transform(test_data[features])

train_scaled = pd.DataFrame(train_matrix, columns=features)
test_scaled = pd.DataFrame(test_matrix, columns=features)

def create_sequences(data, target, input_len, output_len):
    """
    Build sliding-window model inputs and the matching forecast targets.

    Window ``i`` pairs ``data[i:i + input_len]`` with the ``output_len``
    target values that immediately follow it,
    ``target[i + input_len:i + input_len + output_len]``.

    Args:
        data: Sequence indexed by time step along its first axis.
        target: Sequence of target values indexed by the same time steps.
        input_len: Number of time steps in each input window.
        output_len: Number of time steps in each target window.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays holding the stacked input windows
        and the stacked target windows. Both are empty when ``data`` is
        shorter than ``input_len + output_len``.
    """
    # The window starting at len(data) - input_len - output_len still fits
    # completely, hence the +1; without it the last valid sample is lost.
    n_windows = len(data) - input_len - output_len + 1

    x, y = [], []
    for i in range(n_windows):
        split = i + input_len  # first time step of the target window
        x.append(data[i:split])
        y.append(target[split:split + output_len])
    return np.array(x), np.array(y)

input_len = 96  # time steps in each input window
output_len = 96  # time steps in each target window

# Every column except the trailing 'cnt' is an input feature; 'cnt' alone
# supplies the target values.
input_columns = features[:-1]

x_train, y_train = create_sequences(
    data=train_scaled[input_columns].values,
    target=train_scaled['cnt'].values,
    input_len=input_len,
    output_len=output_len,
)
x_test, y_test = create_sequences(
    data=test_scaled[input_columns].values,
    target=test_scaled['cnt'].values,
    input_len=input_len,
    output_len=output_len,
)

print("x_train shape:", x_train.shape)  # (samples, time steps, features)
print("y_train shape:", y_train.shape)  # (samples, output time steps)

In [None]:
# ===========================
# Build and train the LSTM model
# ===========================
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import Adam


def build_lstm_model(n_steps_in, n_features, n_steps_out):
    """
    Create a fresh, uncompiled three-layer LSTM forecaster.

    Args:
        n_steps_in: Number of time steps in each input window.
        n_features: Number of feature columns per time step.
        n_steps_out: Number of values the final Dense layer emits.

    Returns:
        A new ``Sequential`` model with newly initialised weights.
    """
    return models.Sequential([
        layers.Input(shape=(n_steps_in, n_features)),
        layers.LSTM(128, return_sequences=True, activation='tanh'),
        layers.Dropout(0.1),
        layers.LSTM(128, return_sequences=True, activation='tanh'),
        layers.Dropout(0.1),
        layers.LSTM(256, activation='tanh'),
        layers.Dense(n_steps_out)
    ])


mse_scores = []
mae_scores = []
best_history = None  # History of the run with the lowest validation loss
best_val_loss = float('inf')

num_experiments = 10

for i in range(num_experiments):
    print(f"Starting Experiment {i + 1}/{num_experiments}...")

    # Release the Keras global state left behind by the previous run so that
    # memory does not keep growing while models are rebuilt in this loop.
    # It runs before the new model is created, so the model that is still in
    # scope after the loop is unaffected.
    tf.keras.backend.clear_session()

    model = build_lstm_model(input_len, len(features) - 1, output_len)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=20,
        restore_best_weights=True
    )

    # NOTE(review): the test windows double as validation data here, so early
    # stopping and restore_best_weights pick weights using the same data that
    # is evaluated just below. The reported scores are therefore optimistic;
    # consider holding out a separate validation split.
    history = model.fit(
        x_train, y_train,
        validation_data=(x_test, y_test),
        epochs=100,
        batch_size=64,
        callbacks=[early_stopping],
        verbose=1
    )

    # Remember the training history of the best run for the loss plot.
    current_min_val_loss = min(history.history['val_loss'])
    if current_min_val_loss < best_val_loss:
        best_val_loss = current_min_val_loss
        best_history = history

    # evaluate() returns the loss (MSE) followed by the compiled metric (MAE).
    mse, mae = model.evaluate(x_test, y_test, verbose=0)
    print(f"Experiment {i + 1} - MSE: {mse:.4f}, MAE: {mae:.4f}")

    mse_scores.append(mse)
    mae_scores.append(mae)

# Aggregate the per-run scores.
mse_mean = np.mean(mse_scores)
mse_std = np.std(mse_scores)
mae_mean = np.mean(mae_scores)
mae_std = np.std(mae_scores)

print("\nFinal Results:")
print("MSE Scores:", mse_scores)
print(f"Mean MSE: {mse_mean:.4f}, Std MSE: {mse_std:.4f}")
print("MAE Scores:", mae_scores)
print(f"Mean MAE: {mae_mean:.4f}, Std MAE: {mae_std:.4f}")

In [None]:
# Stand-alone model with the same layer stack as the training cell, built
# here only so that its layer summary can be printed.
n_input_features = len(features) - 1  # every column except 'cnt'

summary_layers = [
    layers.Input(shape=(input_len, n_input_features)),
    layers.LSTM(128, return_sequences=True, activation='tanh'),
    layers.Dropout(0.1),
    layers.LSTM(128, return_sequences=True, activation='tanh'),
    layers.Dropout(0.1),
    layers.LSTM(256, activation='tanh'),
    layers.Dense(output_len),
]
temp = models.Sequential(summary_layers)
temp.summary()

In [None]:
import matplotlib.pyplot as plt

# Box plot of the per-experiment MSE and MAE scores.
fig, ax = plt.subplots(figsize=(8, 4))
ax.boxplot([mse_scores, mae_scores], labels=["MSE", "MAE"])
ax.set_title("Model Performance Across Experiments")
ax.set_ylabel("Score")
plt.show()

In [None]:
# Plot the training-loss curve of the run kept in best_history.
history = best_history

train_loss = history.history['loss']
epochs = len(train_loss)

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(range(epochs), train_loss, label='Training Loss', linewidth=2)
# ax.plot(range(epochs), history.history['val_loss'], label='Validation Loss', linewidth=2)
ax.set_title('Loss over Epochs', fontsize=18)
ax.set_xlabel('Epoch', fontsize=14)
ax.set_ylabel('Loss', fontsize=14)
ax.legend(fontsize=12)
plt.show()

In [None]:
# NOTE(review): `model` is not defined in this cell; it is whatever model an
# earlier cell left in scope (presumably the last training run, which is not
# necessarily the run with the lowest validation loss) - confirm.
y_pred = model.predict(x_test)

# Forecast windows overlap, so a time step can receive several predictions.
# Sum them per time step and count the contributions, then average.
final_predictions = np.zeros_like(test_scaled['cnt'].values)
counts = np.zeros_like(test_scaled['cnt'].values)
for i in range(len(y_pred)):
    start_idx = i + input_len
    end_idx = start_idx + output_len
    final_predictions[start_idx:end_idx] += y_pred[i]
    counts[start_idx:end_idx] += 1

# Average only where at least one prediction exists. Steps with no
# prediction are marked NaN explicitly instead of relying on a 0/0 division,
# which produced the same NaN but also a RuntimeWarning.
covered = counts > 0
final_predictions[covered] /= counts[covered]
final_predictions[~covered] = np.nan

true_values = test_scaled['cnt'].values[input_len:]
predicted_values = final_predictions[input_len:]  # drop the first input_len steps, which have no prediction

# Window of the series that is written to disk and plotted.
time_st = 250
time_ed = time_st + 250
true_values_ = true_values[time_st:time_ed]
predicted_values_ = predicted_values[time_st:time_ed]

# One "truth prediction" pair per line; the context manager closes the file
# even if a write fails.
with open("lstm/tp.txt", "w") as file:
    for truth, prediction in zip(true_values_, predicted_values_):
        file.write(str(truth) + " " + str(prediction) + "\n")

plt.figure(figsize=(12, 6))
plt.plot(true_values_, label="Ground Truth", linestyle='-', marker='o', markersize=3, alpha=0.7, color='b', linewidth=1.5)
plt.plot(predicted_values_, label="Prediction", linestyle='--', marker='x', markersize=4, alpha=1, color='r', linewidth=2)
plt.legend()
plt.xlabel("Time Step", fontsize=14)
plt.ylabel("Rental Count", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.title("Overall Bike Rental Prediction", fontsize=16)
plt.show()