In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# Read the pre-split training and test tables from the working directory.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('train_data.csv', 'test_data.csv')
)

# Columns passed through the scaler. The last entry, 'cnt', is the prediction
# target; the model inputs are later selected with features[:-1].
features = [
    'season', 'yr', 'mnth', 'hr',
    'holiday', 'weekday', 'workingday', 'weathersit',
    'temp', 'atemp', 'hum', 'windspeed',
    'cnt',
]

from sklearn.preprocessing import MinMaxScaler

# Fit the min-max scaler on the training split only, then apply that same
# fitted scaler to both splits so the test data does not influence the scaling.
scaler = MinMaxScaler().fit(train_data[features])
train_scaled, test_scaled = (
    pd.DataFrame(scaler.transform(split[features]), columns=features)
    for split in (train_data, test_data)
)

def create_sequences(data, target, input_len, output_len):
    """Build sliding-window model inputs and their forecast targets.

    Window ``i`` pairs ``data[i : i + input_len]`` with the targets that
    immediately follow it, ``target[i + input_len : i + input_len + output_len]``.

    Args:
        data: Sequence of per-step inputs, sliced along its first axis.
        target: Sequence of per-step target values, aligned with ``data``.
        input_len: Number of steps in each input window.
        output_len: Number of steps in each target window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays holding the stacked input windows and
        the stacked target windows. Both are empty arrays when the series is
        shorter than ``input_len + output_len``.
    """
    # The last valid window starts at len(data) - input_len - output_len, so the
    # count needs the "+ 1"; without it the final window is silently dropped and
    # a series of exactly input_len + output_len steps produces no sample at all.
    n_windows = len(data) - input_len - output_len + 1

    x, y = [], []
    for start in range(n_windows):
        split = start + input_len
        x.append(data[start:split])
        y.append(target[split:split + output_len])
    return np.array(x), np.array(y)

# Window sizes in time steps: each sample uses `input_len` past steps as input
# and the following `output_len` steps of 'cnt' as the target.
input_len = 96
output_len = 96

# Every column except the trailing 'cnt' is a model input; 'cnt' is the target.
x_train, y_train = create_sequences(
    train_scaled[features[:-1]].values,
    train_scaled['cnt'].values,
    input_len,
    output_len,
)
x_test, y_test = create_sequences(
    test_scaled[features[:-1]].values,
    test_scaled['cnt'].values,
    input_len,
    output_len,
)

print("x_train shape:", x_train.shape)
print("y_train shape:", y_train.shape)

In [11]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, LayerNormalization, MultiHeadAttention, Dropout, Layer
from tensorflow.keras.models import Model

# Transformer 块
class TransformerBlock(Layer):
    """Transformer encoder block.

    Applies multi-head self-attention and then a two-layer feed-forward
    network. Each sub-layer output goes through dropout, is added back to the
    sub-layer input (residual connection) and is layer-normalized.

    Args:
        embed_dim: Size of the last axis of the inputs. It is also used as the
            `key_dim` of the attention layer and as the output width of the
            feed-forward network, which the residual additions require.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate used after both sub-layers.
        **kwargs: Forwarded to the base `Layer` constructor (e.g. `name`).
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential([
            Dense(ff_dim, activation="relu"),
            Dense(embed_dim)
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Run the block on `inputs`.

        Args:
            inputs: Tensor whose last axis has size `embed_dim`.
            training: Forwarded to the two dropout layers. Defaults to None so
                the layer can also be called without an explicit flag.

        Returns:
            Tensor with the same shape as `inputs`.
        """
        # Self-attention: the same tensor is used as query and value.
        attn_output = self.attention(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
        })
        return config

# 位置编码
class PositionalEncoding(Layer):
    """Adds a fixed sinusoidal positional encoding to its inputs.

    The encoding table has shape `(sequence_len, embed_dim)` and is computed
    once at construction time; it has no trainable weights.

    Args:
        sequence_len: Number of positions in the table.
        embed_dim: Size of the encoding vector at each position.
        **kwargs: Forwarded to the base `Layer` constructor (e.g. `name`).
    """

    def __init__(self, sequence_len, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.sequence_len = sequence_len
        self.embed_dim = embed_dim
        self.pos_encoding = self.get_positional_encoding(sequence_len, embed_dim)

    def get_positional_encoding(self, sequence_len, embed_dim):
        """Build the `(sequence_len, embed_dim)` sine/cosine table as a float32 constant.

        Even columns hold sines and odd columns hold cosines of
        `position * 10000 ** (-k / embed_dim)` for even `k`.
        """
        position = np.arange(sequence_len)[:, np.newaxis]
        div_term = np.exp(np.arange(0, embed_dim, 2) * -(np.log(10000.0) / embed_dim))
        angles = position * div_term

        pos_encoding = np.zeros((sequence_len, embed_dim))
        pos_encoding[:, 0::2] = np.sin(angles)
        # With an odd embed_dim there is one fewer odd column than even column,
        # so trim the angles to match; for an even embed_dim this is a no-op.
        pos_encoding[:, 1::2] = np.cos(angles[:, :embed_dim // 2])
        return tf.constant(pos_encoding, dtype=tf.float32)

    def call(self, inputs):
        """Return `inputs` plus the positional table (broadcast over the leading axis)."""
        return inputs + self.pos_encoding

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            "sequence_len": self.sequence_len,
            "embed_dim": self.embed_dim,
        })
        return config

# Transformer 模型
def build_transformer_model(input_len, feature_dim, output_len, embed_dim, num_heads, ff_dim, num_layers, dropout=0.1):
    """Build an encoder-only Transformer that forecasts `output_len` values.

    The input is projected to `embed_dim`, given a positional encoding, passed
    through `num_layers` Transformer blocks and projected to one value per time
    step; the last `output_len` steps form the forecast.

    Args:
        input_len: Number of time steps in each input window.
        feature_dim: Number of features per time step.
        output_len: Number of values to predict; must be between 1 and
            `input_len` because the forecast is read from the trailing steps.
        embed_dim: Width of the per-step embedding.
        num_heads: Number of attention heads in each block.
        ff_dim: Hidden width of each block's feed-forward network.
        num_layers: Number of stacked Transformer blocks.
        dropout: Dropout rate used inside each block.

    Returns:
        An uncompiled Keras `Model` mapping `(batch, input_len, feature_dim)`
        to `(batch, output_len)`.

    Raises:
        ValueError: If `output_len` is not in the range `1..input_len`.
    """
    # Fail early with a clear message instead of an obscure Reshape error.
    if not 1 <= output_len <= input_len:
        raise ValueError(
            f"output_len must be between 1 and input_len ({input_len}), got {output_len}"
        )

    inputs = Input(shape=(input_len, feature_dim))
    x = Dense(embed_dim)(inputs)
    x = PositionalEncoding(input_len, embed_dim)(x)

    for _ in range(num_layers):
        # Do not hard-code training=True: that pins the block to training mode
        # and can keep dropout active during evaluate()/predict(). Passing None
        # leaves the mode to be supplied by Keras when the model is run.
        x = TransformerBlock(embed_dim, num_heads, ff_dim, dropout)(x, training=None)

    # One value per time step, then keep the trailing `output_len` steps and
    # drop the singleton feature axis.
    outputs = Dense(1)(x)
    outputs = tf.keras.layers.Reshape((output_len,))(outputs[:, -output_len:])

    return Model(inputs, outputs)

In [None]:
# ===========================
# Train and evaluate the Transformer model over repeated experiments
# ===========================
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import Adam

# Train the Transformer from scratch several times and collect the test-set
# MSE/MAE of every run so their mean and standard deviation can be reported.
# NOTE(review): no random seed is set, so individual runs are not reproducible.
mse_scores = []
mae_scores = []
best_history = None            # History of the run with the lowest validation loss
best_val_loss = float('inf')   # Lowest validation loss seen across all runs

num_experiments = 10

for i in range(num_experiments):
    print(f"Starting Experiment {i + 1}/{num_experiments}...")

    # A new model is built on every iteration; clear the global Keras state left
    # behind by the previous one so memory does not keep growing across runs.
    tf.keras.backend.clear_session()

    model = build_transformer_model(input_len=input_len, feature_dim=len(features) - 1, output_len=output_len,
                                        embed_dim=64, num_heads=4, ff_dim=128, num_layers=4, dropout=0.2)

    lr_schedule = ExponentialDecay(
        initial_learning_rate=0.001,  # initial learning rate
        decay_steps=10000,            # number of optimizer steps between decays
        decay_rate=0.9,               # multiplicative decay factor
        staircase=True                # decay in discrete steps instead of continuously
    )

    optimizer = Adam(learning_rate=lr_schedule)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=20,
        restore_best_weights=True
    )

    # NOTE(review): the test split doubles as the validation set here, so early
    # stopping and weight restoration are tuned on the same data that is scored
    # below; the reported test metrics are therefore optimistic. Consider
    # holding out part of the training data for validation instead.
    history = model.fit(
        x_train, y_train,
        validation_data=(x_test, y_test),
        epochs=100,
        batch_size=64,
        callbacks=[early_stopping],
        verbose=1
    )

    # Remember the training history of the run with the lowest validation loss.
    current_min_val_loss = min(history.history['val_loss'])
    if current_min_val_loss < best_val_loss:
        best_val_loss = current_min_val_loss
        best_history = history

    # evaluate() returns the loss (MSE) followed by the compiled metric (MAE).
    mse, mae = model.evaluate(x_test, y_test, verbose=0)
    print(f"Experiment {i + 1} - MSE: {mse:.4f}, MAE: {mae:.4f}")

    # Store this run's results.
    mse_scores.append(mse)
    mae_scores.append(mae)

# Mean and standard deviation across runs.
mse_mean = np.mean(mse_scores)
mse_std = np.std(mse_scores)
mae_mean = np.mean(mae_scores)
mae_std = np.std(mae_scores)

print("\nFinal Results:")
print("MSE Scores:", mse_scores)
print(f"Mean MSE: {mse_mean:.4f}, Std MSE: {mse_std:.4f}")
print("MAE Scores:", mae_scores)
print(f"Mean MAE: {mae_mean:.4f}, Std MAE: {mae_std:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Box plot of the test MSE and MAE collected over the repeated experiments.
fig, ax = plt.subplots(figsize=(8, 4))
ax.boxplot([mse_scores, mae_scores])
# Name the boxes through the tick API rather than boxplot's `labels` keyword,
# which newer Matplotlib releases deprecate in favour of `tick_labels`; the two
# boxes are drawn at the default positions 1 and 2.
ax.set_xticks([1, 2])
ax.set_xticklabels(["MSE", "MAE"])
ax.set_title("Model Performance Across Experiments")
ax.set_ylabel("Score")
plt.show()

In [None]:
# Show the training-loss curve of the run that reached the lowest validation
# loss. Only the training loss is drawn; the same history also carries the
# validation loss under the 'val_loss' key.
history = best_history
epochs = len(history.history['loss'])

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(range(epochs), history.history['loss'], label='Training Loss', linewidth=2)
ax.set_title('Loss over Epochs', fontsize=18)
ax.set_xlabel('Epoch', fontsize=14)
ax.set_ylabel('Loss', fontsize=14)
ax.legend(fontsize=12)
plt.show()

In [None]:
# Predict every test window and stitch the overlapping forecasts back into one
# series by averaging all predictions that fall on the same time step.
# NOTE(review): `model` is whatever the experiment loop built last, which is not
# necessarily the run whose history is kept in `best_history`.
y_pred = model.predict(x_test)

final_predictions = np.zeros_like(test_scaled['cnt'].values)
counts = np.zeros_like(test_scaled['cnt'].values)

# Window i forecasts the `output_len` steps that start right after its inputs.
for i in range(len(y_pred)):
    start_idx = i + input_len
    end_idx = start_idx + output_len
    final_predictions[start_idx:end_idx] += y_pred[i]
    counts[start_idx:end_idx] += 1

# Average only where at least one forecast landed. Steps that no window covers
# (at least the first `input_len`) are set to NaN explicitly, which is what the
# plain 0/0 division produced, but without the runtime warnings.
covered = counts > 0
final_predictions[covered] /= counts[covered]
final_predictions[~covered] = np.nan

# Drop the first `input_len` steps, which only ever serve as model input.
true_values = test_scaled['cnt'].values[input_len:]
predicted_values = final_predictions[input_len:]

# Slice of the series that is exported and plotted.
time_st = 250
time_ed = time_st + 250
true_values_ = true_values[time_st:time_ed]
predicted_values_ = predicted_values[time_st:time_ed]

# Save ground truth and prediction side by side, one "<true> <pred>" pair per
# line. Create the target directory first so a fresh checkout does not fail, and
# use a context manager so the file is closed even if a write raises.
output_path = "tranaformer/tp1.txt"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
with open(output_path, "w") as file:
    for true_value, predicted_value in zip(true_values_, predicted_values_):
        file.write(str(true_value) + " " + str(predicted_value) + "\n")

# NOTE(review): the plotted values are still on the scaler's min-max scale, not
# raw rental counts, although the y-axis label reads "Rental Count".
plt.figure(figsize=(12, 6))
plt.plot(true_values_, label="Ground Truth", linestyle='-', marker='o', markersize=3, alpha=0.7, color='b', linewidth=1.5)
plt.plot(predicted_values_, label="Prediction", linestyle='--', marker='x', markersize=4, alpha=1, color='r', linewidth=2)
plt.legend()
plt.xlabel("Time Step", fontsize=14)
plt.ylabel("Rental Count", fontsize=14)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.title("Overall Bike Rental Prediction", fontsize=16)
plt.show()