In [None]:
!pip install -q tensorflow

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


In [None]:
# Load dataset: two columns, read positionally as (minute, requests).
df = pd.read_excel("usask.sec.min_short_v2.xlsx")
df.columns = ['minute', 'requests']

# Rename to the ds / y column names used by the Prophet cell, convert the
# minute counter (minutes since the Unix epoch) to timestamps, and order
# the rows chronologically.
# The index is reset after sorting so that row labels match row positions;
# otherwise a later label-aligned column assignment from a frame with a
# fresh 0..n-1 index would pair values with the wrong rows whenever the
# spreadsheet is not already sorted.
df = (
    df.rename(columns={'minute': 'ds', 'requests': 'y'})
      .assign(ds=lambda frame: pd.to_datetime(frame['ds'], unit='m', origin='unix'))
      .sort_values('ds')
      .reset_index(drop=True)
)
df.tail()


In [None]:
from prophet import Prophet

# Fit Prophet on the full history; its in-sample fit is the baseline that
# the LSTM will later correct.
prophet_df = df[['ds', 'y']].copy()
model = Prophet(daily_seasonality=True)
model.fit(prophet_df)

# In-sample forecast: predict on the very same timestamps used for fitting.
forecast = model.predict(prophet_df)
if len(forecast) != len(df):
    raise ValueError(
        f"Prophet returned {len(forecast)} rows for {len(df)} input rows"
    )

# Assign by position, not by index label: df is ordered by ds, and the
# forecast is expected to come back in ds order as well, but the two frames
# need not share index labels (df keeps its pre-sort labels unless it was
# re-indexed). A label-aligned assignment would then mix up rows silently.
df['yhat'] = forecast['yhat'].to_numpy()

# Residual = what Prophet failed to explain; this is the LSTM's target.
df['residual'] = df['y'] - df['yhat']
merged = df[['ds', 'residual']].dropna()


In [None]:
# Rescale the residual column with a min-max scaler. The fitted scaler is
# kept so predictions can be mapped back to request units later.
# NOTE(review): the scaler is fitted on every row, including the tail that
# the training cell holds out through validation_split - confirm this
# leakage of validation min/max is acceptable.
scaler = MinMaxScaler()
scaler.fit(merged[['residual']])
scaled = scaler.transform(merged[['residual']])


In [None]:
def create_sequences(data, window_size=30):
    """Slice a series into overlapping input windows and next-step targets.

    Sample ``k`` pairs the window ``data[k : k + window_size]`` with the
    target ``data[k + window_size]``.

    Args:
        data: Array-like indexed along its first axis. Any trailing axes
            are carried through unchanged.
        window_size: Number of consecutive steps in each input window.
            Must be at least 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. With ``n = len(data)``, ``X``
        has shape ``(max(n - window_size, 0), window_size, *data.shape[1:])``
        and ``y`` has shape ``(max(n - window_size, 0), *data.shape[1:])``.
        When ``data`` is too short for a single window both arrays contain
        zero samples but keep their trailing dimensions, so shape-based
        code downstream does not fail with an IndexError.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    values = np.asarray(data)
    n_samples = max(len(values) - window_size, 0)

    # Broadcasting a column of start positions against a row of in-window
    # offsets yields an (n_samples, window_size) index grid; fancy indexing
    # with it gathers every window in one shot instead of a Python loop.
    starts = np.arange(n_samples)[:, None]
    offsets = np.arange(window_size)[None, :]
    X = values[starts + offsets]

    # Targets are the values immediately following each window. Copy so the
    # result never aliases the caller's array.
    y = values[window_size:window_size + n_samples].copy()
    return X, y

# Turn the scaled residuals into (window -> next value) training samples.
window_size = 30
X, y = create_sequences(scaled, window_size=window_size)

# Make the single feature axis explicit: (samples, timesteps, 1), the
# layout the model's Input layer is built from.
X = np.reshape(X, (X.shape[0], X.shape[1], 1))


In [None]:
from tensorflow.keras.utils import set_random_seed

# Create output directory
MODEL_DIR = "hybrid_lstm_models"
os.makedirs(MODEL_DIR, exist_ok=True)

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling repeat across Restart & Run All.
# (Some GPU kernels may still introduce small run-to-run differences.)
SEED = 42
set_random_seed(SEED)

# Define LSTM model: one 64-unit LSTM layer over each window, followed by a
# single linear output for the next (scaled) residual.
model_lstm = Sequential([
    Input(shape=(X.shape[1], X.shape[2])),
    LSTM(64, activation='tanh'),
    Dense(1)
])
model_lstm.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Callbacks: stop once val_loss has not improved by at least 1e-5 for five
# epochs (rolling back to the best weights), and keep the best model on disk.
checkpoint_path = os.path.join(MODEL_DIR, "best_lstm_model.keras")
es = EarlyStopping(monitor='val_loss', patience=5, min_delta=1e-5, restore_best_weights=True)
mc = ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True)

# Train model. validation_split holds out the last 20% of the samples
# (taken before shuffling), i.e. the most recent windows.
history = model_lstm.fit(
    X, y,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[es, mc],
    verbose=1
)


In [None]:
# Plot training curves side by side: loss on the left, MAE on the right.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# One entry per panel:
# (training key, validation key, training label, validation label, title, y-label)
panels = [
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', "Loss (MSE)", "Loss"),
    ('mae', 'val_mae', 'Training MAE', 'Validation MAE', "Mean Absolute Error (MAE)", "MAE"),
]

for ax, (train_key, val_key, train_label, val_label, title, ylabel) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(title)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(ylabel)
    ax.legend()

fig.tight_layout()

# Save the figure next to the model checkpoints, then display it.
metrics_plot_path = os.path.join(MODEL_DIR, "training_metrics.png")
fig.savefig(metrics_plot_path, dpi=300)
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Predict residuals using trained model
# NOTE(review): X holds the training windows as well as the validation
# tail, so the scores below are largely in-sample - confirm that this is
# the intended evaluation set.
y_pred_scaled = model_lstm.predict(X)

# Inverse transform back to request units
y_true_rescaled = scaler.inverse_transform(y)
y_pred_rescaled = scaler.inverse_transform(y_pred_scaled)

mae = mean_absolute_error(y_true_rescaled, y_pred_rescaled)
rmse = np.sqrt(mean_squared_error(y_true_rescaled, y_pred_rescaled))
r2 = r2_score(y_true_rescaled, y_pred_rescaled)

# Percentage error is measured against the actual request counts rather
# than against the residuals: residuals straddle zero, so dividing by them
# makes MAPE explode (or become inf) and "accuracy" meaningless. The
# numerator is unchanged, because the residual error equals the error of
# the hybrid forecast: y - (yhat + predicted residual) = residual - predicted residual.
abs_error = np.abs(y_true_rescaled - y_pred_rescaled).ravel()

# Targets start at row `window_size` of `merged`; fetch the matching actuals.
actual_requests = np.abs(df.loc[merged.index, 'y'].to_numpy(dtype=float)[window_size:])
if actual_requests.shape[0] != abs_error.shape[0]:
    raise ValueError(
        "Evaluation targets and actual request counts differ in length; "
        "re-run the notebook from the top."
    )

# Minutes with zero requests have no defined percentage error; skip them.
nonzero = actual_requests != 0
if nonzero.any():
    mape = float(np.mean(abs_error[nonzero] / actual_requests[nonzero])) * 100
else:
    mape = float('nan')
accuracy = 100 - mape

print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"R²: {r2:.4f}")
print(f"MAPE: {mape:.2f}%")
print(f"Accuracy: {accuracy:.2f}%")
