# 📊 Multi-Output LSTM for Microservice CPU Usage Forecasting

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

# Read the workload metrics CSV into the DataFrame used by every later cell.
csv_path = "Workload_metrics_V2.csv"
df = pd.read_csv(csv_path)


In [None]:
# Per-service CPU columns the model predicts (one output per entry).
target_cols = [
    'adservice_cpu',
    'cartservice_cpu',
    'checkoutservice_cpu',
    'currencyservice_cpu',
    'emailservice_cpu',
    'frontend_cpu',
    'paymentservice_cpu',
    'productcatalogservice_cpu',
    'recommendationservice_cpu',
    'redis_cpu',
    'shippingservice_cpu',
]

# Lagged copies (1 and 5 rows back) of the request count and of every target.
# NOTE(review): shift() works on row position, so this assumes the CSV rows
# are already in chronological order — TODO confirm.
lag_cols = ['requests'] + target_cols
lag_steps = (1, 5)
for source_col in lag_cols:
    for step in lag_steps:
        df[f'{source_col}_lag_{step}'] = df[source_col].shift(step)

# Cyclical encoding of the hour of day so the 24-hour cycle is continuous.
df['timestamp'] = pd.to_datetime(df['timestamp'])
df['hour'] = df['timestamp'].dt.hour
hour_angle = 2 * np.pi * df['hour'] / 24
df['hour_sin'] = np.sin(hour_angle)
df['hour_cos'] = np.cos(hour_angle)

# The first rows have no lag history (NaN); drop every row with a missing value.
df_clean = df.dropna().reset_index(drop=True)

# Model inputs: every lag column in creation order, then the hour encoding.
input_features = [
    f'{source_col}_lag_{step}'
    for source_col in lag_cols
    for step in lag_steps
] + ['hour_sin', 'hour_cos']


In [None]:
# Turn the cleaned frame into scaled sliding-window sequences and split them
# chronologically 80/20 into train and test sets.
#
# Both scalers are fit on the training period only. Fitting them on the whole
# dataset would let the min/max of the test period leak into the training
# data and into the reported test metrics.
X_all = df_clean[input_features].values
y_all = df_clean[target_cols].values

window_size = 30

# Sequence j uses feature rows j .. j + window_size - 1 as input and the
# target row j + window_size as its label.
n_sequences = len(X_all) - window_size
if n_sequences <= 0:
    raise ValueError(
        f"Need more than {window_size} rows to build sequences, "
        f"got {len(X_all)}"
    )

# Chronological split over sequences (no shuffling): first 80% train.
split_idx = int(n_sequences * 0.8)

# Rows up to and including the last training label; later rows belong to the
# test period and must not influence the scaler statistics.
n_train_rows = window_size + split_idx

scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()
scaler_X.fit(X_all[:n_train_rows])
scaler_y.fit(y_all[:n_train_rows])

# Test-period values may fall outside [0, 1] after scaling; that is expected.
X_scaled = scaler_X.transform(X_all)
y_scaled = scaler_y.transform(y_all)

X_seq = np.array([
    X_scaled[end - window_size:end]
    for end in range(window_size, len(X_scaled))
])
y_seq = y_scaled[window_size:]

X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]


In [None]:
# Heatmap 1: raw correlation between the request count and each service's
# CPU usage, computed on the un-cleaned frame.
raw_cols = ['requests'] + target_cols
corr_matrix1 = df[raw_cols].corr()
fig1, ax1 = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix1, annot=True, cmap='RdBu_r', center=0, fmt=".2f", ax=ax1)
ax1.set_title("Correlation Matrix: Requests and CPU Usage per Service")
fig1.tight_layout()
plt.show()

# Heatmap 2: correlation of the engineered features (lag columns and hour
# encoding) with every target CPU column.
feature_cols = [
    name for name in df_clean.columns
    if '_lag_' in name or name.startswith('hour_')
]
corr_matrix2 = df_clean[feature_cols + target_cols].corr()
feature_vs_target = corr_matrix2.loc[feature_cols, target_cols]
fig2, ax2 = plt.subplots(figsize=(14, 6))
sns.heatmap(feature_vs_target, annot=False, cmap="coolwarm", ax=ax2)
ax2.set_title("Correlation of Input Features with Target CPU Usage")
fig2.tight_layout()
plt.show()


In [None]:
# Correlation of the model's input features (rows) with the targets (columns).
corr = df_clean[input_features + target_cols].corr()
# Keep only the target columns, then remove the target-vs-target rows.
target_corr = corr.loc[:, target_cols].drop(index=target_cols)

fig, ax = plt.subplots(figsize=(12, 6))
sns.heatmap(target_corr, annot=False, cmap="coolwarm", ax=ax)
ax.set_title("Correlation of Input Features with Target CPU Usage")
fig.tight_layout()
plt.show()


In [None]:
# Single-layer LSTM that maps a window of feature rows to one CPU value per
# service.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]
n_outputs = len(target_cols)

lstm_stack = [
    tf.keras.Input(shape=(n_timesteps, n_features)),
    LSTM(128, activation='tanh'),
    Dense(n_outputs),  # one output unit per target column
]
model = Sequential(lstm_stack)
model.compile(optimizer='adam', loss='mse')

# Stop once validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
es = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

fit_options = dict(
    validation_split=0.1,
    epochs=50,
    batch_size=32,
    callbacks=[es],
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)


In [None]:
# Predict on the held-out sequences and undo the target scaling so the
# errors are expressed in the original CPU units.
y_pred_scaled = model.predict(X_test)
y_true = scaler_y.inverse_transform(y_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# MAE, RMSE and R2 for each service (one column of y_true / y_pred each).
metrics = {}
for idx, service in enumerate(target_cols):
    actual, forecast = y_true[:, idx], y_pred[:, idx]
    metrics[service] = {
        "MAE": mean_absolute_error(actual, forecast),
        "RMSE": np.sqrt(mean_squared_error(actual, forecast)),
        "R2": r2_score(actual, forecast),
    }

# Services as rows, metrics as columns (shown as the cell output).
pd.DataFrame(metrics).T


In [None]:
# Compare the first 100 test predictions with the actual values for the
# first target service.
first_service = target_cols[0]
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(y_true[:100, 0], label="True")
ax.plot(y_pred[:100, 0], label="Predicted")
ax.set_title(f"Prediction vs Actual: {first_service}")
ax.legend()
plt.show()


In [None]:
# Save the trained model to disk and confirm the file name.
model_path = "multioutput_lstm_cpu_model.keras"
model.save(model_path)
print(f"✅ Model saved as '{model_path}'")


In [None]:
import os
from sklearn.metrics import mean_absolute_percentage_error

# Make sure the export directory exists before anything is written to it.
os.makedirs("output", exist_ok=True)

# One row per service: MAE, RMSE, R2, MAPE and an "accuracy" derived from MAPE.
# NOTE(review): "Accuracy (%)" is 100 - MAPE * 100, so it goes negative
# whenever MAPE exceeds 1 — confirm this reading is intended for services
# whose true CPU values are close to zero.
metric_columns = ['Service', 'MAE', 'RMSE', 'R2', 'MAPE', 'Accuracy (%)']
results = []
for idx, service in enumerate(target_cols):
    actual = y_true[:, idx]
    forecast = y_pred[:, idx]
    mape = mean_absolute_percentage_error(actual, forecast)
    results.append([
        service,
        mean_absolute_error(actual, forecast),
        np.sqrt(mean_squared_error(actual, forecast)),
        r2_score(actual, forecast),
        mape,
        100 - mape * 100,
    ])

# Write the table to CSV, then show a colour-graded view as the cell output.
results_df = pd.DataFrame(results, columns=metric_columns)
results_df.to_csv("output/cpu_usage_prediction_metrics.csv", index=False)
results_df.style.background_gradient(cmap="YlGnBu")


In [None]:
# Write one "prediction vs actual" PNG per service, covering the first 100
# test samples. Each figure is closed after saving so none is left open.
for idx, service in enumerate(target_cols):
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(y_true[:100, idx], label='True')
    ax.plot(y_pred[:100, idx], label='Predicted')
    ax.set_title(f"Prediction vs Actual: {service}")
    ax.legend()
    fig.tight_layout()
    fig.savefig(f"output/prediction_{service}.png", dpi=300)
    plt.close(fig)
print("✅ All service prediction plots saved to /output/")


In [None]:
# Reload evaluation results
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reload the exported metrics so this cell only depends on the CSV file.
metrics_df = pd.read_csv("output/cpu_usage_prediction_metrics.csv")

# Horizontal bar chart of accuracy per service, best first.
# NOTE(review): recent seaborn releases reportedly warn when `palette` is
# passed without `hue` — confirm against the installed version.
ranked = metrics_df.sort_values("Accuracy (%)", ascending=False)
bar_fig, bar_ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=ranked, x="Accuracy (%)", y="Service", palette="Blues_d", ax=bar_ax)
bar_ax.set_title("Accuracy (%) by Microservice")
bar_ax.set_xlabel("Accuracy (%)")
bar_ax.set_ylabel("Service")
bar_fig.tight_layout()
bar_fig.savefig("output/accuracy_barplot.png", dpi=300)
plt.show()

# Heatmap of the remaining metrics, one row per service.
heatmap_data = metrics_df.set_index("Service").drop(columns=["Accuracy (%)"])
heat_fig, heat_ax = plt.subplots(figsize=(12, 6))
sns.heatmap(heatmap_data, annot=True, fmt=".2f", cmap="coolwarm", ax=heat_ax)
heat_ax.set_title("Evaluation Metrics Heatmap (MAE, RMSE, R², MAPE)")
heat_fig.tight_layout()
heat_fig.savefig("output/metrics_heatmap.png", dpi=300)
plt.show()
print("✅ Saved: output/accuracy_barplot.png and metrics_heatmap.png")
