In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import save_model, load_model


In [None]:
file_path = r"C:\Users\mvy48\OneDrive\Documents\cllg_projects\sem_5\cloud\vmCloud_data.csv"
df = pd.read_csv(file_path)

df = df.drop(['timestamp', 'vm_id'], axis=1)
df = df.head(100000)
df.head()

Unnamed: 0,cpu_usage,memory_usage,network_traffic,power_consumption,num_executed_instructions,execution_time,energy_efficiency,task_type,task_priority,task_status
0,54.88135,78.950861,164.775973,287.808986,7527.0,69.345575,0.553589,network,medium,waiting
1,71.518937,29.901883,,362.273569,5348.0,41.39604,0.349856,io,high,completed
2,,92.709195,203.674847,231.467903,5483.0,24.602549,0.796277,io,medium,completed
3,54.488318,88.10096,,195.639954,5876.0,16.45667,0.529511,compute,high,completed
4,42.36548,,,359.451537,3361.0,55.307992,0.351907,,medium,waiting


In [None]:

numeric_columns = df.select_dtypes(include=[np.number]).columns
categorical_columns = df.select_dtypes(exclude=[np.number]).columns

df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())

df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

df['cpu_usage_lag1'] = df['cpu_usage'].shift(1)
df['cpu_usage_lag2'] = df['cpu_usage'].shift(2)
df['cpu_usage_lag3'] = df['cpu_usage'].shift(3)
df['cpu_usage_lag4'] = df['cpu_usage'].shift(4)
df['cpu_usage_lag5'] = df['cpu_usage'].shift(5)
df['cpu_usage_rolling_mean'] = df['cpu_usage'].rolling(window=5).mean()
df['cpu_usage_rolling_std'] = df['cpu_usage'].rolling(window=5).std()
df['cpu_usage_rolling_median'] = df['cpu_usage'].rolling(window=5).median()
df['cpu_usage_rolling_min'] = df['cpu_usage'].rolling(window=5).min()
df['cpu_usage_rolling_max'] = df['cpu_usage'].rolling(window=5).max()

df = df.dropna()

X = df.drop('cpu_usage', axis=1)
y = df['cpu_usage']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])


In [None]:
model = Sequential()
model.add(LSTM(150, return_sequences=True, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
model.add(Dropout(0.4))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.4))
model.add(Dense(1))

optimizer = Adam(learning_rate=0.0005)
model.compile(optimizer=optimizer, loss='mean_squared_error')

history = model.fit(
    X_train_scaled, y_train,
    epochs=100, batch_size=64,
    validation_data=(X_test_scaled, y_test),
    verbose=1
)


  super().__init__(**kwargs)


Epoch 1/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - loss: 1955.0598 - val_loss: 312.0769
Epoch 2/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 238.2527 - val_loss: 67.9315
Epoch 3/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 74.2210 - val_loss: 17.7454
Epoch 4/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 38.4738 - val_loss: 4.8560
Epoch 5/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 29.6921 - val_loss: 2.2394
Epoch 6/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 26.9563 - val_loss: 1.4693
Epoch 7/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 26.2065 - val_loss: 1.2731
Epoch 8/100
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - loss: 24.8013 - val_loss: 0.8433
E

In [None]:
from tensorflow.keras.models import save_model

model_path = r"C:\Users\mvy48\OneDrive\Documents\cllg_projects\sem_5\cloud\saved_lstm_model.keras"

save_model(model, model_path)

print(f"Model saved to: {model_path}")


Model saved to: C:\Users\mvy48\OneDrive\Documents\cllg_projects\sem_5\cloud\saved_lstm_model.keras


In [None]:
model = load_model(model_path)

y_pred = model.predict(X_test_scaled)

mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
print("R-squared (R2):", r2)

print("\nPredictions vs Actual Values (for the first 10 predictions):")
for i in range(10):
    print(f"Predicted: {y_pred[i][0]:.2f}, Actual: {y_test.iloc[i]:.2f}")


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
Mean Absolute Error (MAE): 0.4503779700896236
Mean Squared Error (MSE): 0.31534215050077835
Root Mean Squared Error (RMSE): 0.5615533371824785
R-squared (R2): 0.9995852829192424

Predictions vs Actual Values (for the first 10 predictions):
Predicted: 18.07, Actual: 17.64
Predicted: 65.71, Actual: 64.98
Predicted: 28.13, Actual: 27.89
Predicted: 83.53, Actual: 83.17
Predicted: 68.62, Actual: 68.42
Predicted: 3.08, Actual: 2.52
Predicted: 96.17, Actual: 95.58
Predicted: 93.27, Actual: 93.32
Predicted: 92.26, Actual: 92.01
Predicted: 70.94, Actual: 70.28


In [None]:
import plotly.graph_objects as go

actual_values = y_test.values[:100]
predicted_values = y_pred[:100].flatten()  
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=list(range(len(actual_values))),
    y=actual_values,
    mode='lines+markers',
    name='Actual',
    line=dict(color='blue'),
    marker=dict(size=6)
))

fig.add_trace(go.Scatter(
    x=list(range(len(predicted_values))),
    y=predicted_values,
    mode='lines+markers',
    name='Predicted',
    line=dict(color='orange'),
    marker=dict(size=6)
))

fig.update_layout(
    title="Interactive Actual vs. Predicted CPU Usage",
    xaxis=dict(title="Sample"),
    yaxis=dict(title="CPU Usage"),
    legend=dict(title="Legend"),
    template='plotly_dark',  
    hovermode='x'
)

fig.show()
