In [None]:
import pandas as pd
import numpy as np

# Generate a synthetic sample of per-invocation metrics for a handful of Lambda functions.
N_SAMPLES = 200

# Epoch seconds for N_SAMPLES points spaced 5 minutes apart. Subtracting the epoch and
# floor-dividing by one second gives whole seconds regardless of the index's internal
# resolution or the platform's default integer width.
timestamps = (
    pd.date_range(start="2024-09-21 12:00:00", periods=N_SAMPLES, freq="5min")
    - pd.Timestamp("1970-01-01")
) // pd.Timedelta("1s")

# NOTE(review): 'randomDataGenrator' looks like a misspelling of "Generator"; it is kept
# as-is because the value ends up in the CSV and in the one-hot column names.
lambda_functions = ['simpleDataValidation', 'arithmeticComputation', 'stringManipulation', 'fibonacciCalculation', 'randomDataGenrator']

np.random.seed(42)

# Draw each column into a named array so the cold-start flags can be reused below.
function_ids = np.random.choice(lambda_functions, size=N_SAMPLES)
response_time = np.random.uniform(low=0.1, high=3.0, size=N_SAMPLES)  # Response time in seconds
cpu_utilization = np.random.uniform(low=20, high=80, size=N_SAMPLES)  # CPU Utilization in %
memory_utilization = np.random.uniform(low=50, high=300, size=N_SAMPLES)  # Memory usage in MB
cold_start = np.random.choice([0, 1], size=N_SAMPLES, p=[0.8, 0.2])  # 20% probability of cold start

# Mask the latency with the ColdStart flags themselves, so a latency is present exactly
# on the rows flagged as cold starts and is 0 everywhere else.
cold_start_latency = np.random.uniform(low=0.2, high=1.5, size=N_SAMPLES) * cold_start

throttling_rate = np.random.randint(0, 4, size=N_SAMPLES)  # Throttling events in last 1 minute
error_rate = np.random.choice([0, 1], size=N_SAMPLES, p=[0.9, 0.1])  # 10% probability of errors

data_sample = pd.DataFrame({
    'FunctionID': function_ids,
    'Timestamp': timestamps,
    'ResponseTime': response_time,
    'CPUUtilization': cpu_utilization,
    'MemoryUtilization': memory_utilization,
    'ColdStart': cold_start,
    'ColdStartLatency': cold_start_latency,
    'ThrottlingRate': throttling_rate,
    'ErrorRate': error_rate,
})

# Save as CSV if needed
data_sample.to_csv('lambda_functions_sample_data.csv', index=False)


In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Read back the raw sample written by the data-generation cell
df = pd.read_csv('lambda_functions_sample_data.csv')

# Encode the hour of day as a point on the unit circle (sin/cos pair), so that
# hour 23 and hour 0 end up next to each other instead of 23 units apart
df['Hour'] = pd.to_datetime(df['Timestamp'], unit='s').dt.hour
hour_angle = 2 * np.pi * df['Hour'] / 24
df['Hour_sin'] = np.sin(hour_angle)
df['Hour_cos'] = np.cos(hour_angle)

# Expand 'FunctionID' into one indicator column per distinct function name
df_encoded = pd.get_dummies(df, columns=['FunctionID'])

# The epoch value and the integer hour are superseded by the sin/cos pair
df_processed = df_encoded.drop(['Timestamp', 'Hour'], axis=1)

# Min-max scale every remaining column, then re-attach the column labels.
# NOTE(review): this scaler is fitted on all columns of df_processed before any
# train/test split is visible — confirm that later cells which call
# `scaler.inverse_transform` on a single column use a different scaler.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df_processed)
df_scaled = pd.DataFrame(scaled_data, columns=df_processed.columns)

# Persist the preprocessed frame for the modelling cells
df_scaled.to_csv('lambda_functions_preprocessed_data.csv', index=False)


In [None]:
# Mount Google Drive into the runtime's filesystem at /content/drive.
# NOTE(review): the `google.colab` package is presumably only importable inside Colab,
# and the visible cells read/write CSVs and models via relative paths rather than
# under /content/drive — confirm this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# NOTE(review): `mm` is not assigned in any visible cell, so evaluating it on a fresh
# kernel would raise NameError. The stored output beneath this cell looks like a
# 50-epoch Keras training log, so the original cell source (presumably the one that
# builds X_train / y_train / X_test / y_test and a baseline model) appears to have
# been overwritten — TODO restore it.
mm

Epoch 1/50


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 147ms/step - loss: 0.2029 - val_loss: 0.1106
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1045 - val_loss: 0.1537
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.1027 - val_loss: 0.0952
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.0929 - val_loss: 0.0977
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1053 - val_loss: 0.0929
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0940 - val_loss: 0.0900
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - loss: 0.0763 - val_loss: 0.0948
Epoch 8/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0791 - val_loss: 0.0962
Epoch 9/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout, Input, LSTM, Dense
from tensorflow.keras.models import Sequential

# Rebuild the model with dropout for regularization and early stopping.
# Requires X_train, y_train, X_test, y_test and `scaler` to already exist in the kernel.
# NOTE(review): no visible cell creates the train/test arrays — confirm their origin.
model_improved = Sequential()
# Declare the input shape with an explicit Input layer rather than passing
# `input_shape` to the first LSTM, which newer Keras versions warn about.
# Assumes X_train is 3-D: (samples, timesteps, features).
model_improved.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model_improved.add(LSTM(units=50, return_sequences=True))
model_improved.add(Dropout(0.2))  # Dropout to prevent overfitting
model_improved.add(LSTM(units=50, return_sequences=False))
model_improved.add(Dense(units=25))
model_improved.add(Dense(units=1))  # Output layer to predict the ResponseTime

# Compile the model
model_improved.compile(optimizer='adam', loss='mean_squared_error')

# Early stopping to prevent overfitting and restore best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the improved model; the last 20% of the training data is held out for validation
model_improved.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Evaluate the improved model
improved_loss = model_improved.evaluate(X_test, y_test)
print(f"Improved Test Loss: {improved_loss}")

# Make predictions and map both predictions and targets back to original units.
# NOTE(review): `scaler` must be one fitted on the single target column; the
# MinMaxScaler fitted in the preprocessing cell covers every feature column and would
# presumably reject a one-column array — confirm which scaler is in scope here.
y_pred_improved = model_improved.predict(X_test)
y_pred_improved = scaler.inverse_transform(y_pred_improved)  # Inverse scaling to get original values
y_test_improved = scaler.inverse_transform(y_test.reshape(-1, 1))  # Inverse scaling of test data

# Display up to five predictions; slicing avoids an IndexError on a short test set
for predicted, actual in zip(y_pred_improved[:5], y_test_improved[:5]):
    print(f"Improved Predicted: {predicted}, Actual: {actual}")


Epoch 1/100


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 159ms/step - loss: 0.2994 - val_loss: 0.1028
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1104 - val_loss: 0.1717
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.1289 - val_loss: 0.1043
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 0.1070 - val_loss: 0.0952
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 0.1039 - val_loss: 0.0937
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0939 - val_loss: 0.0907
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.0900 - val_loss: 0.0970
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 0.0866 - val_loss: 0.1009
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

In [None]:
!pip install keras-tuner --upgrade



Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl.metadata (5.4 kB)
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl.metadata (221 bytes)
Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m896.5 kB/s[0m eta [36m0:00:00[0m
[?25hDownloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [None]:
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Function to build the model with hyperparameters
def build_model(hp):
    """Build and compile a two-layer LSTM regressor for Keras Tuner.

    Args:
        hp: Hyperparameter container supplied by the tuner. The search space is
            declared through its ``Int`` / ``Float`` methods: ``units``,
            ``dropout``, ``units2``, ``dense_units`` and ``learning_rate``.

    Returns:
        A compiled ``Sequential`` model with a single-unit output layer, using
        Adam and mean squared error.

    Note:
        The input shape is read from the notebook-level ``X_train`` array, so that
        variable must exist before the tuner calls this function.
    """
    # Imported locally so the function does not depend on another cell's imports.
    from tensorflow.keras.layers import Input

    model = Sequential()

    # Declare the input shape with an explicit Input layer rather than passing
    # `input_shape` to the first LSTM, which newer Keras versions warn about.
    # Assumes X_train is 3-D: (samples, timesteps, features).
    model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))

    # LSTM layer with tunable number of units and dropout rate
    model.add(LSTM(units=hp.Int('units', min_value=25, max_value=100, step=25), return_sequences=True))
    model.add(Dropout(hp.Float('dropout', min_value=0.1, max_value=0.5, step=0.1)))

    # Second LSTM layer (returns only the last step) followed by a tunable dense layer
    model.add(LSTM(units=hp.Int('units2', min_value=25, max_value=100, step=25), return_sequences=False))
    model.add(Dense(units=hp.Int('dense_units', min_value=25, max_value=100, step=25)))

    # Output layer
    model.add(Dense(1))

    # Compile the model with a tunable learning rate, sampled on a log scale
    model.compile(optimizer=Adam(learning_rate=hp.Float('learning_rate', min_value=1e-4, max_value=1e-2, sampling='LOG')),
                  loss='mean_squared_error')

    return model

# EarlyStopping is imported here so this cell does not rely on an earlier cell's import
from tensorflow.keras.callbacks import EarlyStopping

# Hyperparameter tuning setup using Keras Tuner
tuner = kt.Hyperband(build_model,
                     objective='val_loss',  # minimize validation loss
                     max_epochs=50,
                     factor=3,
                     directory='my_dir',
                     project_name='lstm_tuning')

# Stop a trial early if validation loss has not improved for 10 epochs
stop_early = EarlyStopping(monitor='val_loss', patience=10)

# Perform the search
tuner.search(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[stop_early])

# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"The optimal number of units in the LSTM layers is {best_hps.get('units')} and {best_hps.get('units2')}.")
print(f"The optimal learning rate is {best_hps.get('learning_rate')}.")

# Build the model with the best hyperparameters and train it.
# The final fit uses early stopping with restore_best_weights, matching the earlier
# "improved" model, so the evaluated and saved model carries the weights from the epoch
# with the lowest validation loss rather than those of the last epoch.
model = tuner.hypermodel.build(best_hps)
final_early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, epochs=50, validation_split=0.2, callbacks=[final_early_stopping])

# Evaluate the model
final_loss = model.evaluate(X_test, y_test)
print(f"Final test loss: {final_loss}")

# Make predictions and map both predictions and targets back to original units.
# NOTE(review): `scaler` must be one fitted on the single target column — confirm
# which scaler is in scope here.
y_pred_tuned = model.predict(X_test)
y_pred_tuned = scaler.inverse_transform(y_pred_tuned)  # Inverse scaling to get original values
y_test_tuned = scaler.inverse_transform(y_test.reshape(-1, 1))

# Display up to five predictions; slicing avoids an IndexError on a short test set
for predicted, actual in zip(y_pred_tuned[:5], y_test_tuned[:5]):
    print(f"Tuned Predicted: {predicted}, Actual: {actual}")


Trial 90 Complete [00h 00m 15s]
val_loss: 0.08841697126626968

Best val_loss So Far: 0.08032001554965973
Total elapsed time: 00h 12m 36s
The optimal number of units in the LSTM layers is 75 and 50.
The optimal learning rate is 0.00027017485116976497.
Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 234ms/step - loss: 0.3019 - val_loss: 0.2221
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.2271 - val_loss: 0.1514
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.1328 - val_loss: 0.1152
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - loss: 0.0987 - val_loss: 0.1148
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step - loss: 0.0987 - val_loss: 0.1304
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.1028 - val_loss: 0.1312
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━



[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 298ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 305ms/step
Tuned Predicted: [0.4555355], Actual: [0.88943925]
Tuned Predicted: [0.47789666], Actual: [0.53900654]
Tuned Predicted: [0.4214669], Actual: [0.57884983]
Tuned Predicted: [0.58468366], Actual: [0.57192731]
Tuned Predicted: [0.47253698], Actual: [0.61179287]


In [None]:
from tensorflow.keras.callbacks import LearningRateScheduler

def scheduler(epoch, lr):
    """Learning-rate schedule: hold for the first 10 epochs, then decay exponentially.

    Args:
        epoch: Index of the epoch about to start (epochs 0-9 keep the rate unchanged).
        lr: Current learning rate.

    Returns:
        The learning rate to use for this epoch, as a plain Python float: ``lr``
        unchanged while ``epoch < 10``, otherwise ``lr * exp(-0.1)``.
    """
    # Local import: math.exp keeps the result a plain float and removes the need for a
    # `tf` name, which this cell never imports.
    import math

    if epoch < 10:
        return float(lr)
    return float(lr * math.exp(-0.1))

# Wrap the schedule function in a Keras callback.
# NOTE(review): no visible cell passes `lr_scheduler` to a fit() call, so the schedule
# currently has no effect — add it to `callbacks=[...]` where it is meant to apply.
lr_scheduler = LearningRateScheduler(scheduler)


In [None]:
# Save the trained model to 'lstm_model.h5' in the working directory.
# `model` is whatever the kernel currently binds to that name; in the visible cells
# that is the model built from the tuner's best hyperparameters.
model.save('lstm_model.h5')




In [None]:
# Save the model in the native Keras format ('.keras' extension).
# NOTE(review): the preceding cell already writes the same `model` to 'lstm_model.h5';
# confirm whether both files are needed.
model.save('lstm_model.keras')
