In [1]:
import json
import pandas as pd


# Read the JSON export into a DataFrame; the 'timestamp' column is parsed as
# datetimes while loading.
df = pd.read_json(
    'data.json',
    convert_dates=['timestamp'],
)


In [2]:
# List the column names of the frame that was loaded from the JSON file.
print(df.columns)

Index(['latency', 'requestSize', 'requestMethod', 'responseSize', 'requestUrl',
       'status', 'source_app', 'timestamp'],
      dtype='object')


In [3]:
# Use the timestamp as the index and order the rows by it.
df = df.set_index('timestamp').sort_index()

# Snapshot of the indexed, sorted frame taken before any column is modified.
dforiginal = df.copy()

# Constant helper columns holding the two possible flag values.
df['ones'] = 1
df['zeros'] = 0
# Binarise latency: 0 where the recorded value is below 0.1, otherwise 1.
df['latency'] = df['zeros'].where(df['latency'] < 0.1, df['ones'])


In [4]:
# Not the last expression of the cell, so this preview is not displayed.
df.head()
# Number of rows that remain after dropping every row that has a duplicate
# (keep=False removes all members of each duplicated group).
len(df.drop_duplicates(keep=False))

58

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Disabled: one-hot encoding of the categorical columns.
#df = pd.get_dummies(df, columns=['requestUrl', 'status', 'source_app','requestMethod'])

# Disabled: min-max normalisation of the numerical columns. Because these lines
# are commented out, this cell does not create a `scaler` object.
#scaler = MinMaxScaler()
#df[["latency", "requestSize", "responseSize"]] = scaler.fit_transform(df[["latency", "requestSize", "responseSize"]])

# Show the current column names as an object array.
df.columns.values.astype(list)

array(['latency', 'requestSize', 'requestMethod', 'responseSize',
       'requestUrl', 'status', 'source_app', 'ones', 'zeros'],
      dtype=object)

In [6]:
# Give every distinct combination of the seven request attributes an integer
# id ("key"); rows that share all seven values receive the same key.
df['key'] = df.groupby(
    ['latency', 'requestSize', 'requestMethod', 'responseSize',
     'requestUrl', 'status', 'source_app']
).ngroup()

# Keep only the key and one-hot encode it (one column per distinct key).
# dtype is pinned to uint8 so the result is a 0/1 matrix on every pandas
# version: pandas 2.0 changed the get_dummies default from uint8 to bool.
df = pd.get_dummies(df[['key']], columns=['key'], dtype='uint8')

In [7]:
# Preview the first rows of the one-hot encoded key columns.
df.head()

Unnamed: 0_level_0,key_0,key_1,key_2,key_3,key_4,key_5,key_6,key_7,key_8,key_9,...,key_163,key_164,key_165,key_166,key_167,key_168,key_169,key_170,key_171,key_172
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-04-14 10:48:32.528658+00:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-04-14 10:48:32.587245+00:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-04-14 10:48:32.592313+00:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-04-14 10:48:34.756192+00:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2023-04-14 10:48:35.636258+00:00,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
from sklearn.preprocessing import StandardScaler


# Positional split: the first 95% of the rows form the training set and the
# remaining rows form the test set; both keep every column of df.
train_size = int(len(df) * 0.95)
train, test = (
    part[df.columns]
    for part in (df[:train_size], df[train_size:])
)

In [11]:
# lstm autoencoder to recreate a timeseries
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
def temporalize(X, y, lookback, start_offset=2):
    """Cut a 2-D series into overlapping windows, as required for an LSTM network.

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of shape (n_rows, n_features); rows are read as ``X[[row], :]``.
    y : sequence
        One value per row of ``X``.
    lookback : int
        Number of consecutive rows in each window.
    start_offset : int, optional
        Row index at which the first window starts. The default of 2 keeps the
        historical behaviour, in which rows 0 and 1 never appear in any
        window. Pass 0 to build windows over every row of ``X``.

    Returns
    -------
    output_X : list
        One entry per window; each entry is a list of ``lookback`` arrays of
        shape (1, n_features). ``np.array(output_X)`` therefore has shape
        (n_windows, lookback, 1, n_features).
    output_y : list
        For each window, the element of ``y`` at the window's last row.

    Raises
    ------
    ValueError
        If ``start_offset`` is negative (it would silently index from the end
        of ``X``).
    """
    if start_offset < 0:
        raise ValueError("start_offset must be non-negative")

    output_X = []
    output_y = []
    # With the default offset this equals len(X) - lookback - 1; when the
    # count is zero or negative, range() is empty and two empty lists come back.
    n_windows = len(X) - lookback - start_offset + 1
    for i in range(n_windows):
        first = i + start_offset
        last = first + lookback - 1
        # Keep each row 2-D (1, n_features) so the nested output layout is
        # the same as before.
        output_X.append([X[[row], :] for row in range(first, last + 1)])
        output_y.append(y[last])
    return output_X, output_y

2023-05-03 18:11:27.474089: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
# Copy the frame's values into a standalone NumPy array.
timeseries = np.array(df.values)
# Displayed as (number of rows, number of columns).
timeseries.shape

(5000, 173)

In [17]:
import numpy as np

# Window length: number of consecutive rows per sample.
timesteps = 10
# Width of one row of the series.
n_features = timeseries.shape[1]

# y is an all-zero placeholder with one entry per row of the series.
X, y = temporalize(
    X=timeseries,
    y=np.zeros(len(timeseries)),
    lookback=timesteps,
)

# Stack the nested window lists and flatten them to
# (n_windows, timesteps, n_features).
X = np.array(X)
X = X.reshape((len(X), timesteps, n_features))

In [18]:
# Not the last expression of the cell, so X itself is not displayed.
X
# Displays the full list of labels returned by temporalize.
y

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0

In [19]:
# LSTM autoencoder: two stacked LSTM layers reduce each window to a 64-value
# vector, RepeatVector copies that vector once per timestep, and two further
# LSTM layers plus a per-timestep Dense layer expand it back to n_features
# values per step.
model = Sequential([
    LSTM(128, activation='relu', input_shape=(timesteps, n_features), return_sequences=True),
    LSTM(64, activation='relu', return_sequences=False),
    RepeatVector(timesteps),
    LSTM(64, activation='relu', return_sequences=True),
    LSTM(128, activation='relu', return_sequences=True),
    TimeDistributed(Dense(n_features)),
])
model.compile(optimizer='adam', loss='mse')
model.summary()

2023-05-03 18:16:39.694798: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 128)           154624    
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 repeat_vector (RepeatVector  (None, 10, 64)           0         
 )                                                               
                                                                 
 lstm_2 (LSTM)               (None, 10, 64)            33024     
                                                                 
 lstm_3 (LSTM)               (None, 10, 128)           98816     
                                                                 
 time_distributed (TimeDistr  (None, 10, 173)          22317     
 ibuted)                                                

In [None]:
# Train the autoencoder to reproduce its own input windows.
model.fit(X, X, epochs=300, batch_size=5, verbose=0)

# Reconstruct the training windows and print them next to the originals,
# both rounded to three decimals.
yhat = model.predict(X, verbose=0)
print('---Predicted---', np.round(yhat, 3), sep='\n')
print('---Actual---', np.round(X, 3), sep='\n')

# legacy

In [11]:
import keras

# Legacy variant: single-layer LSTM autoencoder with dropout.
# NOTE: this rebinds `model`, replacing any model built earlier in the notebook.
# Every dimension is read from X (samples, timesteps, features) so that the
# decoder's output shape matches the encoder's input shape; X is the only
# windowed array that the notebook defines.
model = keras.Sequential()
model.add(keras.layers.LSTM(
    units=64,
    input_shape=(X.shape[1], X.shape[2])
))
model.add(keras.layers.Dropout(rate=0.2))
# Repeat the encoded vector once per timestep of the input window.
model.add(keras.layers.RepeatVector(n=X.shape[1]))
model.add(keras.layers.LSTM(units=64, return_sequences=True))
model.add(keras.layers.Dropout(rate=0.2))
# One Dense projection per timestep, back to the input feature count.
model.add(
  keras.layers.TimeDistributed(
    keras.layers.Dense(units=X.shape[2])
  )
)

model.compile(loss='mae', optimizer='adam')

IndexError: tuple index out of range

In [16]:

# Add a length-1 time axis so every test row becomes one sample of shape
# (1, n_columns); the array ends up as (n_rows, 1, n_columns).
# NOTE(review): the models defined in this notebook take their timestep count
# from the windowed array X — confirm a length-1 time axis matches the model
# in use.
test_array = np.array(test)
test_array = np.reshape(test_array, (test_array.shape[0], 1, test_array.shape[1]))

# Generate predictions on test data
y_pred = model.predict(test_array)

# Denormalize the predicted values
# NOTE(review): no visible cell creates `scaler` (its only assignment is
# commented out in an earlier cell) — confirm where it comes from before
# re-running this cell.
y_pred_denorm = scaler.inverse_transform(y_pred)

# Calculate the mean squared error
# NOTE(review): `test` is a DataFrame, so np.mean presumably reduces per column
# rather than to a single scalar — confirm that this is the intended metric.
mse = np.mean(np.square(test - y_pred_denorm))

print('Test MSE:', mse)


Test MSE: latency                                                                                               0.011552
requestSize                                                                                           0.003098
responseSize                                                                                          0.053303
requestUrl_http://adservice:9555/hipstershop.AdService/GetAds                                         0.063806
requestUrl_http://cartservice:7070/hipstershop.CartService/AddItem                                    0.007903
requestUrl_http://cartservice:7070/hipstershop.CartService/EmptyCart                                  0.000008
requestUrl_http://cartservice:7070/hipstershop.CartService/GetCart                                    0.080683
requestUrl_http://checkoutservice:5050/hipstershop.CheckoutService/PlaceOrder                         0.000004
requestUrl_http://currencyservice:7000/hipstershop.CurrencyService/Convert                            

  return mean(axis=axis, dtype=dtype, out=out, **kwargs)
