In [3]:
import numpy as np 
import pandas as pd
import tensorflow as tf
from msspackages import Pyspark_data_ingestion, get_features
from utilities import write_tensor, read_tensor
from training_input import node_autoencoder_input, pod_autoencoder_input, container_autoencoder_input
from training_input import node_pca_input, pod_pca_input, container_pca_input
from sklearn.preprocessing import StandardScaler
from evaluation import autoencoder_testing_pipeline 

#Set random seed
#np.random.seed(10)

#raw_data_s3_path = 's3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Node/Node_2022_9_11_12.parquet'
#raw_data_s3_path = 's3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Container/Container_2022_8_20_9.parquet'
#raw_data_s3_path = 's3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Pod/Pod_2022_7_10_20.parquet'

def read_raw_data(raw_data_s3_path: str) -> pd.DataFrame:
    """Load one raw parquet file into a pandas DataFrame.

    Parameters
    ----------
    raw_data_s3_path : str
        Location of the parquet file; handed unchanged to ``pd.read_parquet``.

    Returns
    -------
    pd.DataFrame
        The frame produced by ``pd.read_parquet``.
    """
    # Announce the path BEFORE touching storage, so a failed read still shows
    # which file was being requested.
    print(f"reading raw data from: {raw_data_s3_path}")

    return pd.read_parquet(raw_data_s3_path)


def save_processed(df_raw: pd.DataFrame,
                   feature_group_name, feature_input_version,
                   data_bucketname, train_data_filename, test_data_filename,
                   save_model_local_path, model_bucketname,
                   model_name, model_version,
                   sampling_column = "InstanceId",
                   file_name = 'inference'):
    """Build one inference input tensor for a randomly chosen entity and store it.

    Steps: look up the feature names and ``time_steps`` through ``get_features``,
    pick one value of ``sampling_column`` at random, keep that entity's rows
    ordered by ``'Timestamp'``, take the last ``time_steps`` rows, standardize
    the feature columns with a freshly fitted ``StandardScaler``, add a leading
    axis and hand the result to ``write_tensor``.

    Parameters
    ----------
    df_raw : pd.DataFrame
        Raw records. Must contain ``sampling_column``, ``'Timestamp'`` and
        every feature column. It is not modified.
    feature_group_name, feature_input_version
        Forwarded to ``get_features``.
    data_bucketname, train_data_filename, test_data_filename, save_model_local_path
        Not used by this function. They are accepted so that a complete
        parameter sequence can be unpacked positionally into the call, as the
        ``__main__`` block does.
    model_bucketname, model_name, model_version
        Forwarded to ``write_tensor`` as ``bucket_name``, ``model_name`` and
        ``version``.
    sampling_column : str
        Column whose unique values identify the entities to sample from
        (e.g. InstanceId for Node or pod_id for Pod).
    file_name : str
        Prefix of the name under which the tensor is written.

    Returns
    -------
    str
        ``"<file_name>_<sampling_column>_<selected id>"``, the same value that
        is passed to ``write_tensor`` as ``file_name``.

    Raises
    ------
    ValueError
        If the selected entity has fewer than ``time_steps`` rows, because a
        full-length window cannot be built from it.
    """
    #Read features and parameters
    features_df = get_features(feature_group_name, feature_input_version)
    #remove spaces that were put into the feature names by mistake
    features = [feature.strip(' ') for feature in features_df["feature_name"].to_list()]
    model_parameters = features_df["model_parameters"].iloc[0]
    # int() so the value can be used directly as a row count
    time_steps = int(model_parameters["time_steps"])

    #select one unique sampling_column value (e.g. InstanceId for Node or pod_id for Pod)
    random_id = np.random.choice(df_raw[sampling_column].unique())
    print(f'\n*** Select data with unique {sampling_column} = {random_id} ***\n')

    # Filtering and sorting both return new objects, so df_raw stays untouched
    # without copying the whole raw frame up front.
    df = (df_raw.loc[df_raw[sampling_column] == random_id]
                .sort_values(by='Timestamp')
                .reset_index(drop=True))

    # Fail loudly instead of silently producing a tensor that is too short.
    n_rows = df.shape[0]
    if n_rows < time_steps:
        raise ValueError(
            f"{sampling_column} = {random_id} has only {n_rows} rows; "
            f"at least {time_steps} are needed to build the inference window"
        )

    #select last time slice of data (positional slice: always exactly time_steps rows)
    df = df.iloc[n_rows - time_steps:][features]

    print("\n***** Inference input data shape*****")
    print(df.shape)
    print("\n*** Inference data tensor ***")
    print(df)
    print("\n***************************************\n")

    #scaler transformations
    # NOTE(review): the scaler is fitted on the inference window itself, not on
    # statistics saved from training -- confirm this is intended.
    scaler = StandardScaler()
    scaled_values = scaler.fit_transform(df[features])
    # add a leading axis of length 1 in front of the (rows, features) matrix
    inference_input_tensor = np.expand_dims(scaled_values, axis = 0)

    print("\n***** Inference input tensor shape*****")
    print(inference_input_tensor.shape)
    print("\n*** Inference input tensor ***")
    print(inference_input_tensor)
    print("\n***************************************\n")

    # str() keeps the join working when the identifier is not a string
    saved_file_name = '_'.join([file_name, sampling_column, str(random_id)])

    write_tensor(tensor = inference_input_tensor,
                 bucket_name = model_bucketname,
                 model_name = model_name,
                 version = model_version,
                 flag = "data",
                 file_name = saved_file_name)

    return saved_file_name


if __name__ == "__main__":

    #Specify raw data s3 path
    raw_data_s3_path = 's3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Node/Node_2022_9_11_12.parquet'

    #Read raw data
    df = read_raw_data(raw_data_s3_path)

    #load input parameters
    # list(...) guarantees the item assignment further down works even when
    # the helper hands back an immutable sequence such as a tuple.
    input_parameters = list(node_autoencoder_input())

    #Generate input tensor for a randomly selected sampling_column
    saved_file_name = save_processed(df, *input_parameters)

    #update save output path
    # Index 4 is the slot that save_processed receives as test_data_filename;
    # it is pointed at the tensor that was just written.
    input_parameters[4] = saved_file_name

    # The first two entries (feature group name and version) are not passed on.
    predictions, residuals = autoencoder_testing_pipeline(*input_parameters[2:])
    

reading raw data from: s3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Node/Node_2022_9_11_12.parquet

***Select data with unique InstanceId = i-09ce4c2455b1d82d6***


***** Inference input data shape*****
(20, 3)

*** Inference data tensor ***


Unnamed: 0,node_cpu_utilization,node_memory_utilization,node_network_total_bytes
36,1.059449,5.612631,101613.907208
37,1.046212,5.612908,104004.325768
38,1.060264,5.613604,100877.692029
39,1.044679,5.614027,105210.93214
40,1.058275,5.613208,102606.928022
41,1.052644,5.614708,100800.66844
42,0.913058,5.618048,96229.163978
43,0.932358,5.613245,97419.937319
44,1.074577,5.614113,102434.037018
45,1.071532,5.615401,104343.592118


None

***************************************


***** Inference input tensor shape*****
(1, 20, 3)

*** Inference input tensor ***
[[[ 0.41329072 -0.8645782  -0.13751187]
  [ 0.12922705 -0.67815741  0.73694289]
  [ 0.43077495 -0.20796275 -0.40683159]
  [ 0.09631684  0.07788246  1.17833951]
  [ 0.38807797 -0.47516588  0.22575161]
  [ 0.26724892  0.53772041 -0.4350081 ]
  [-2.72821326  2.79341198 -2.10734026]
  [-2.31403467 -0.45030978 -1.67173562]
  [ 0.73792699  0.13588004  0.16250522]
  [ 0.67256574  1.00584373  0.86105214]
  [ 0.22477914  0.35337097 -0.60919899]
  [ 0.81947325  1.68317261  1.06889825]
  [ 0.40120264 -0.34467133  0.71243527]
  [ 0.54047599 -1.09863986 -0.38399808]
  [ 1.30660019  0.18973494  1.56839332]
  [ 0.13143435  0.46729478 -1.02988441]
  [-1.00314445 -0.38609817  0.55263552]
  [-0.80067029 -1.43626863 -1.14380576]
  [-0.56826838 -1.54397842  1.4214299 ]
  [ 0.8549363   0.24151849 -0.56306896]]]

***************************************

writing tensor to: mss-sh

In [2]:
!pip install tensorflow

Keyring is skipped due to an exception: 'keyring.backends'
Collecting tensorflow
  Using cached tensorflow-2.11.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (588.3 MB)
Collecting opt-einsum>=2.3.2
  Using cached opt_einsum-3.3.0-py3-none-any.whl (65 kB)
Collecting gast<=0.4.0,>=0.2.1
  Using cached gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting grpcio<2.0,>=1.24.3
  Using cached grpcio-1.51.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.8 MB)
Collecting protobuf<3.20,>=3.9.2
  Using cached protobuf-3.19.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
Collecting keras<2.12,>=2.11.0
  Using cached keras-2.11.0-py2.py3-none-any.whl (1.7 MB)
Collecting tensorflow-estimator<2.12,>=2.11.0
  Using cached tensorflow_estimator-2.11.0-py2.py3-none-any.whl (439 kB)
Collecting astunparse>=1.6.0
  Using cached astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting libclang>=13.0.0
  Using cached libclang-14.0.6-py2.py3-none-manylinux2010_x86_64

In [5]:
# Show the kernel's current working directory (bare name resolved by IPython automagic to %pwd)
pwd

'/root/CodeCommit/eks-ml-pipeline/eks_ml_pipeline'

In [3]:
!pip uninstall -y msspackages

Keyring is skipped due to an exception: 'keyring.backends'
[0m

In [4]:
!pip install /root/CodeCommit/msspackages/dist/msspackages-0.0.7-py3-none-any.whl

Keyring is skipped due to an exception: 'keyring.backends'
Processing /root/CodeCommit/msspackages/dist/msspackages-0.0.7-py3-none-any.whl
Collecting configparser
  Using cached configparser-5.3.0-py3-none-any.whl (19 kB)
Collecting pyspark
  Using cached pyspark-3.3.1-py2.py3-none-any.whl
Collecting py4j==0.10.9.5
  Using cached py4j-0.10.9.5-py2.py3-none-any.whl (199 kB)
Installing collected packages: py4j, pyspark, configparser, msspackages
Successfully installed configparser-5.3.0 msspackages-0.0.7 py4j-0.10.9.5 pyspark-3.3.1
[0m

In [5]:
# Import the runner setup helper from msspackages and call it with no arguments.
# NOTE(review): what setup_runner() configures is not visible in this notebook -- confirm.
from msspackages import setup_runner
setup_runner()




In [6]:
!python3 -m pip install -r requirements.txt

Keyring is skipped due to an exception: 'keyring.backends'
[31mERROR: Could not open requirements file: [Errno 2] No such file or directory: 'requirements.txt'[0m[31m
[0m

In [53]:
import numpy as np 
import tensorflow as tf
from msspackages import Pyspark_data_ingestion, get_features
from utilities import write_tensor, read_tensor
from training_input import node_autoencoder_input, pod_autoencoder_input, container_autoencoder_input
from training_input import node_pca_input, pod_pca_input, container_pca_input
from sklearn.preprocessing import StandardScaler

#Set random seed
# Seeds NumPy's global RNG, which the np.random.choice calls in later cells draw from
np.random.seed(10)

In [8]:
# Unpack the nine standard inputs for the node autoencoder, in the order the
# helper returns them.
(
    feature_group_name,
    feature_input_version,
    data_bucketname,
    train_data_filename,
    test_data_filename,
    save_model_local_path,
    model_bucketname,
    model_name,
    model_version,
) = node_autoencoder_input()

# Column used to pick a single entity; for pod data this would be pod_id
# (to be added to the input function).
sampling_column = "InstanceId"


In [9]:
#set rec type
rec_type = 'Node'

#Select a random hour of the day (an integer from 0 to 23)
random_hour = np.random.choice(24)

#Select input for data ingestion
input_year, input_month, input_day = ["2022", "10", "10"]

#Read raw data
pyspark_data = Pyspark_data_ingestion(year = input_year, month = input_month, day = input_day, hour = random_hour,
                                      setup = "default", filter_column_value = rec_type)
err, pyspark_df = pyspark_data.read()

# Stop here when ingestion did not report 'PASS'. Merely printing a message
# would leave `df` undefined (or stale from an earlier run) for the cells below.
if err != 'PASS':
    raise RuntimeError(
        f"No data available for {rec_type} on "
        f"{input_year}-{input_month}-{input_day} hour {random_hour} (status: {err})"
    )

#To Pandas
df = pyspark_df.toPandas()

In [14]:
# Report the frame's (rows, columns) and preview its first rows
print(df.shape)
df.head()

(27947, 56)


Unnamed: 0,account_id,log_group_name,log_stream_name,record_id,stream_name,record_arrival_stream_timestamp,record_arrival_stream_epochtime,log_event_timestamp,log_event_epochtime,log_event_id,...,node_network_rx_errors,node_network_rx_packets,node_network_total_bytes,node_network_tx_bytes,node_network_tx_dropped,node_network_tx_errors,node_network_tx_packets,node_number_of_running_containers,node_number_of_running_pods,region
0,573697193355,/aws/containerinsights/oracle-ndc-eks-cluster-...,ip-172-24-69-80.ec2.internal,4963238480310897860607030469547306236643781540...,dp-us-west-2-container-insights-performance-me...,2022-10-10 09:02:21,1665392541,2022-10-10 09:02:15,1665392535000,3713949457856999275082516968948429878346659417...,...,0.0,3053.362137,1873642.0,955745.979576,0.0,0.0,3043.938341,46,26,us-west-2
1,866535947663,/aws/containerinsights/mt-ndc-eks-cluster-dev-...,ip-172-24-67-95.ec2.internal,4963238480310897860607030469547668914389665929...,dp-us-west-2-container-insights-performance-me...,2022-10-10 09:02:21,1665392541,2022-10-10 09:02:16,1665392536000,3713949460087073794935579283167051793039784622...,...,0.0,39.225817,26424.22,16304.664582,0.0,0.0,38.698442,16,12,us-west-2
2,700642182785,/aws/containerinsights/am-ndc-eks-cluster-dev-...,ip-172-27-0-213.ec2.internal,4963238480310897860607030469548515162463396169...,dp-us-west-2-container-insights-performance-me...,2022-10-10 09:02:21,1665392541,2022-10-10 09:02:16,1665392536000,3713949460087073794935579283180947496093588277...,...,0.0,79.982878,45088.26,21093.793793,0.0,0.0,81.269201,16,11,us-west-2
3,573697193355,/aws/containerinsights/oracle-ndc-eks-cluster-...,ip-172-24-69-73.ec2.internal,4963238480310897860607030469548998732791242021...,dp-us-west-2-container-insights-performance-me...,2022-10-10 09:02:21,1665392541,2022-10-10 09:02:16,1665392536000,3713949460087073794935579283209803335448096210...,...,0.0,1736.63099,548692.6,319622.614514,0.0,0.0,1746.431302,32,19,us-west-2
4,700642182785,/aws/containerinsights/am-ndc-eks-cluster-dev-...,ip-172-27-0-180.ec2.internal,4963238480310897860607030469560967098405426891...,dp-us-west-2-container-insights-performance-me...,2022-10-10 09:02:27,1665392547,2022-10-10 09:02:22,1665392542000,3713949473467520914053953168850961208717091689...,...,0.0,70.616759,41535.71,20422.126695,0.0,0.0,72.790082,23,14,us-west-2


In [47]:
import pandas as pd

In [48]:
# Load the Node inference-data parquet from S3 and preview one row.
# Note: this rebinds `df`, replacing whatever frame it held before.
df = pd.read_parquet('s3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Node/Node_2022_9_11_12.parquet')
print(df.shape)
df.head(1)

(26926, 56)


Unnamed: 0,account_id,log_group_name,log_stream_name,record_id,stream_name,record_arrival_stream_timestamp,record_arrival_stream_epochtime,log_event_timestamp,log_event_epochtime,log_event_id,...,node_network_rx_errors,node_network_rx_packets,node_network_total_bytes,node_network_tx_bytes,node_network_tx_dropped,node_network_tx_errors,node_network_tx_packets,node_number_of_running_containers,node_number_of_running_pods,region
0,573697193355,/aws/containerinsights/oracle-ndc-eks-cluster-...,ip-172-24-69-107.ec2.internal,4963238480310897860607014874455008092334096691...,dp-us-west-2-container-insights-performance-me...,2022-09-11 11:58:53,1662897533,2022-09-11 11:58:47,1662897527000,3708385404089369725783903666081033223917887905...,...,0.0,3194.570024,1125493.0,710987.597484,0.0,0.0,3223.322,30,18,us-west-2


In [49]:
# Load the Container inference-data parquet from S3 and preview one row.
# Note: this rebinds `df`, replacing whatever frame it held before.
df = pd.read_parquet('s3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Container/Container_2022_8_20_9.parquet')
print(df.shape)
df.head(1)

(3215011, 47)


Unnamed: 0,account_id,log_group_name,log_stream_name,record_id,stream_name,record_arrival_stream_timestamp,record_arrival_stream_epochtime,log_event_timestamp,log_event_epochtime,log_event_id,...,container_memory_request,container_memory_rss,container_memory_swap,container_memory_usage,container_memory_utilization,container_memory_working_set,container_status,kubernetes,number_of_container_restarts,region
0,237450111201,/aws/containerinsights/oracle-ndc-eks-cluster-...,ip-172-16-69-112.us-west-2.compute.internal,4963238480310897860607000146349906113757808661...,dp-us-west-2-container-insights-performance-me...,2022-08-20 09:03:26,1660986206,2022-08-20 09:03:21,1660986201000,3704123004677637051603010711104262139087824652...,...,10737420000.0,2465083392,0,2650927104,0.939759,2512785408,Running,"{""container_name"":""mysqlndbcluster"",""docker"":{...",0.0,us-west-2


In [51]:
# Load the Pod inference-data parquet from S3 and preview one row.
# Note: this rebinds `df`, replacing whatever frame it held before.
df = pd.read_parquet('s3://dish-5g.core.pd.g.dp.eks.logs.e/inference_data/Pod/Pod_2022_7_10_20.parquet')
print(df.shape)
df.head(1)

(959674, 63)


Unnamed: 0,account_id,log_group_name,log_stream_name,record_id,stream_name,record_arrival_stream_timestamp,record_arrival_stream_epochtime,log_event_timestamp,log_event_epochtime,log_event_id,...,pod_network_total_bytes,pod_network_tx_bytes,pod_network_tx_dropped,pod_network_tx_errors,pod_network_tx_packets,pod_number_of_container_restarts,pod_number_of_containers,pod_number_of_running_containers,pod_status,region
0,675136609689,/aws/containerinsights/nk-ndc-eks-cluster-dev-...,ip-100-64-13-218.ec2.internal,4962796749719051212894668074468894478886881729...,dp-us-west-2-container-insights-performance-me...,2022-07-10 20:02:12,1657483332,2022-07-10 20:02:06,1657483326000,3696311332393906755948896546619186255064620951...,...,8170.309286,719.206072,0.0,0.0,5.032304,0.0,1,1,Running,us-west-2


In [32]:

# Feature metadata for the selected feature group and version
features_df = get_features(feature_group_name, feature_input_version)
# Feature names, with stray spaces trimmed from both ends
features = [name.strip(' ') for name in features_df["feature_name"].to_list()]
# Model settings are taken from the first metadata row
model_parameters = features_df["model_parameters"].iloc[0]
time_steps = model_parameters["time_steps"]


# #load parquet files from s3 and convert to pandas
# df = pandas.read_parquet(s3_path)

# df.head()

In [33]:
# Draw one value of sampling_column at random
random_id = np.random.choice(df[sampling_column].unique())
print(f'\n***Select data unique {sampling_column} = {random_id}***\n')
# Keep only that entity's rows, ordered by time, with a fresh 0..n-1 index.
# NOTE: `df` is rebound here, so re-running this cell samples from the
# already-filtered frame.
df = (
    df.loc[df[sampling_column] == random_id]
      .sort_values(by='Timestamp')
      .reset_index(drop=True)
)
print(df.shape)
df.head(1)


***Select data unique InstanceId = i-03421a696fce8926a***

(58, 56)


Unnamed: 0,account_id,log_group_name,log_stream_name,record_id,stream_name,record_arrival_stream_timestamp,record_arrival_stream_epochtime,log_event_timestamp,log_event_epochtime,log_event_id,...,node_network_rx_errors,node_network_rx_packets,node_network_total_bytes,node_network_tx_bytes,node_network_tx_dropped,node_network_tx_errors,node_network_tx_packets,node_number_of_running_containers,node_number_of_running_pods,region
0,888907375722,/aws/containerinsights/at-ndc-eks-cluster-dev-...,ip-10-249-128-122.ec2.internal,4963238480310897860607030469585266507379681054...,dp-us-west-2-container-insights-performance-me...,2022-10-10 09:02:44,1665392564,2022-10-10 09:02:39,1665392559000,3713949511378787751556012511486583686680834299...,...,0.0,155.840252,55345.455348,28365.164458,0.0,0.0,151.000296,14,9,us-west-2


In [34]:
# Display the cleaned list of feature names
features

['node_cpu_utilization', 'node_memory_utilization', 'node_network_total_bytes']

In [42]:
#scaler transformations
# NOTE(review): here the scaler is fitted on every row in `df` before the last
# window is cut out, whereas save_processed cuts the window first and fits on
# it alone -- confirm which order is intended.
scaler = StandardScaler()

scaled_features = ["scaled_" + feature for feature in features]
df[scaled_features] = scaler.fit_transform(df[features])

# Position of the first row of the last time_steps-long window
start = df.shape[0] - time_steps
if start < 0:
    # Fail loudly rather than build a tensor with fewer than time_steps rows
    raise ValueError(
        f"only {df.shape[0]} rows available; at least {time_steps} are needed"
    )
# Positional slice, so the window does not depend on the index labels
inference_input_tensor = np.expand_dims(df.iloc[start:][scaled_features], axis = 0)

print("\n***** Inference input tensor shape*****")
print(inference_input_tensor.shape)
print("\n*** Inference input tensor ***")
print(inference_input_tensor)
print("\n***************************************\n")


*** Inference input tensor shape***
(1, 20, 3)

*** Inference input tensor ***
[[[-1.340959    0.00610795  3.03932179]
  [-0.33049384 -0.45057572 -0.31257483]
  [-0.53523789 -1.22296156 -0.35456141]
  [ 0.03188856 -1.08800491 -0.21911584]
  [ 1.07445016 -0.59878704 -0.38262058]
  [-0.9038839  -1.07234029 -0.37804837]
  [-0.82220693 -0.02883618 -0.23764066]
  [ 1.83305968 -1.08679994 -0.36604461]
  [ 1.62691222 -1.45552079 -0.29416669]
  [ 0.75997346 -0.77350771 -0.34383583]
  [-1.28725319 -0.45419063  3.00639784]
  [-0.94772709 -1.26754545 -0.3179925 ]
  [-0.03353068 -1.86521062 -0.29753206]
  [-1.63898663 -1.87605536 -0.29090519]
  [-0.1208679  -0.36381787 -0.35833839]
  [-0.53142477 -0.39755703 -0.31438826]
  [-1.21328779 -0.87231526 -0.28371041]
  [-2.08011487 -0.02281133 -0.32185366]
  [-1.14031563 -0.2939296  -0.35522561]
  [-0.2740679   0.20251808 -0.32621972]]]

***********************************



In [44]:
# Persist the inference tensor through the project's write_tensor helper.
# NOTE(review): this call passes model_data_type=..., whereas save_processed in
# the first cell calls write_tensor with flag=... and file_name=... -- confirm
# which keyword set the current utilities.write_tensor accepts.
write_tensor(tensor = inference_input_tensor, 
             bucket_name = model_bucketname, 
             model_name = model_name, 
             version = model_version, 
             model_data_type = 'inference_input_tensor')

'emr-serverless-output-pd/node_autoencoder_test/v0.0.1/data/tensors/inference_input_tensor.npy'

In [None]:
# Run the autoencoder testing pipeline on the standard node inputs,
# skipping the first two entries returned by node_autoencoder_input()
autoencoder_testing_pipeline(*node_autoencoder_input()[2:])