In [1]:
import pandas as pd
import os
import tensorflow as tf

# Read the symptom / root-cause records from the CSV file into a DataFrame.
symptom_data = pd.read_csv(filepath_or_buffer="root_cause_analysis.csv")
# Show the first five rows as a quick sanity check of the loaded columns.
symptom_data.head(n=5)

Unnamed: 0,ID,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,MEMORY_LEAK
1,2,0,0,0,0,0,0,1,MEMORY_LEAK
2,3,0,1,1,0,0,1,1,MEMORY_LEAK
3,4,0,1,0,1,1,0,1,MEMORY_LEAK
4,5,1,1,0,1,0,1,0,NETWORK_DELAY


In [3]:
# Summarise column names, non-null counts and dtypes of the loaded frame.
symptom_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   ID                1000 non-null   int64 
 1   CPU_LOAD          1000 non-null   int64 
 2   MEMORY_LEAK_LOAD  1000 non-null   int64 
 3   DELAY             1000 non-null   int64 
 4   ERROR_1000        1000 non-null   int64 
 5   ERROR_1001        1000 non-null   int64 
 6   ERROR_1002        1000 non-null   int64 
 7   ERROR_1003        1000 non-null   int64 
 8   ROOT_CAUSE        1000 non-null   object
dtypes: int64(8), object(1)
memory usage: 70.4+ KB


In [5]:
# (rows, columns) of the loaded frame.
symptom_data.shape

(1000, 9)

In [6]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Encode the string ROOT_CAUSE target as integer class ids.
# The fitted encoder is kept so predictions can be mapped back to names later.
# NOTE: the column is overwritten in place, so re-running this cell on the
# already-encoded frame would refit the encoder on integers instead of names.
label_encoder = preprocessing.LabelEncoder().fit(symptom_data["ROOT_CAUSE"])
symptom_data["ROOT_CAUSE"] = label_encoder.transform(symptom_data["ROOT_CAUSE"])

In [8]:
# Preview the frame again; ROOT_CAUSE now holds the integer labels.
symptom_data.head()

Unnamed: 0,ID,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,1
1,2,0,0,0,0,0,0,1,1
2,3,0,1,1,0,0,1,1,1
3,4,0,1,0,1,1,0,1,1
4,5,1,1,0,1,0,1,0,2


In [10]:
# Convert the whole DataFrame (ID, symptom flags and encoded target) into a
# single 2-D NumPy array of floats.
np_symptom = (
    symptom_data
    .to_numpy()
    .astype(float)
)

In [12]:
# Peek at the first two rows of the float array.
np_symptom[0:2]

array([[1., 0., 0., 0., 0., 1., 0., 1., 1.],
       [2., 0., 0., 0., 0., 0., 0., 1., 1.]])

In [15]:
# Split the array into model inputs and target.
# Column 0 is the record ID and is left out; columns 1-7 are the symptom
# flags and column 8 is the encoded ROOT_CAUSE.
X_data, Y_data = np_symptom[:, 1:8], np_symptom[:, 8]

In [16]:
# One-hot encode the integer class ids so they match the categorical
# cross-entropy loss used by the model.
# The class count comes from the fitted label encoder rather than a
# hard-coded 3, so it stays correct if the set of root causes changes.
Y_data = tf.keras.utils.to_categorical(
    Y_data, num_classes=len(label_encoder.classes_)
)

In [17]:
# Hold out 10% of the records for testing. A fixed random_state makes the
# shuffle (and therefore the split) identical on every run, so results can be
# compared between runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_data, Y_data, test_size=0.10, random_state=42
)

In [19]:
from tensorflow import keras

# Number of distinct root causes the network has to tell apart.
output_class = len(label_encoder.classes_)
print("Output class", output_class)

# Feed-forward classifier: 7 symptom inputs -> two 128-unit ReLU hidden
# layers -> softmax over the root-cause classes.
model = tf.keras.models.Sequential([
    keras.layers.Dense(128, activation="relu", input_shape=(7,), name="Dense-Layer-1"),
    keras.layers.Dense(128, activation="relu", name="Dense-layer-2"),
    keras.layers.Dense(output_class, activation="softmax", name="Final"),
])

# Categorical cross-entropy matches the one-hot targets; accuracy is tracked
# as a metric. No optimizer is passed, so Keras falls back to its default.
model.compile(loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

Output class 3
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense-Layer-1 (Dense)       (None, 128)               1024      
                                                                 
 Dense-layer-2 (Dense)       (None, 128)               16512     
                                                                 
 Final (Dense)               (None, 3)                 387       
                                                                 
Total params: 17923 (70.01 KB)
Trainable params: 17923 (70.01 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [20]:
# Train the network: 20 epochs, mini-batches of 64, with 20% of the training
# rows held back for validation.
model.fit(
    x=X_train,
    y=Y_train,
    epochs=20,
    batch_size=64,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x1de3137edc0>

In [22]:
# Score the trained model on the held-out test split; the result is
# [loss, accuracy], matching the loss and metric given to compile().
model.evaluate(x=X_test, y=Y_test)



[0.4048117399215698, 0.8399999737739563]

In [24]:
import numpy as np

# Symptom flags for a single incident, listed in the same order as the
# feature columns the model was trained on.
CPU_LOAD = 1
MEMORY_LOAD = 0
DELAY = 0
ERROR_1000 = 0
ERROR_1001 = 1
ERROR_1002 = 1
ERROR_1003 = 0

# Build an explicit (1, 7) NumPy array for predict(): a bare nested Python
# list is not a reliably supported input type across Keras versions.
# argmax over axis 1 picks the most probable class id for the sample.
prediction = np.argmax(
    model.predict(
        np.array([[CPU_LOAD, MEMORY_LOAD, DELAY,
                   ERROR_1000, ERROR_1001, ERROR_1002, ERROR_1003]])
    ),
    axis=1,
)

# Convert the numeric class id back to its root-cause name.
print(label_encoder.inverse_transform(prediction))

['DATABASE_ISSUE']


In [28]:
# Predict root causes for several incidents at once.
# Each row is one incident with the 7 symptom flags in training-column order.
# The rows are wrapped in a NumPy array because a bare nested Python list is
# not a reliably supported predict() input across Keras versions.
print(label_encoder.inverse_transform(np.argmax(model.predict(
    np.array([[1, 0, 0, 1, 1, 0, 0],
              [0, 1, 1, 1, 0, 0, 0],
              [0, 0, 0, 0, 0, 0, 0],
              [1, 0, 1, 0, 1, 1, 1]])), axis=1
)))

['NETWORK_DELAY' 'NETWORK_DELAY' 'MEMORY_LEAK' 'DATABASE_ISSUE']
