#  Incident Root Cause Analysis 

Incident reports in ITOps usually state only the symptoms. Quickly identifying the root cause behind a symptom is a key factor in reducing resolution times and improving user satisfaction.

# 06.02. Preprocessing Incident Data

### Loading the Dataset

In [120]:
import pandas as pd
import os
import tensorflow as tf

# Read the incident records from the CSV file into a pandas DataFrame.
symptom_data = pd.read_csv(filepath_or_buffer="root_cause_analysis.csv")

# Print the column dtypes, then let the notebook render the first five rows
# (the trailing expression is what the cell displays).
print(symptom_data.dtypes)
symptom_data.head(n=5)

ID                   int64
CPU_LOAD             int64
MEMORY_LEAK_LOAD     int64
DELAY                int64
ERROR_1000           int64
ERROR_1001           int64
ERROR_1002           int64
ERROR_1003           int64
ROOT_CAUSE          object
dtype: object


Unnamed: 0,ID,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,MEMORY_LEAK
1,2,0,0,0,0,0,0,1,MEMORY_LEAK
2,3,0,1,1,0,0,1,1,MEMORY_LEAK
3,4,0,1,0,1,1,0,1,MEMORY_LEAK
4,5,1,1,0,1,0,1,0,NETWORK_DELAY


### Convert  data

Input data needs to be converted to formats that can be consumed by ML algorithms

In [121]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

# Encode the ROOT_CAUSE strings as integer class ids. The encoded column is
# placed in a new frame instead of being written back into `symptom_data`, so
# re-running this cell always fits the encoder on the original string labels.
# (Fitting it on already-encoded integers would make inverse_transform return
# numbers instead of root-cause names.)
label_encoder = preprocessing.LabelEncoder()
encoded_symptom_data = symptom_data.assign(
    ROOT_CAUSE=label_encoder.fit_transform(symptom_data['ROOT_CAUSE']))

#Convert the encoded DataFrame to a float numpy matrix
np_symptom = encoded_symptom_data.to_numpy().astype(float)

#Extract the feature variables (X): columns 1-7, skipping the ID in column 0
X_data = np_symptom[:,1:8]

#Extract the target variable (Y) and convert it to one-hot encoding.
#The class count comes from the fitted encoder rather than a literal.
Y_data = np_symptom[:,8]
Y_data = tf.keras.utils.to_categorical(Y_data, len(label_encoder.classes_))

#Split training and test data; the fixed random_state makes the split repeatable
X_train,X_test,Y_train,Y_test = train_test_split(
    X_data, Y_data, test_size=0.10, random_state=42)

print("Shape of feature variables :", X_train.shape)
print("Shape of target variable :",Y_train.shape)

Shape of feature variables : (900, 7)
Shape of target variable : (900, 3)


## 06.03. Building and evaluating the model

In [122]:
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.regularizers import l2

#Setup Training Parameters
EPOCHS=20
BATCH_SIZE=64
VERBOSE=1
OUTPUT_CLASSES=len(label_encoder.classes_)
N_HIDDEN=128
VALIDATION_SPLIT=0.2

#Create a Keras sequential model
model = tf.keras.models.Sequential()
#Add a Dense Layer; the input width is read from the training matrix so it
#cannot drift from the feature slice chosen during preprocessing
model.add(keras.layers.Dense(N_HIDDEN,
                             input_shape=(X_train.shape[1],),
                              name='Dense-Layer-1',
                              activation='relu'))

#Add a second dense layer
model.add(keras.layers.Dense(N_HIDDEN,
                              name='Dense-Layer-2',
                              activation='relu'))

#Add a softmax layer for categorical prediction
model.add(keras.layers.Dense(OUTPUT_CLASSES,
                             name='Final',
                             activation='softmax'))

#Compile the model. 'rmsprop' is the optimizer compile() falls back to when
#none is given; it is spelled out here so the training setup is explicit.
model.compile(
              optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


model.summary()

#Train the model, holding out VALIDATION_SPLIT of the training rows for validation
model.fit(X_train,
          Y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          verbose=VERBOSE,
          validation_split=VALIDATION_SPLIT)


#Evaluate the model against the test dataset and print results
print("\nEvaluation against Test Dataset :\n------------------------------------")
model.evaluate(X_test,Y_test)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense-Layer-1 (Dense)       (None, 128)               1024      
                                                                 
 Dense-Layer-2 (Dense)       (None, 128)               16512     
                                                                 
 Final (Dense)               (None, 3)                 387       
                                                                 
Total params: 17,923
Trainable params: 17,923
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Evaluation against Test Dataset :
------------------------------------


[0.5743610262870789, 0.7599999904632568]

## 06.04. Predicting Root Causes

In [123]:
#Pass individual flags to Predict the root cause
import numpy as np

# Symptom flags for one incident, listed in the same order as the feature
# columns used for training (MEMORY_LOAD stands for MEMORY_LEAK_LOAD).
CPU_LOAD=1
MEMORY_LOAD=0
DELAY=0
ERROR_1000=0
ERROR_1001=1
ERROR_1002=1
ERROR_1003=0

# Wrap the flags in a (1, 7) float array. A bare nested Python list is
# ambiguous for Keras (a top-level list can be read as "one entry per model
# input"), so an explicit ndarray is passed instead.
single_incident = np.array(
    [[CPU_LOAD,MEMORY_LOAD,DELAY,
      ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003]], dtype="float32")

# argmax over the class axis picks the most probable class id per row
prediction=np.argmax(model.predict(single_incident), axis=1 )

# Map the class id back to its root-cause name
print(label_encoder.inverse_transform(prediction))

['DATABASE_ISSUE']


In [124]:
#Predicting as a Batch
# One row per incident, seven symptom flags per row. The rows are wrapped in
# an ndarray because a top-level Python list is ambiguous for Keras (it can be
# read as a list of separate model inputs).
incident_batch = np.array([[1,0,0,0,1,1,0],
                           [0,1,1,1,0,0,0],
                           [1,1,0,1,1,0,1],
                           [0,0,0,0,0,1,0],
                           [1,0,1,0,1,1,1]], dtype="float32")

# argmax over the class axis gives one class id per row, which is then
# translated back to the root-cause names
print(label_encoder.inverse_transform(np.argmax(
        model.predict(incident_batch), axis=1 )))

['DATABASE_ISSUE' 'NETWORK_DELAY' 'MEMORY_LEAK' 'DATABASE_ISSUE'
 'DATABASE_ISSUE']


## My Own Model

### Libraries 

In [141]:
from random import shuffle
from tensorflow import keras
from keras.layers import Input,Dense
from keras import initializers,Model
import numpy as np

### Constants

In [142]:
# CSV file read by the data-loading cell below.
readDataFile: str = "root_cause_analysis.csv"
# Mini-batch size handed to model.fit (rebinds the BATCH_SIZE set earlier).
BATCH_SIZE: int = 16
# Number of passes over the training data.
epochs: int = 50

### Loading the data

In [143]:
# Read the CSV again for the second model, then keep the column labels and
# the raw values array side by side.
table = pd.read_csv(readDataFile)
attributes, data = table.columns, table.values

### Preprocessing Data

In [144]:
def generate(array):
    """One-hot encode root-cause labels.

    Each entry of ``array`` is compared against the known label names:
    "MEMORY_LEAK" becomes [1,0,0], "NETWORK_DELAY" becomes [0,1,0], and
    every other value becomes [0,0,1].

    Parameters
    ----------
    array : iterable
        Labels to encode; entries may be plain strings or one-element arrays.

    Returns
    -------
    numpy.ndarray
        One encoded row per entry of ``array``.
    """
    def encode(label):
        # Equality checks (rather than a dict lookup) so that one-element
        # array entries, which are not hashable, are handled too.
        if label == "MEMORY_LEAK":
            return [1,0,0]
        if label == "NETWORK_DELAY":
            return [0,1,0]
        return [0,0,1]

    return np.array([encode(label) for label in array])

# Shuffle the rows in place. numpy's shuffle is used because random.shuffle
# swaps elements through views when handed a 2-D array, which overwrites rows
# and leaves duplicated records instead of a true permutation.
np.random.shuffle(data)

# Distinct root-cause labels (column 8) in order of first appearance.
listOfClasses = list(dict.fromkeys(data[:, 8]))
classes = len(listOfClasses)

# 80% / 10% / 10% split into train, validation and test rows.
train_end = int(0.8*len(data))
validation_end = int(0.9*len(data))
train = data[:train_end]
validation = data[train_end:validation_end]
test = data[validation_end:]

# NOTE(review): columns 0-7 are used as features, so column 0 (ID in the
# dtype listing above) is fed to the network as well. The model cell declares
# an 8-wide input to match; dropping ID means changing both places together.
trainx = train[:,0:8].astype("float32")
trainy = generate(train[:,8:9])
validationx = validation[:,0:8].astype("float32")
validationy = generate(validation[:,8:9])
testx = test[:,0:8].astype("float32")
testy = generate(test[:,8:9])

In [145]:
# Shapes of the training features and their one-hot labels.
(trainx.shape, trainy.shape)

((800, 8), (800, 3))

### Network Architecture

In [146]:
# Fully connected classifier: two ReLU hidden layers followed by a softmax
# output. The input width and the number of output units are read from the
# training arrays so the network always matches what preprocessing produced.
input_layer = Input((trainx.shape[1],))
hidden_layer1 = Dense(128,activation='relu')(input_layer)
# Each layer gets its own RandomNormal initializer instance.
hidden_layer2 = Dense(64,activation='relu',kernel_initializer=initializers.RandomNormal(stddev=0.1))(hidden_layer1)
output_layer = Dense(trainy.shape[1],activation="softmax",kernel_initializer=initializers.RandomNormal(stddev=0.1))(hidden_layer2)
# Rebinds `model`; the Sequential model built earlier is no longer reachable by that name.
model = Model(inputs = input_layer,outputs = output_layer)
model.summary()

Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_15 (InputLayer)       [(None, 8)]               0         
                                                                 
 dense_34 (Dense)            (None, 128)               1152      
                                                                 
 dense_35 (Dense)            (None, 64)                8256      
                                                                 
 dense_36 (Dense)            (None, 3)                 195       
                                                                 
Total params: 9,603
Trainable params: 9,603
Non-trainable params: 0
_________________________________________________________________


### Training the model

In [147]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    optimizer="rmsprop",
    loss="categorical_crossentropy",
    metrics=['accuracy'],
)
# Train on the training split and validate on the dedicated validation split.
# The fit call is the last expression, so its History object is shown as the
# cell output.
model.fit(
    x=trainx,
    y=trainy,
    batch_size=BATCH_SIZE,
    epochs=epochs,
    verbose=1,
    validation_data=(validationx, validationy),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fce1bf52f40>

In [148]:
# Print a header, then evaluate on the held-out test split; the value returned
# by evaluate is displayed as the cell output.
print("\nEvaluation against Test Dataset :\n------------------------------------")
model.evaluate(x=testx, y=testy)


Evaluation against Test Dataset :
------------------------------------


[1.31907057762146, 0.6399999856948853]