# Root Cause Analysis

Incident Root Cause Analysis can be employed to determine the reason of issues stated in Incident Reports

In [2]:
# Install the packages
import sys
!conda install --yes --prefix {sys.prefix} pandas tensorflow scikit-learn

Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... 
  - https://repo.continuum.io/pkgs/main/win-64/_ipyw_jlab_nb_ext_conf-0.1.0-py36he6757f0_0.tar.bz2/win-64::_ipyw_jlab_nb_ext_conf-0.1.0-py36he6757f0_0, https://repo.continuum.io/pkgs/main/win-64/conda-env-2.6.0-h36134e3_1.tar.bz2/win-64::conda-env-2.6.0-h36134e3_1, https://repo.continuum.io/pkgs/main/win-64/navigator-updater-0.1.0-py36h8a7b86b_0.tar.bz2/win-64::navigator-updater-0.1.0-py36h8a7b86b_0, https://repo.continuum.io/pkgs/main/win-64/win-64::python-graphviz-0.8.3-py36_0
  - defaults/win-64::conda-env-2.6.0-h36134e3_1, https://repo.continuum.io/pkgs/main/win-64/_ipyw_jlab_nb_ext_conf-0.1.0-py36he6757f0_0.tar.bz2/win-64::_ipyw_jlab_nb_ext_conf-0.1.0-py36he6757f0_0, https://repo.continuum.io/pkgs/main/win-64/navigator-updater-0.1.0-py36h8a7b86b_0.tar.bz2/win-64::navigator-updater-0.1.0-py36h8a7b86b_0, https://repo.continuum.io/pkgs/main/win-64/win-64::python-graphviz-0.8.3-py36_0
 

In [3]:
# Loading Dataset
import pandas as pd
import os
import tensorflow as tf

# Load the dataset into pandas dataframe
symptom_data = pd.read_csv('root_cause_analysis.csv')

  from ._conv import register_converters as _register_converters


### Explore the dataset

In [4]:
# determine the dataset datatypes
symptom_data.dtypes

ID              int64
CPU_LOAD        int64
MEMORY_LOAD     int64
DELAY           int64
ERROR_1000      int64
ERROR_1001      int64
ERROR_1002      int64
ERROR_1003      int64
ROOT_CAUSE     object
dtype: object

In [5]:
symptom_data.head()

Unnamed: 0,ID,CPU_LOAD,MEMORY_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,MEMORY
1,2,0,0,0,0,0,0,1,MEMORY
2,3,0,1,1,0,0,1,1,MEMORY
3,4,0,1,0,1,1,0,1,MEMORY
4,5,1,1,0,1,0,1,0,NETWORK_DELAY


In [6]:
symptom_data.describe()

Unnamed: 0,ID,CPU_LOAD,MEMORY_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,500.5,0.472,0.418,0.394,0.395,0.485,0.432,0.381
std,288.819436,0.499465,0.493477,0.488879,0.489095,0.500025,0.495602,0.485876
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,250.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,500.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,750.25,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,1000.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [7]:
symptom_data.info

<bound method DataFrame.info of        ID  CPU_LOAD  MEMORY_LOAD  DELAY  ERROR_1000  ERROR_1001  ERROR_1002  \
0       1         0            0      0           0           1           0   
1       2         0            0      0           0           0           0   
2       3         0            1      1           0           0           1   
3       4         0            1      0           1           1           0   
4       5         1            1      0           1           0           1   
..    ...       ...          ...    ...         ...         ...         ...   
995   996         0            0      0           0           0           0   
996   997         0            0      0           1           0           0   
997   998         1            1      1           0           0           0   
998   999         0            1      1           1           1           0   
999  1000         1            0      0           0           1           1   

     ERROR_1003    

### Data Conversion

In [8]:
# Data to be converted so that it can be provided to ML algorithm
from sklearn import preprocessing

In [9]:
label_encoder = preprocessing.LabelEncoder()
symptom_data['ROOT_CAUSE']=label_encoder.fit_transform(symptom_data['ROOT_CAUSE'])

In [10]:
symptom_data.head(10)

Unnamed: 0,ID,CPU_LOAD,MEMORY_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,1
1,2,0,0,0,0,0,0,1,1
2,3,0,1,1,0,0,1,1,1
3,4,0,1,0,1,1,0,1,1
4,5,1,1,0,1,0,1,0,2
5,6,0,0,1,1,0,0,0,2
6,7,1,0,0,1,1,0,0,2
7,8,0,0,0,1,1,0,1,0
8,9,0,1,0,0,1,0,1,1
9,10,0,0,0,1,1,0,1,2


In [11]:
# Convert pandas dataframe into numpy vector
np_symptom = symptom_data.to_numpy().astype(float)
np_symptom

array([[   1.,    0.,    0., ...,    0.,    1.,    1.],
       [   2.,    0.,    0., ...,    0.,    1.,    1.],
       [   3.,    0.,    1., ...,    1.,    1.,    1.],
       ...,
       [ 998.,    1.,    1., ...,    0.,    0.,    1.],
       [ 999.,    0.,    1., ...,    0.,    0.,    2.],
       [1000.,    1.,    0., ...,    1.,    0.,    0.]])

In [12]:
# Group the feature data
X_train = np_symptom[:,1:8]
X_train

array([[0., 0., 0., ..., 1., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 1., 1., ..., 0., 1., 1.],
       ...,
       [1., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 1., 0., 0.],
       [1., 0., 0., ..., 1., 1., 0.]])

In [13]:
# Group the target variable
Y_train = np_symptom[:,8]
Y_train = tf.keras.utils.to_categorical(Y_train,3)
Y_train

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       ...,
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.]], dtype=float32)

In [14]:
print("Feature variable shape :", X_train.shape)
print("Target variable shape :", Y_train.shape)


Feature variable shape : (1000, 7)
Target variable shape : (1000, 3)


### Building Model with Keras

In [15]:
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.regularizers import l2

In [16]:
# Initialize Training Parameters
BATCH_SIZE = 100
EPOCHS = 12
N_HIDDEN = 128
OUTPUT_CLASSES = len(label_encoder.classes_)
VALIDATION_SPLIT = 0.2
VERBOSE = 1

In [19]:
# Create Keras sequential model
model = tf.keras.models.Sequential()

# Add a dense layer
model.add(keras.layers.Dense(N_HIDDEN,input_shape = (7,),name='Dense-Layer-1',activation='relu'))

# Add a second layer
model.add(keras.layers.Dense(N_HIDDEN,name='Dense-Layer-2',activation='relu'))

# Add a softmax layer for categorical prediction
model.add(keras.layers.Dense(OUTPUT_CLASSES,name='Final',activation='softmax'))

# Compile the model using Adam optimizer
model.compile(optimizer = 'adam',loss='categorical_crossentropy',metrics=['accuracy'])

# Build the model
model.fit(X_train,Y_train,batch_size=BATCH_SIZE,epochs=EPOCHS,verbose=VERBOSE,validation_split=VALIDATION_SPLIT)

Train on 800 samples, validate on 200 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<tensorflow.python.keras.callbacks.History at 0x254bd9b2b38>