# Imports

In [2]:
import pandas as pd
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

# Loading the dataset

In [4]:
# Read the sample CSV into a DataFrame, then print a banner followed by the first rows
dataset = pd.read_csv("root_cause_analysis.csv")

print(
    "\nLoaded Data :\n------------------------------------",
    dataset.head(),
    sep="\n",
)


Loaded Data :
------------------------------------
   ID  CPU_LOAD  MEMORY_LEAK_LOAD  DELAY  ERROR_1000  ERROR_1001  ERROR_1002  \
0   1         0                 0      0           0           1           0   
1   2         0                 0      0           0           0           0   
2   3         0                 1      1           0           0           1   
3   4         0                 1      0           1           1           0   
4   5         1                 1      0           1           0           1   

   ERROR_1003     ROOT_CAUSE  
0           1    MEMORY_LEAK  
1           1    MEMORY_LEAK  
2           1    MEMORY_LEAK  
3           1    MEMORY_LEAK  
4           0  NETWORK_DELAY  


# Preprocessing

## Label Encoding of the output

In [5]:
# Replace the textual ROOT_CAUSE labels with integer class ids.
# The fitted encoder stays bound to `label_encoder` for later cells.
label_encoder = LabelEncoder()
label_encoder.fit(dataset['ROOT_CAUSE'])
dataset['ROOT_CAUSE'] = label_encoder.transform(dataset['ROOT_CAUSE'])
dataset.head()

Unnamed: 0,ID,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,1
1,2,0,0,0,0,0,0,1,1
2,3,0,1,1,0,0,1,1,1
3,4,0,1,0,1,1,0,1,1
4,5,1,1,0,1,0,1,0,2


## Dropping Unnecessary Columns

In [9]:
# Drop the ID column from the table.
# errors="ignore" keeps this cell re-runnable: once ID is gone, a plain
# drop("ID", axis=1) raises KeyError on the second execution.
dataset = dataset.drop(columns="ID", errors="ignore")
dataset

Unnamed: 0,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,0,0,0,0,1,0,1,1
1,0,0,0,0,0,0,1,1
2,0,1,1,0,0,1,1,1
3,0,1,0,1,1,0,1,1
4,1,1,0,1,0,1,0,2
...,...,...,...,...,...,...,...,...
995,0,0,0,0,0,0,1,0
996,0,0,0,1,0,0,0,2
997,1,1,1,0,0,0,0,1
998,0,1,1,1,1,0,0,2


## Converting the dataset into a NumPy array

In [19]:
# Keras prefers NumPy arrays as input, so materialise the whole frame as one
# float array (copy=True so the result never aliases the DataFrame's data).
np_dataset = dataset.to_numpy(dtype=float, copy=True)

## Separating input and output columns

In [20]:
# Features are every column except the last; the target is the last column
# (ROOT_CAUSE is the final column of the frame this array was built from).
# Negative indexing replaces the len(np_dataset[0]) - 1 arithmetic, which
# also raised IndexError when the array had zero rows.
X_data = np_dataset[:, :-1]
Y_data = np_dataset[:, -1]

In [21]:
# Preview the first five feature rows
X_data[:5, :]

array([[0., 0., 0., 0., 1., 0., 1.],
       [0., 0., 0., 0., 0., 0., 1.],
       [0., 1., 1., 0., 0., 1., 1.],
       [0., 1., 0., 1., 1., 0., 1.],
       [1., 1., 0., 1., 0., 1., 0.]])

In [22]:
# Preview the first five target values
Y_data[0:5]

array([1., 1., 1., 1., 2.])

In [23]:
# One-hot encode the integer class ids of the output.
# The ids are read from the last column of np_dataset rather than from Y_data
# so the cell can be re-run safely: feeding the already one-hot Y_data back
# into to_categorical would add another axis.
# The class count comes from the fitted label encoder instead of a hard-coded 3.
Y_data = tf.keras.utils.to_categorical(
    np_dataset[:, -1], num_classes=len(label_encoder.classes_)
)
Y_data[:5]

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.]], dtype=float32)

## Train Test Split

In [24]:
# Hold out 10% of the rows as the test set.
# random_state pins the shuffle so the same rows land in train/test on every
# Restart & Run All; without it each run produces a different split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X_data, Y_data, test_size=0.10, random_state=42
)
print(X_train.shape,Y_train.shape,X_test.shape,Y_test.shape)

(900, 7) (900, 3) (100, 7) (100, 3)


# Creating the deep learning model