In [7]:
import pandas as pd
import os
import tensorflow as tf
from sklearn import preprocessing
from tensorflow import keras
from tensorflow.keras import optimizers
from tensorflow.keras.regularizers import l2
import numpy as np

# Read the labelled symptom records into a DataFrame.
symptom_data = pd.read_csv(
    '/content/drive/MyDrive/college/mca/societe generale hackathon/root_cause_analysis.csv'
)

# Print the dtype of every column, then let the first rows render as the
# cell's rich output.
column_types = symptom_data.dtypes
print(column_types)
symptom_data.head()

ID              int64
CPU_LOAD        int64
MEMORY_LOAD     int64
DELAY           int64
ERROR_1000      int64
ERROR_1001      int64
ERROR_1002      int64
ERROR_1003      int64
ROOT_CAUSE     object
dtype: object


Unnamed: 0,ID,CPU_LOAD,MEMORY_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,MEMORY
1,2,0,0,0,0,0,0,1,MEMORY
2,3,0,1,1,0,0,1,1,MEMORY
3,4,0,1,0,1,1,0,1,MEMORY
4,5,1,1,0,1,0,1,0,NETWORK_DELAY


In [8]:
# Build the training matrices from `symptom_data`.
#
# Columns are selected by name rather than by position so a reordered or
# extended CSV cannot silently shift a feature into the wrong slot.
FEATURE_COLUMNS = [
    'CPU_LOAD', 'MEMORY_LOAD', 'DELAY',
    'ERROR_1000', 'ERROR_1001', 'ERROR_1002', 'ERROR_1003',
]
TARGET_COLUMN = 'ROOT_CAUSE'

# Map each root-cause string to an integer class id. The raw frame is left
# untouched so this cell can be re-run safely: overwriting ROOT_CAUSE in place
# would make a second run fit the encoder on integers, after which
# inverse_transform would return numbers instead of names.
label_encoder = preprocessing.LabelEncoder()
root_cause_ids = label_encoder.fit_transform(symptom_data[TARGET_COLUMN])

# Fully numeric copy of the table (labels replaced by their class ids).
symptom_encoded = symptom_data.assign(**{TARGET_COLUMN: root_cause_ids})
np_symptom = symptom_encoded.to_numpy().astype(float)

X_train = symptom_encoded[FEATURE_COLUMNS].to_numpy().astype(float)

# One-hot encode the class ids; the width comes from the fitted encoder
# instead of a hard-coded class count.
Y_train = tf.keras.utils.to_categorical(
    root_cause_ids, num_classes=len(label_encoder.classes_))

# Features and targets must describe the same rows.
assert X_train.shape[0] == Y_train.shape[0], 'feature/target row count mismatch'

print('Shape of feature variables: ', X_train.shape)
print('Shape of target variable: ', Y_train.shape)

Shape of feature variables:  (1000, 7)
Shape of target variable:  (1000, 3)


In [9]:
# --- Training configuration --------------------------------------------------
SEED = 42                                     # fixed seed so repeated runs start from the same random state
EPOCHS = 20
BATCH_SIZE = 100
VERBOSE = 1
OUTPUT_CLASSES = len(label_encoder.classes_)  # one softmax unit per encoded label
N_FEATURES = X_train.shape[1]                 # input width follows the feature matrix
N_HIDDEN = 128
VALIDATION_SPLIT = 0.2                        # fraction of the training rows handed to Keras for validation

# The one-hot target width has to match the size of the output layer.
assert Y_train.shape[1] == OUTPUT_CLASSES, 'target width does not match class count'

# Seed NumPy and TensorFlow before any layer is created.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Model: two ReLU hidden layers followed by a softmax classifier ----------
model = tf.keras.models.Sequential()

model.add(keras.layers.Dense(N_HIDDEN,
                             input_shape=(N_FEATURES,),
                             name='Dense-Layer-1',
                             activation='relu'))

model.add(keras.layers.Dense(N_HIDDEN,
                             name='Dense-Layer-2',
                             activation='relu'))

model.add(keras.layers.Dense(OUTPUT_CLASSES,
                             name='Final',
                             activation='softmax'))

# Categorical cross-entropy pairs with the one-hot encoded Y_train.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Keep the returned History object so the per-epoch metrics can be inspected
# or plotted afterwards.
history = model.fit(X_train,
                    Y_train,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    verbose=VERBOSE,
                    validation_split=VALIDATION_SPLIT)

model.summary()

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Dense-Layer-1 (Dense)       (None, 128)               1024      
                                                                 
 Dense-Layer-2 (Dense)       (None, 128)               16512     
                                                                 
 Final (Dense)               (None, 3)                 387       
                                                                 
Total params: 17,923
Trainable params: 17,923
Non-trainable params: 0
_________________________________________________________________


In [10]:
# Symptom flags for a single incident to classify.
CPU_LOAD = 1
MEMORY_LOAD = 0
DELAY = 0
ERROR_1000 = 0
ERROR_1001 = 1
ERROR_1002 = 1
ERROR_1003 = 0

# Pass an explicit 2-D float array (one row, seven columns, in the same column
# order as the training features). A bare nested Python list is ambiguous for
# Keras, which also accepts a list as "one array per model input".
single_symptoms = np.array(
    [[CPU_LOAD, MEMORY_LOAD, DELAY, ERROR_1000, ERROR_1001, ERROR_1002, ERROR_1003]],
    dtype=float)

# argmax over the softmax outputs gives the most probable class index per row.
prediction = np.argmax(model.predict(single_symptoms), axis=1)

# Convert the class index back to its root-cause name.
print(label_encoder.inverse_transform(prediction))

['DATABASE_ISSUE']


In [11]:
# Several incidents scored in one call. Each row holds the seven symptom flags
# in the order CPU_LOAD, MEMORY_LOAD, DELAY, ERROR_1000..ERROR_1003.
# An explicit 2-D float array is used because a nested Python list is
# ambiguous for Keras (a list can also mean "one array per model input").
batch_symptoms = np.array(
    [[1, 0, 0, 0, 1, 1, 0],
     [0, 1, 1, 1, 0, 0, 0],
     [1, 1, 0, 1, 1, 0, 1],
     [0, 0, 0, 0, 0, 1, 0],
     [1, 0, 1, 0, 1, 1, 1]],
    dtype=float)

# Most probable class index for every row.
batch_prediction = np.argmax(model.predict(batch_symptoms), axis=1)



In [12]:
# Translate the predicted class indices back into root-cause names and show them.
batch_root_causes = label_encoder.inverse_transform(batch_prediction)
print(batch_root_causes)

['DATABASE_ISSUE' 'NETWORK_DELAY' 'MEMORY' 'DATABASE_ISSUE'
 'DATABASE_ISSUE']
