In [32]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [31]:
# --- Configuration: every tunable value for the notebook lives in this cell ---
FILE_PATH = 'root_cause_analysis.csv'  # CSV file read by the data-loading cell
BATCH_SIZE = 64                        # mini-batch size used by both DataLoaders
EPOCHS = 20                            # number of full passes over the training set
LR = 0.01                              # initial learning rate handed to Adam
DROPOUT_RATE = 0.3                     # drop probability of the model's dropout layers
SEED = 420                             # same value as the train/test split's random_state

# Seed PyTorch's global RNG so weight initialisation, dropout masks and
# DataLoader shuffling are repeatable under Restart Kernel -> Run All.
# (Trailing `;` suppresses the Generator repr in the cell output.)
torch.manual_seed(SEED);

In [24]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [19]:
# Read the raw table from the CSV at FILE_PATH and display it.
df = pd.read_csv(filepath_or_buffer=FILE_PATH)
df

Unnamed: 0,ID,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,1,0,0,0,0,1,0,1,MEMORY_LEAK
1,2,0,0,0,0,0,0,1,MEMORY_LEAK
2,3,0,1,1,0,0,1,1,MEMORY_LEAK
3,4,0,1,0,1,1,0,1,MEMORY_LEAK
4,5,1,1,0,1,0,1,0,NETWORK_DELAY
...,...,...,...,...,...,...,...,...,...
995,996,0,0,0,0,0,0,1,DATABASE_ISSUE
996,997,0,0,0,1,0,0,0,NETWORK_DELAY
997,998,1,1,1,0,0,0,0,MEMORY_LEAK
998,999,0,1,1,1,1,0,0,NETWORK_DELAY


In [20]:
# Drop the row identifier (not a feature) and encode ROOT_CAUSE as 0-based class indices.
ROOT_CAUSE_CODES = {'MEMORY_LEAK': 0, 'NETWORK_DELAY': 1, 'DATABASE_ISSUE': 2}

# errors='ignore' keeps the cell re-runnable once 'ID' has already been removed.
df = df.drop(columns=['ID'], errors='ignore')

# Only encode while the column still holds the original string labels; re-mapping
# already-encoded integers would turn every value into NaN.
if not pd.api.types.is_integer_dtype(df['ROOT_CAUSE']):
    encoded = df['ROOT_CAUSE'].map(ROOT_CAUSE_CODES)
    unmapped = df.loc[encoded.isna(), 'ROOT_CAUSE'].unique()
    if len(unmapped):
        # Fail loudly: a NaN label would otherwise be cast to a garbage class index later.
        raise ValueError(f"Unmapped ROOT_CAUSE labels: {sorted(map(str, unmapped))}")
    df['ROOT_CAUSE'] = encoded.astype('int64')
df

Unnamed: 0,CPU_LOAD,MEMORY_LEAK_LOAD,DELAY,ERROR_1000,ERROR_1001,ERROR_1002,ERROR_1003,ROOT_CAUSE
0,0,0,0,0,1,0,1,0
1,0,0,0,0,0,0,1,0
2,0,1,1,0,0,1,1,0
3,0,1,0,1,1,0,1,0
4,1,1,0,1,0,1,0,1
...,...,...,...,...,...,...,...,...
995,0,0,0,0,0,0,1,2
996,0,0,0,1,0,0,0,1
997,1,1,1,0,0,0,0,0
998,0,1,1,1,1,0,0,1


In [27]:
# Separate the encoded target column from the feature columns.
y = df['ROOT_CAUSE']
X = df.drop(columns=['ROOT_CAUSE'])

# Hold out 20% of the rows for evaluation; the fixed random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=420
)

# Materialise each split on `device`: float tensors for features, long tensors for labels.
X_train_tensor, X_test_tensor = (
    torch.FloatTensor(frame.values).to(device) for frame in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.LongTensor(labels.values).to(device) for labels in (y_train, y_test)
)

In [29]:
# Pair features with labels so each loader yields (inputs, labels) batches.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Shuffle only the training batches; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

input_dim = X_train.shape[1]
# Labels are 0-based class indices, so the network needs max(label) + 1 logits.
# Derive this from the full label column `y` rather than from the classes that
# happened to land in the training split; otherwise a class missing from
# y_train would undersize the output layer.
output_dim = int(y.max()) + 1
print(f"Input Size = {input_dim}, Output Size = {output_dim}")

Input Size = 7, Output Size = 3


In [34]:
class RootCauseAnalyser(nn.Module):
    """Small feed-forward classifier: input -> 32 -> 16 -> output logits.

    ReLU and dropout follow each of the two hidden layers; the final layer
    returns raw logits (no softmax), which is what nn.CrossEntropyLoss expects.

    Args:
        input_size: Number of input features per sample.
        output_size: Number of classes (logits produced per sample).
        dropout_rate: Drop probability for the dropout layers. When None
            (the default), the notebook-level DROPOUT_RATE constant is used,
            which preserves the original two-argument behaviour.
    """

    def __init__(self, input_size, output_size, dropout_rate=None):
        super().__init__()

        # Resolve the fallback at construction time so the class can also be
        # instantiated with an explicit rate, independent of the global.
        if dropout_rate is None:
            dropout_rate = DROPOUT_RATE

        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 16)
        self.fc3 = nn.Linear(16, output_size)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return class logits for a batch `x` whose last dimension is `input_size`."""
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        x = self.fc3(x)

        return x

In [36]:
# Build the network directly on `device`, before the optimizer is created, so the
# optimizer tracks the device-resident parameters and validate_model() /
# predict_sample() work even when train_model() has not been run first.
model = RootCauseAnalyser(input_size=input_dim, output_size=output_dim).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)
# Halve the learning rate once the monitored loss has stopped improving for more
# than `patience` epochs.
# NOTE: the spelling `schedular` is kept because train_model() looks the object up by this name.
schedular = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10)

In [37]:
def train_model():
    """Train the global `model` on `train_loader` for EPOCHS epochs.

    Each epoch runs one optimisation step per mini-batch, prints the
    sample-weighted mean training loss, and passes that loss to the
    learning-rate scheduler. Uses the notebook globals `model`, `device`,
    `train_loader`, `criterion`, `optimizer` and `schedular`; returns None.
    """
    model.to(device)
    n_samples = len(train_loader.dataset)

    for epoch_idx in range(EPOCHS):
        model.train()
        loss_sum = 0.0

        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_x), batch_y)
            batch_loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so a
            # smaller final batch does not skew the epoch average.
            loss_sum += batch_loss.item() * batch_x.size(0)

        mean_loss = loss_sum / n_samples
        print(f"Epoch [{epoch_idx+1}/{EPOCHS}], Loss: {mean_loss:.4f}")
        schedular.step(mean_loss)

In [38]:
def validate_model():
    """Evaluate the global `model` on `test_loader` and print the metrics.

    Runs in eval mode with gradients disabled, collects the highest-scoring
    class per sample, then prints the accuracy, the confusion matrix and the
    classification report. Returns None.
    """
    model.eval()
    predicted, expected = [], []

    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_x)
            # Index of the largest logit along the class dimension = predicted class.
            batch_pred = logits.max(dim=1).indices
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    reports = (
        ("Accuracy:", accuracy_score),
        ("Confusion Matrix:\n", confusion_matrix),
        ("Classification Report:\n", classification_report),
    )
    for heading, metric in reports:
        print(heading, metric(expected, predicted))


In [45]:
def predict_sample(sample):
    """Predict the root cause for one sample or a batch of samples.

    Args:
        sample: A single feature vector (1-D) or a batch of feature vectors
            (2-D); any nested sequence, NumPy array or tensor that
            torch.as_tensor can convert to float32.

    Returns:
        A list with one (label, confidence) tuple per sample, where `label`
        is the human-readable class name and `confidence` is the softmax
        probability of that class expressed as a percentage.

    Raises:
        ValueError: If `sample` is not 1-D or 2-D.
    """
    label_map = {0: "Memory Leak", 1: "Network Delay", 2: "Database Issue"}
    model.eval()
    # Place the input wherever the model's weights actually live, so the call
    # works regardless of whether the model has been moved to the GPU yet.
    model_device = next(model.parameters()).device

    with torch.no_grad():
        # as_tensor treats the argument as data; the legacy torch.FloatTensor(n)
        # constructor would instead return an uninitialised tensor of length n
        # when handed a bare int.
        sample_tensor = torch.as_tensor(sample, dtype=torch.float32, device=model_device)
        if sample_tensor.ndim == 1:
            # Promote a single sample to a batch of one.
            sample_tensor = sample_tensor.unsqueeze(0)
        elif sample_tensor.ndim != 2:
            raise ValueError(
                f"sample must be 1-D or 2-D, got {sample_tensor.ndim} dimension(s)"
            )

        probs = torch.softmax(model(sample_tensor), dim=1)
        confs, preds = torch.max(probs, 1)

    return [
        (label_map[pred], conf * 100)
        for pred, conf in zip(preds.tolist(), confs.tolist())
    ]

In [40]:
# Fit the network; the mean training loss is printed after every epoch.
train_model()

Epoch [1/20], Loss: 1.0218
Epoch [2/20], Loss: 0.7919
Epoch [3/20], Loss: 0.6772
Epoch [4/20], Loss: 0.5961
Epoch [5/20], Loss: 0.5815
Epoch [6/20], Loss: 0.5401
Epoch [7/20], Loss: 0.5377
Epoch [8/20], Loss: 0.4954
Epoch [9/20], Loss: 0.5036
Epoch [10/20], Loss: 0.4865
Epoch [11/20], Loss: 0.4937
Epoch [12/20], Loss: 0.4765
Epoch [13/20], Loss: 0.4760
Epoch [14/20], Loss: 0.4895
Epoch [15/20], Loss: 0.5007
Epoch [16/20], Loss: 0.4551
Epoch [17/20], Loss: 0.4750
Epoch [18/20], Loss: 0.4759
Epoch [19/20], Loss: 0.4792
Epoch [20/20], Loss: 0.4617


In [41]:
# Print accuracy, confusion matrix and classification report for the test loader.
validate_model()

Accuracy: 0.845
Confusion Matrix:
 [[46  4  9]
 [ 5 55  2]
 [ 8  3 68]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.78      0.78        59
           1       0.89      0.89      0.89        62
           2       0.86      0.86      0.86        79

    accuracy                           0.84       200
   macro avg       0.84      0.84      0.84       200
weighted avg       0.84      0.84      0.84       200



In [46]:
# Unseen feature vectors, one row per sample, in the training column order:
# CPU_LOAD, MEMORY_LEAK_LOAD, DELAY, ERROR_1000, ERROR_1001, ERROR_1002, ERROR_1003
new_samples = [
    [1, 1, 1, 0, 1, 0, 1],
    [0, 0, 1, 1, 0, 1, 0],
    [1, 1, 0, 0, 1, 0, 1],
    [0, 0, 0, 0, 0, 0, 1],
    [1, 0, 1, 1, 1, 0, 0],
]

# Classify the whole batch in one call, then report each prediction.
results = predict_sample(new_samples)
for cause, pct in results:
    message = f'The root cause is "{cause}" with {pct:.2f}% confidence.'
    print(message)

The root cause is "Memory Leak" with 94.92% confidence.
The root cause is "Network Delay" with 99.72% confidence.
The root cause is "Memory Leak" with 85.96% confidence.
The root cause is "Memory Leak" with 83.13% confidence.
The root cause is "Network Delay" with 99.23% confidence.
