In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
import pandas as pd
import numpy as np

In [None]:
# Read the CSV dataset into a DataFrame
file_path = "/content/balanced_k8s_data.csv"
df = pd.read_csv(file_path)

# Build a copy of the frame without the columns that are excluded from modelling
columns_to_drop = ['node_status', 'pod_state']
df_cleaned = df.drop(columns=columns_to_drop)


In [None]:

# Separate the label column (y) from the remaining feature columns (X)
target_column = 'failure_type'
y = df_cleaned[target_column]
X = df_cleaned.drop(columns=[target_column])

In [None]:
# Feature scaling
# Fit the scaler on the training rows only, so that test-set statistics do not
# leak into the preprocessing step. The index split below uses the same
# test_size, random_state and stratify labels as the train/test split applied
# to the tensors afterwards, so it selects the same training rows.
# Keep these settings in sync with that split.
train_rows, _test_rows = train_test_split(
    np.arange(len(X)), test_size=0.2, random_state=42, stratify=y
)
scaler = StandardScaler()
scaler.fit(X.iloc[train_rows])

# Every row (train and test) is transformed with the training-set statistics
X_scaled = scaler.transform(X)

# Convert to PyTorch tensors (float32 features, int64 class labels)
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y.values, dtype=torch.long)

In [None]:
# Hold out 20% of the samples for testing, stratified by the label column
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, **split_options)

# Wrap each split in a dataset and a mini-batch loader of 32 samples;
# only the training loader shuffles its samples
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)


In [None]:
# Define the model
class K8sFailureMLP(nn.Module):
    """Feed-forward classifier with three hidden layers (128, 64 and 32 units).

    Each hidden layer is Linear -> BatchNorm1d -> LeakyReLU(0.1). Dropout
    (p=0.3) is applied after the first two hidden layers only. The final
    linear layer returns raw, un-normalized scores (no softmax is applied).

    Args:
        input_size: number of input features per sample.
        num_classes: number of scores produced per sample.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # Hidden layers, each paired with its batch-normalization layer
        self.fc1 = nn.Linear(input_size, 128)
        self.bn1 = nn.BatchNorm1d(128)
        self.fc2 = nn.Linear(128, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc3 = nn.Linear(64, 32)
        self.bn3 = nn.BatchNorm1d(32)
        # Output projection
        self.output = nn.Linear(32, num_classes)
        # Activation and regularization modules shared by the hidden layers
        self.leaky_relu = nn.LeakyReLU(0.1)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return the per-class scores for the batch `x`."""
        # (linear layer, batch norm, whether dropout follows this stage)
        hidden_stages = (
            (self.fc1, self.bn1, True),
            (self.fc2, self.bn2, True),
            (self.fc3, self.bn3, False),
        )
        for linear, norm, use_dropout in hidden_stages:
            x = self.leaky_relu(norm(linear(x)))
            if use_dropout:
                x = self.dropout(x)
        return self.output(x)

In [None]:
# Seed PyTorch's global random number generator so that weight initialization,
# training-batch shuffling and dropout are reproducible when the notebook is
# run from top to bottom on a fresh kernel
torch.manual_seed(42)

# Initialize model: one output score per distinct label value
num_classes = len(torch.unique(y_tensor))
model = K8sFailureMLP(input_size=X_train.shape[1], num_classes=num_classes)

# Define loss function and optimizer
# (CrossEntropyLoss takes the raw model scores together with integer class labels)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop
def _run_training_epoch(net, loader, loss_fn, opt):
    """Run one optimization pass over `loader` and return the sum of the batch losses."""
    net.train()
    running_loss = 0
    for inputs, targets in loader:
        opt.zero_grad()
        batch_loss = loss_fn(net(inputs), targets)
        batch_loss.backward()
        opt.step()
        running_loss += batch_loss.item()
    return running_loss


epochs = 20
for epoch in range(epochs):
    total_loss = _run_training_epoch(model, train_loader, criterion, optimizer)
    # Report the mean of the per-batch losses for this epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

Epoch 1/20, Loss: 0.5183
Epoch 2/20, Loss: 0.4162
Epoch 3/20, Loss: 0.4059
Epoch 4/20, Loss: 0.3979
Epoch 5/20, Loss: 0.3918
Epoch 6/20, Loss: 0.3910
Epoch 7/20, Loss: 0.3866
Epoch 8/20, Loss: 0.3841
Epoch 9/20, Loss: 0.3867
Epoch 10/20, Loss: 0.3814
Epoch 11/20, Loss: 0.3742
Epoch 12/20, Loss: 0.3797
Epoch 13/20, Loss: 0.3774
Epoch 14/20, Loss: 0.3777
Epoch 15/20, Loss: 0.3719
Epoch 16/20, Loss: 0.3742
Epoch 17/20, Loss: 0.3722
Epoch 18/20, Loss: 0.3682
Epoch 19/20, Loss: 0.3681
Epoch 20/20, Loss: 0.3717


In [None]:
# Evaluation: collect predictions on the test loader, then report the metrics
model.eval()
y_true, y_pred = [], []
with torch.no_grad():
    for features, labels in test_loader:
        scores = model(features)
        # The predicted class is the index of the highest score in each row
        y_pred.extend(scores.argmax(dim=1).numpy())
        y_true.extend(labels.numpy())

accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average='weighted'
)
conf_matrix = confusion_matrix(y_true, y_pred)
class_report = classification_report(y_true, y_pred)

summary_lines = [
    f"Test Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1 Score: {f1:.4f}",
    "Confusion Matrix:",
    str(conf_matrix),
    "Classification Report:",
    class_report,
]
print("\n".join(summary_lines))

Test Accuracy: 0.7776
Precision: 0.7831
Recall: 0.7776
F1 Score: 0.7456
Confusion Matrix:
[[997   0   1   2   0]
 [  0 864  25 109   2]
 [  0  78 914   6   2]
 [  0 862  24 114   0]
 [  0   0   1   0 999]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1000
           1       0.48      0.86      0.62      1000
           2       0.95      0.91      0.93      1000
           3       0.49      0.11      0.19      1000
           4       1.00      1.00      1.00      1000

    accuracy                           0.78      5000
   macro avg       0.78      0.78      0.75      5000
weighted avg       0.78      0.78      0.75      5000



In [None]:
# Testing with new data
def predict(model, scaler, new_data):
    """Predict class indices for raw (unscaled) samples.

    Args:
        model: callable network returning one row of class scores per sample;
            it is switched to evaluation mode and left in that mode.
        scaler: object whose ``transform`` method scales ``new_data``.
        new_data: samples to classify, in whatever form ``scaler.transform``
            accepts.

    Returns:
        NumPy array holding the index of the highest score for each sample.
    """
    model.eval()
    features = torch.tensor(scaler.transform(new_data), dtype=torch.float32)
    # Inference only: no gradients are needed
    with torch.no_grad():
        class_ids = model(features).argmax(dim=1)
    return class_ids.numpy()

# Pass the sample as a one-row DataFrame rather than a bare NumPy array, so the
# scaler receives the same column names it was fitted with; scikit-learn warns
# about missing feature names otherwise.
new_sample = X.iloc[[0]]
predicted_class = predict(model, scaler, new_sample)
print(f"Predicted Failure Type: {predicted_class[0]}")

Predicted Failure Type: 0


