In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the Kubernetes telemetry dataset from the mounted Google Drive.
# NOTE: on a fresh runtime the Drive mount cell (`drive.mount('/content/drive')`)
# has to be executed before this cell, otherwise the path below does not exist.
DATA_PATH = "/content/drive/MyDrive/k8s_data_fair.csv"
data = pd.read_csv(DATA_PATH)

# Encode every categorical column with its OWN LabelEncoder.
# Re-fitting one shared encoder overwrites its `classes_` on each call, so the
# string -> integer mappings of the feature columns would be lost and new
# samples could not be encoded consistently at inference time.
pod_status_le = LabelEncoder()
node_status_le = LabelEncoder()
le = LabelEncoder()  # target ("issue") encoder; name kept for code that already uses `le`

data["pod_status"] = pod_status_le.fit_transform(data["pod_status"])
data["node_status"] = node_status_le.fit_transform(data["node_status"])
data["issue"] = le.fit_transform(data["issue"])

# Feature encoders keyed by the column they were fitted on.
feature_encoders = {"pod_status": pod_status_le, "node_status": node_status_le}

# Features are every column except the timestamp and the target itself.
X = data.drop(columns=["timestamp", "issue"])  # Features
y = data["issue"]  # Target

# Split into training and testing sets (80/20, fixed seed for reproducibility).
# NOTE(review): the classes look imbalanced; consider `stratify=y` — this would
# change the split and therefore the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model.
# `labels` is passed explicitly so the report rows always line up with
# `target_names`, even if some class happens to be absent from the test split.
issue_labels = list(range(len(le.classes_)))
print("Accuracy:", accuracy_score(y_test, y_pred))
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred, labels=issue_labels, target_names=le.classes_),
)

# Smoke-test a single prediction. The row is kept as a one-row DataFrame
# (double brackets) so the column names seen during `fit` are preserved.
new_data = X_test.iloc[[0]]  # Take one test row
prediction = model.predict(new_data)
print("Predicted issue:", le.inverse_transform(prediction)[0])  # Convert back to label

# Export the trained model to a file
joblib.dump(model, "random_forest_model.pkl")
print("Model exported to 'random_forest_model.pkl'")

# Export the target encoder (decodes predictions back to issue names).
joblib.dump(le, "issue_label_encoder.pkl")
print("LabelEncoder exported to 'issue_label_encoder.pkl'")

# Export the feature encoders as well: inference code needs them to turn raw
# pod_status / node_status strings into the integer codes the model was trained on.
joblib.dump(feature_encoders, "feature_label_encoders.pkl")
print("Feature LabelEncoders exported to 'feature_label_encoders.pkl'")

Accuracy: 0.783
Classification Report:
                      precision    recall  f1-score   support

      Network Issue       0.73      0.77      0.75       365
           No Issue       0.79      0.80      0.80       253
        Pod Failure       0.79      0.81      0.80       812
Resource Exhaustion       0.85      0.81      0.83       473
 Service Disruption       0.55      0.43      0.48        97

           accuracy                           0.78      2000
          macro avg       0.74      0.72      0.73      2000
       weighted avg       0.78      0.78      0.78      2000

Predicted issue: No Issue
Model exported to 'random_forest_model.pkl'
LabelEncoder exported to 'issue_label_encoder.pkl'




In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the training cell earlier in this notebook reads its CSV from
# /content/drive/MyDrive, so on a fresh runtime this cell has to be executed
# first — consider moving it to the top so "Restart & Run All" works.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
