<a href="https://www.kaggle.com/code/ffflores/redfish-nlp-log-classification?scriptVersionId=295640304" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Training

In [1]:
from sklearn.cluster import DBSCAN
from sentence_transformers import SentenceTransformer
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import joblib
import re


if __name__ == "__main__":
    # Read the labelled log dataset; the steps below use its 'message' and
    # 'class' columns.
    logs = pd.read_csv("/kaggle/input/redfish-api-logs/dataset.csv")

    # Encode every log message into an embedding with the pretrained
    # sentence-transformer.
    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    vectors = encoder.encode(logs["message"].tolist())

    # Cluster the embeddings by cosine distance and store each row's cluster
    # id on the dataframe. The classifier trained below does not read it.
    dbscan = DBSCAN(eps=0.2, min_samples=1, metric="cosine")
    dbscan.fit(vectors)
    logs["cluster"] = dbscan.labels_

    # Hold out 30% of the rows for evaluation; the fixed seed keeps the split
    # reproducible between runs.
    targets = logs["class"].values
    train_vectors, test_vectors, train_targets, test_targets = train_test_split(
        vectors,
        targets,
        test_size=0.3,
        random_state=42,
    )

    # Fit the classifier on the embeddings of the training split.
    classifier = LogisticRegression(max_iter=1000)
    classifier.fit(train_vectors, train_targets)

    # Print per-class precision/recall/F1 measured on the held-out split.
    predicted_targets = classifier.predict(test_vectors)
    print(classification_report(test_targets, predicted_targets))

    # Persist the fitted classifier to the current working directory.
    joblib.dump(classifier, "log_classifier.joblib")


2026-02-03 13:01:51.216406: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1770123711.393969      17 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1770123711.444868      17 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1770123711.864736      17 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770123711.864770      17 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1770123711.864773      17 computation_placer.cc:177] computation placer alr

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

              precision    recall  f1-score   support

    hardware       0.98      0.99      0.99       347
       power       1.00      0.98      0.99       122
    security       0.96      0.95      0.95        77

    accuracy                           0.98       546
   macro avg       0.98      0.97      0.98       546
weighted avg       0.98      0.98      0.98       546



# Usage

In [2]:
import joblib
import csv
from sentence_transformers import SentenceTransformer

# Pretrained sentence-transformer used to turn a log message into an embedding.
model_embedding = SentenceTransformer('all-MiniLM-L6-v2')
# Fitted classifier loaded from disk; presumably the artifact written by the
# training cell's joblib.dump (confirm the working directory matches).
# NOTE: joblib.load unpickles the file, so only load artifacts you trust.
model_classification = joblib.load("/kaggle/working/log_classifier.joblib")


def classify_with_bert(message: str, *, threshold: float = 0.5) -> str:
    """
    Classify a Redfish log message from its sentence embedding.

    The message is encoded with the module-level ``model_embedding`` and the
    resulting vector is scored by the module-level ``model_classification``.
    The function name is kept for backward compatibility.

    Args:
        message (str): the Redfish log entry message
        threshold (float): minimum probability the most likely class must
            reach for its label to be returned. Defaults to 0.5.

    Returns:
        str: the class that the log belongs to, or "NULL" when the highest
        class probability is below ``threshold``
    """
    embeddings = model_embedding.encode([message])
    # A single message goes in, so only the first row of the result matters.
    probabilities = model_classification.predict_proba(embeddings)[0]
    # predict_proba columns follow the order of classes_, so the index of the
    # highest probability selects the label without a second predict() pass.
    best = probabilities.argmax()
    if probabilities[best] < threshold:
        return "NULL"
    return model_classification.classes_[best]


if __name__ == "__main__":
    # Smoke test: classify one sample Redfish message and show the outcome.
    sample_message = (
        "Access to the resource located at "
        "/Systems/1/Bios/Actions/Bios.ChangePassword was allowed."
    )
    predicted_class = classify_with_bert(sample_message)
    print(f"{sample_message} -> {predicted_class}")



Access to the resource located at /Systems/1/Bios/Actions/Bios.ChangePassword was allowed. -> security
