In [4]:
import numpy as np
from tensorflow.keras.models import load_model
import joblib
import re
import os


# Load the saved Keras model and the serialized vectorizer object; both paths
# are relative, so the files are looked up in the current working directory.
# NOTE(review): joblib.load deserializes via pickle — only load artifacts that
# come from a trusted source.
model = load_model('log_model.h5')
vectorizer = joblib.load('log_vectorizer.pkl')

# Known log-line layouts, tried in order by preprocess_log; the first pattern
# that matches at the start of the line wins, so more specific layouts must
# stay ahead of more permissive ones. Every pattern exposes a `message` group.
patterns = [
    # "Mon DD HH:MM:SS host service[pid]: message"
    re.compile(r'(?P<date>\w{3} \d{2} \d{2}:\d{2}:\d{2}) (?P<host>\S+) (?P<service>\S+)\[(?P<pid>\d+)\]: (?P<message>.+)'),
    # "YYYY-MM-DDTHH:MM:SS+ZZZZ host service[pid]: message"
    re.compile(r'(?P<date>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{4}) (?P<host>\S+) (?P<service>\S+)\[(?P<pid>\d+)\]: (?P<message>.+)'),
    # "Mon DD HH:MM:SS host service: message" (no pid)
    re.compile(r'(?P<date>\w{3} \d{2} \d{2}:\d{2}:\d{2}) (?P<host>\S+) (?P<service>\S+): (?P<message>.+)'),
    # "[digits.and.dots] message"
    re.compile(r'\[(?P<date>[\d\.]+)\] (?P<message>.+)'),
    # "host service[pid]: message" (no timestamp)
    re.compile(r'(?P<host>\S+) (?P<service>\S+)\[(?P<pid>\d+)\]: (?P<message>.+)'),
    # "YYYY-MM-DDTHH:MM:SS+ZZZZ host service: message" (no pid)
    re.compile(r'(?P<date>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\+\d{4}) (?P<host>\S+) (?P<service>\S+): (?P<message>.+)'),
    # 'YYYY-MM-DDTHH:MM:SS.fffZ level=LEVEL msg="message"'
    re.compile(r'(?P<date>\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z) level=(?P<level>\w+) msg="(?P<message>.+)"'),
]

# Extract the message part of a raw log line
def preprocess_log(log):
    """Return the ``message`` field of the first pattern that matches *log*.

    The module-level ``patterns`` list is tried in order, each pattern being
    anchored at the start of the line. Returns ``None`` when no pattern
    matches.
    """
    attempts = (regex.match(log) for regex in patterns)
    first_hit = next((found for found in attempts if found), None)
    if first_hit is None:
        # No known layout matched this line.
        return None
    fields = first_hit.groupdict()
    return fields.get('message', '')

# Sample log lines to classify.
# NOTE(review): only `log` is used below in this cell; `log1` is not referenced
# here. Its "level=i nfo" looks like it contains a stray space — confirm
# against the original log before relying on it.
log = 'Jul 17 09:01:11 ared-machine balena-engine-daemon[3202]: time="2024-07-17T09:01:11.796037074Z" level=error msg="failed to initialize a tracing processor \"otlp\"" error="no OpenTelemetry endpoint: skip plugin"'
log1 = 'Jun 05 06:47:11 ared-machine balena-engine-daemon[3065]: time="2024-06-05T06:47:11.969153495Z" level=i nfo msg="loading plugin \"io.containerd.internal.v1.shutdown\"..." runtime=io.containerd.runc.v2 type=io.containerd.internal.v1'

# Preprocess the log: keep only its message part.
processed_log = preprocess_log(log)

# preprocess_log returns None when no known layout matched the line.
if processed_log:
    # Turn the message into a feature vector; .toarray() hands the model a
    # dense array (presumably the vectorizer yields a sparse matrix — confirm).
    log_vector = vectorizer.transform([processed_log]).toarray()

    # Run the prediction for this single sample.
    prediction = model.predict(log_vector)

    # Pull the single score out explicitly instead of relying on the implicit
    # truthiness of an array comparison. .item() raises ValueError unless the
    # output holds exactly one value (assumes a single-output model).
    error_score = np.asarray(prediction).item()

    # Interpret the prediction with a 0.5 decision threshold.
    if error_score > 0.5:
        print("This log is classified as an error (label 1).")
    else:
        print("This log is classified as not an error (label 0).")
else:
    print("The log format is incorrect or could not be processed.")




1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
This log is classified as an error (label 1).
