In [None]:
import pandas as pd
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import IsolationForest

def ingest_logs(file_path):
    """Load a CSV log file and flatten every row into a single log line.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``. The file must provide ``timestamp``,
            ``level`` and ``message`` columns.

    Returns:
        A list of strings of the form ``"<timestamp> <level> <message>"``,
        one per data row, in file order. A file that contains only the
        header row yields an empty list.

    Raises:
        KeyError: If one of the three required columns is missing.
    """
    df = pd.read_csv(file_path)
    # Walk the three columns in lockstep instead of ``df.apply(axis=1)``:
    # on a zero-row frame ``apply`` hands back a DataFrame (which has no
    # ``tolist``), and it also builds a throwaway Series for every row.
    return [
        f"{timestamp} {level} {message}"
        for timestamp, level, message in zip(
            df["timestamp"], df["level"], df["message"]
        )
    ]

def extract_patterns(logs):
    """Return a copy of *logs* with every run of digits masked as ``<NUM>``.

    Masking the numeric parts lets lines that differ only in their
    numbers collapse onto the same textual pattern.
    """
    digit_run = re.compile(r'\d+')
    masked = []
    for line in logs:
        masked.append(digit_run.sub('<NUM>', line))
    return masked

def detect_anomalies(patterns, contamination=0.1, random_state=42):
    """Flag log patterns that an Isolation Forest labels as outliers.

    Each pattern is turned into a bag-of-words count vector with
    ``CountVectorizer``; an ``IsolationForest`` is then fitted on those
    vectors and asked to classify the very same samples.

    Args:
        patterns: Iterable of pattern strings (one per log line).
        contamination: Forwarded to ``IsolationForest``; defaults to the
            previously hard-coded ``0.1``.
        random_state: Forwarded to ``IsolationForest``; defaults to the
            previously hard-coded ``42``.

    Returns:
        A list with the positions (indices into *patterns*) of the samples
        predicted as ``-1``. Empty input yields an empty list.
    """
    patterns = list(patterns)
    if not patterns:
        # Nothing to vectorize: CountVectorizer would raise on an empty
        # corpus, and "no logs" simply means "no anomalies".
        return []

    vectorizer = CountVectorizer()
    # NOTE(review): the dense conversion can be memory-heavy for large
    # vocabularies; it is kept so results match the previous behavior.
    X = vectorizer.fit_transform(patterns).toarray()

    model = IsolationForest(
        contamination=contamination, random_state=random_state
    )
    model.fit(X)
    preds = model.predict(X)

    return [i for i, pred in enumerate(preds) if pred == -1]


def predict_incident(anomalies, threshold=3):
    """Tell whether the number of anomalies reaches the alert threshold.

    Returns ``True`` when *anomalies* holds at least *threshold* items,
    ``False`` otherwise.
    """
    anomaly_count = len(anomalies)
    incident_expected = anomaly_count >= threshold
    return incident_expected


def raise_alert(logs, anomalies):
    """Print an incident banner followed by each anomalous log line.

    *anomalies* holds indices into *logs*; the referenced lines are
    written to standard output in the order the indices are given.
    """
    for heading in ("🚨 INCIDENT PREDICTED! 🚨", "Anomalous Logs:"):
        print(heading)
    # Look entries up lazily so output is emitted line by line.
    for entry in (logs[position] for position in anomalies):
        print(f" - {entry}")

# Main function
def main(file_path="logs.csv"):
    """Run the pipeline end to end: ingest, mask, detect, then report.

    Args:
        file_path: CSV log file to analyze. Defaults to ``"logs.csv"``,
            the path that used to be hard-coded here.

    Prints the anomalous log lines when an incident is predicted, and a
    "system stable" message otherwise.
    """
    logs = ingest_logs(file_path)
    patterns = extract_patterns(logs)
    anomalies = detect_anomalies(patterns)

    if predict_incident(anomalies):
        # Anomaly indices refer to positions in the raw (unmasked) logs.
        raise_alert(logs, anomalies)
    else:
        print("✅ System stable. No incidents predicted.")

# Run the pipeline only when this module is executed directly,
# not when it is imported by another module.
if __name__ == "__main__":
    main()


✅ System stable. No incidents predicted.
