<a href="https://colab.research.google.com/github/CyberMetrics/Prototypes/blob/main/Prototype_002.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

A Base Model That Performs Three Tasks at Once (Anomaly Detection, Classification, Time-Series Analysis)

In [9]:
import pandas as pd
from collections import Counter
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import LogisticRegression
import numpy as np

class HybridSecurityModel:
    """Run anomaly detection, classification and sequence checks together.

    Two independent pipelines are provided:

    * ``analyze_numeric`` works on a numeric feature batch and combines an
      Isolation Forest (refit on every batch), an optional logistic-regression
      classifier trained through ``fit_numeric``, and a rolling-mean spike
      check over the per-row means.
    * ``analyze_logs`` works on a log DataFrame and combines a rare-event
      check on ``EventId``, a keyword/level mismatch rule, and a ``LineId``
      gap check.

    Args:
        contamination: Expected anomaly fraction passed to ``IsolationForest``.
        rolling_window: Window size (in rows) of the rolling mean used by the
            numeric sequence check.
        spike_ratio: A row raises ``SeqAlert`` when its mean exceeds the
            rolling mean multiplied by this factor.
        rare_threshold: An ``EventId`` occurring at most this many times in
            the analysed frame is flagged as an anomaly.
    """

    # Output column layouts, shared by the normal and the empty-input paths.
    _NUMERIC_COLUMNS = ["Sample", "AnomalyFlag", "Class", "SeqAlert", "FinalAlert"]
    _LOG_COLUMNS = [
        "LineId", "EventId", "Level",
        "AnomalyFlag", "ClassCheck", "SeqAlert", "FinalAlert",
    ]

    def __init__(self, *, contamination=0.2, rolling_window=3,
                 spike_ratio=1.5, rare_threshold=1):
        self.num_model = None       # classifier set by fit_numeric()
        self.is_trained = False     # True once fit_numeric() has run
        self.contamination = contamination
        self.rolling_window = rolling_window
        self.spike_ratio = spike_ratio
        self.rare_threshold = rare_threshold

    # ----------- Numeric Data Pipeline -----------
    def fit_numeric(self, X, y):
        """Train the logistic-regression classifier used by ``analyze_numeric``.

        Args:
            X: Training feature matrix.
            y: Training labels, one per row of ``X``.
        """
        self.num_model = LogisticRegression()
        self.num_model.fit(X, y)
        self.is_trained = True

    def analyze_numeric(self, X_batch):
        """Score a numeric batch with all three modules.

        Args:
            X_batch: 2-D batch of numeric features (one sample per row).

        Returns:
            DataFrame with one row per sample and the columns ``Sample``
            (positional index), ``AnomalyFlag``, ``Class`` (``"Unknown"``
            when no classifier has been trained), ``SeqAlert`` and
            ``FinalAlert`` (1 when either ``AnomalyFlag`` or ``SeqAlert``
            is 1). An empty batch yields an empty frame with those columns.
        """
        n_samples = len(X_batch)
        if n_samples == 0:
            # IsolationForest cannot be fit on zero samples.
            return pd.DataFrame(columns=self._NUMERIC_COLUMNS)

        # 1. Anomaly Detection (Isolation Forest); predict() returns -1 for anomalies.
        iso = IsolationForest(contamination=self.contamination, random_state=42)
        iso.fit(X_batch)
        anomaly_flag = (iso.predict(X_batch) == -1).astype(int)

        # 2. Classification (Logistic Regression if trained)
        if self.is_trained:
            preds = self.num_model.predict(X_batch)
        else:
            preds = ["Unknown"] * n_samples

        # 3. Sequence/Time analysis: flag rows whose mean spikes above the
        #    rolling mean of the row means (the window includes the row itself).
        row_mean = pd.DataFrame(X_batch).mean(axis=1)
        rolling_mean = row_mean.rolling(
            window=self.rolling_window, min_periods=1
        ).mean()
        # to_numpy() keeps everything positional, whatever index X_batch had.
        seq_alert = (row_mean > rolling_mean * self.spike_ratio).astype(int).to_numpy()

        final_alert = ((anomaly_flag == 1) | (seq_alert == 1)).astype(int)

        return pd.DataFrame({
            "Sample": list(range(n_samples)),
            "AnomalyFlag": anomaly_flag,
            "Class": preds,
            "SeqAlert": seq_alert,
            "FinalAlert": final_alert,
        })

    # ----------- Log Data Pipeline -----------
    def analyze_logs(self, df):
        """Score a log DataFrame with all three modules.

        The input frame is not modified.

        Args:
            df: DataFrame with at least the columns ``LineId``, ``EventId``,
                ``Level`` and ``Content``. Missing ``Content``/``Level``
                values are treated as empty text.

        Returns:
            DataFrame sorted by ``LineId`` with the columns ``LineId``,
            ``EventId``, ``Level``, ``AnomalyFlag``, ``ClassCheck``
            (``"Mismatch"`` or ``"OK"``), ``SeqAlert`` and ``FinalAlert``.
            An empty input yields an empty frame with those columns.
        """
        if len(df) == 0:
            return pd.DataFrame(columns=self._LOG_COLUMNS)

        df = df.copy()

        # Module 1: Anomaly Detection (rare events)
        event_counts = Counter(df["EventId"])
        df["AnomalyFlag"] = [
            int(event_counts[event_id] <= self.rare_threshold)
            for event_id in df["EventId"]
        ]

        # Module 2: Classification (simple rule): a message that mentions a
        # failure or error but is logged at "info" level is a mismatch.
        content = df["Content"].fillna("").astype(str).str.lower()
        level = df["Level"].fillna("").astype(str).str.lower()
        mentions_failure = content.str.contains("fail|error", regex=True).astype(bool)
        is_mismatch = mentions_failure & level.eq("info")
        df["ClassCheck"] = np.where(is_mismatch, "Mismatch", "OK")

        # Module 3: Sequence Analysis: a jump of more than one LineId between
        # consecutive rows means lines are missing. The first row has no
        # predecessor, so its gap is taken as 0.
        df = df.sort_values(by="LineId")
        df["LineGap"] = df["LineId"].diff().fillna(0).astype(int)
        df["SeqAlert"] = (df["LineGap"] > 1).astype(int)

        # Final decision: alert when any module fired.
        df["FinalAlert"] = (
            (df["AnomalyFlag"] == 1)
            | (df["ClassCheck"] == "Mismatch")
            | (df["SeqAlert"] == 1)
        ).astype(int)
        return df[self._LOG_COLUMNS]



Feeding Our Model Real-World Data

In [10]:

    # Log data sample. The LineIds are non-contiguous (3 -> 5 -> 11 -> 14),
    # which exercises the sequence-gap check, and the last two rows carry
    # "Failed" messages at Info level, which exercises the mismatch rule.
    data = [
        [1,"2016-09-28","04:30:30","Info","CBS","Loaded Servicing Stack v6.1.7601.23505...","E23"],
        [2,"2016-09-28","04:30:31","Info","CSI","00000001@2016/9/27:20:30:31.455 WcpInitialize...","E13"],
        [3,"2016-09-28","04:30:31","Info","CSI","00000002@2016/9/27:20:30:31.458 WcpInitialize...","E13"],
        [5,"2016-09-28","04:30:31","Info","CBS","Ending TrustedInstaller initialization.","E17"],
        [11,"2016-09-28","04:30:31","Info","CBS","SQM: Failed to start upload... HRESULT=0x80004005","E39"],
        [14,"2016-09-28","04:30:31","Info","CBS","SQM: Warning: Failed to upload all unsent reports.","E43"],
    ]
    df_logs = pd.DataFrame(data, columns=["LineId","Date","Time","Level","Component","Content","EventId"])

    # Create the model in this cell so it also runs in a fresh kernel;
    # analyze_logs() does not depend on any prior training.
    model = HybridSecurityModel()

    print("\n--- Log Data Analysis ---")
    print(model.analyze_logs(df_logs))


--- Log Data Analysis ---
   LineId EventId Level  AnomalyFlag ClassCheck  SeqAlert  FinalAlert
0       1     E23  Info            1         OK         0           1
1       2     E13  Info            0         OK         0           0
2       3     E13  Info            0         OK         0           0
3       5     E17  Info            1         OK         1           1
4      11     E39  Info            1   Mismatch         1           1
5      14     E43  Info            1   Mismatch         1           1
