<a href="https://colab.research.google.com/github/CyberMetrics/Prototypes/blob/main/AISEIM01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
# ------------- Install required packages -------------
!pip install -q pandas numpy gradio transformers torch

# ------------- Imports -------------
import pandas as pd
import numpy as np
from collections import Counter
import gradio as gr
from transformers import pipeline

# ------------- Free local AI setup -------------
# Builds a Hugging Face `text-classification` pipeline around a DistilBERT
# checkpoint fine-tuned on SST-2. Judging by the checkpoint name this is a
# sentiment classifier, so it labels text rather than "explaining" it.
# The weights are fetched from the Hugging Face Hub the first time this runs.
# Any other text-classification checkpoint can be swapped in via `model=`.
text_analyzer = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Device set to use cpu


In [29]:
# ----------------- Logistic Regression from scratch -----------------
class SimpleLogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    lr : float
        Step size applied to each gradient update.
    epochs : int
        Number of full passes over the training data.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector of shape (n_features,); ``None`` until ``fit`` is called.
    b : float
        Bias term.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.w = None
        self.b = 0

    def _sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        Evaluated as exp(-log(1 + exp(-z))) through ``np.logaddexp`` so that
        very negative ``z`` does not overflow inside ``exp``.
        """
        return np.exp(-np.logaddexp(0, -z))

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from a 2-D feature matrix and 0/1 labels.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
        y : array-like, shape (n_samples,)
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        n, d = X.shape

        # Every call restarts from zero so refitting is independent of history.
        self.w = np.zeros(d)
        self.b = 0

        for _ in range(self.epochs):
            # Gradient of the mean log-loss is X^T (p - y) / n for the weights
            # and mean(p - y) for the bias.
            residual = self._sigmoid(X.dot(self.w) + self.b) - y
            self.w -= self.lr * (X.T.dot(residual) / n)
            self.b -= self.lr * (np.sum(residual) / n)

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of ``X``.

        Accepts any array-like (lists, tuples, arrays), mirroring ``fit``.

        Raises
        ------
        RuntimeError
            If the model has not been fitted yet.
        """
        if self.w is None:
            raise RuntimeError("SimpleLogisticRegression must be fitted before predicting")
        X = np.asarray(X, dtype=float)
        return self._sigmoid(X.dot(self.w) + self.b)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the probability is at least ``threshold``."""
        return (self.predict_proba(X) >= threshold).astype(int)

# ----------------- Hybrid Security Model -----------------
class HybridSecurityModel:
    """Combines simple statistical checks with a logistic-regression classifier.

    Two kinds of input are supported:

    * numeric feature batches (``analyze_numeric``): z-score outliers, a
      classifier label, and a rolling-mean spike check;
    * log tables (``analyze_logs``): rare event ids, level/content mismatches,
      and gaps in the line numbering.
    """

    def __init__(self):
        self.classifier = SimpleLogisticRegression()
        self.is_trained = False

    # Numeric data analysis
    def fit_numeric(self, X, y):
        """Train the internal classifier on features ``X`` and 0/1 labels ``y``."""
        self.classifier.fit(X, y)
        self.is_trained = True

    def analyze_numeric(self, X_batch, z_threshold=2, seq_ratio=1.5, window=3):
        """Score each row of a numeric batch.

        Parameters
        ----------
        X_batch : array-like, shape (n_samples, n_features)
        z_threshold : float
            A row is an anomaly when any column's absolute z-score (computed
            within this batch) exceeds this value.
        seq_ratio : float
            A row raises a sequence alert when its mean exceeds ``seq_ratio``
            times the rolling mean of row means.
        window : int
            Rolling window length (the current row is included).

        Returns
        -------
        pandas.DataFrame
            Columns ``Sample``, ``AnomalyFlag``, ``Class``, ``SeqAlert``,
            ``FinalAlert``. ``Class`` is ``"Unknown"`` until ``fit_numeric``
            has been called.
        """
        columns = ["Sample", "AnomalyFlag", "Class", "SeqAlert", "FinalAlert"]
        X_df = pd.DataFrame(X_batch)
        if X_df.empty:
            # Nothing to score; still hand back the documented schema.
            return pd.DataFrame(columns=columns)

        # Column-wise z-scores; a zero std would divide by zero, so treat it as 1.
        mean = X_df.mean()
        std = X_df.std().replace(0, 1)
        zscores = ((X_df - mean) / std).abs()
        anomalies = (zscores > z_threshold).any(axis=1).astype(int)

        if self.is_trained:
            # Hand the classifier a float ndarray regardless of the input container.
            preds = self.classifier.predict(X_df.to_numpy(dtype=float))
        else:
            preds = ["Unknown"] * len(X_df)

        row_mean = X_df.mean(axis=1)
        rolling_mean = row_mean.rolling(window=window, min_periods=1).mean()
        seq_alert = (row_mean > rolling_mean * seq_ratio).astype(int)

        final_alert = ((anomalies == 1) | (seq_alert == 1)).astype(int)

        return pd.DataFrame({
            "Sample": range(len(X_df)),
            "AnomalyFlag": anomalies.to_numpy(),
            "Class": preds,
            "SeqAlert": seq_alert.to_numpy(),
            "FinalAlert": final_alert.to_numpy(),
        }, columns=columns)

    # Log data analysis
    def analyze_logs(self, df):
        """Flag suspicious rows in a log table.

        Parameters
        ----------
        df : pandas.DataFrame
            Must contain the columns ``LineId``, ``EventId``, ``Level`` and
            ``Content``. The input frame is not modified.

        Returns
        -------
        pandas.DataFrame
            Rows sorted by ``LineId`` with the columns ``LineId``, ``EventId``,
            ``Level``, ``AnomalyFlag`` (event id occurs exactly once),
            ``ClassCheck`` (``"Mismatch"`` when the content mentions "fail" or
            "error" but the level is "info", else ``"OK"``), ``SeqAlert``
            (``LineId`` jumps by more than 1) and ``FinalAlert`` (any of the
            three checks fired).

        Raises
        ------
        KeyError
            If a required column is missing.
        """
        required = ["LineId", "EventId", "Level", "Content"]
        missing = [col for col in required if col not in df.columns]
        if missing:
            raise KeyError(f"Log data is missing required column(s): {', '.join(missing)}")

        df = df.copy()

        event_counts = Counter(df["EventId"])
        df["AnomalyFlag"] = df["EventId"].map(lambda e: int(event_counts[e] == 1)).astype(int)

        # Missing or non-string cells (e.g. NaN from read_csv) are treated as
        # empty text instead of crashing on `.lower()`.
        content = df["Content"].fillna("").astype(str).str.lower()
        level = df["Level"].fillna("").astype(str).str.lower()
        mentions_failure = (
            content.str.contains("fail", regex=False)
            | content.str.contains("error", regex=False)
        )
        df["ClassCheck"] = np.where(mentions_failure & (level == "info"), "Mismatch", "OK")

        df = df.sort_values("LineId")
        # The first row has no predecessor, so its gap is defined as 0.
        df["LineGap"] = df["LineId"].diff().fillna(0).astype(int)
        df["SeqAlert"] = (df["LineGap"] > 1).astype(int)

        df["FinalAlert"] = (
            (df["AnomalyFlag"] == 1)
            | (df["ClassCheck"] == "Mismatch")
            | (df["SeqAlert"] == 1)
        ).astype(int)
        return df[["LineId", "EventId", "Level", "AnomalyFlag", "ClassCheck", "SeqAlert", "FinalAlert"]]


In [30]:
# Initialize model
# `model` is the notebook-wide HybridSecurityModel instance; `siem_ai` below
# reads it as a global.
model = HybridSecurityModel()

# Train numeric example (you can replace with real SIEM numeric data)
# Five 2-feature samples: four low-valued rows labelled 0 and one high-valued
# row labelled 1. This is toy data used only so the classifier has a fit.
# NOTE(review): because training uses 2 features, numeric inputs scored by the
# trained classifier presumably need 2 columns as well; confirm against the
# data you upload.
X_train = np.array([[1,2],[2,1],[2,2],[3,3],[10,10]])
y_train = np.array([0,0,0,0,1])
model.fit_numeric(X_train, y_train)


In [31]:
def siem_ai(email, user_input, file=None):
    """Gradio callback: analyze an uploaded CSV or echo the user's question.

    Parameters
    ----------
    email : str
        Must be non-blank, otherwise a warning is returned.
    user_input : str
        Free-text question; only used when no file is uploaded.
    file : file-like object with a ``name`` attribute, or a path string, optional
        CSV to analyze. A CSV with an ``EventId`` column is treated as a log
        table, anything else as a numeric batch.

    Returns
    -------
    tuple
        ``(message, table)`` where ``table`` is a DataFrame of analysis
        results, or ``None`` when there is nothing tabular to show.
    """
    max_summary_lines = 5   # how many classified log lines are shown
    max_content_chars = 512  # character cap (not tokens) on text sent to the model

    # Treat whitespace-only input the same as an empty email.
    if not email or not str(email).strip():
        return "⚠️ Please enter your email first.", None

    # If a log file is uploaded
    if file is not None:
        try:
            # Depending on configuration the upload may arrive as an object
            # exposing `.name` or as a plain path string; accept both.
            csv_path = getattr(file, "name", file)
            df = pd.read_csv(csv_path)
            if "EventId" in df.columns:
                analyzed = model.analyze_logs(df)
                message = "🧩 Log file analyzed successfully!"
            else:
                analyzed = model.analyze_numeric(df.values)
                message = "📊 Numeric data analyzed successfully!"

            # Add AI explanation using free local model.
            # Only `max_summary_lines` entries are displayed, so stop running
            # the model once that many have been collected.
            explanation = []
            for content in df.get("Content", []):
                if len(explanation) >= max_summary_lines:
                    break
                if isinstance(content, str) and content.strip():
                    res = text_analyzer(content[:max_content_chars])[0]
                    explanation.append(f"{content[:50]}... -> {res['label']} ({res['score']:.2f})")
            if explanation:
                message += "\n\nAI Summary of logs:\n" + "\n".join(explanation)

            return message, analyzed
        except Exception as e:
            # Surface any parsing/analysis failure in the UI instead of
            # crashing the callback.
            return f"⚠️ Error processing file: {e}", None

    # If user asks a question
    if user_input:
        return f"💬 AI Answer (free local model): {user_input}\n(Note: Detailed explanation not yet implemented)", None

    return "Enter a question or upload a file.", None


In [32]:
# Gradio front end for `siem_ai`.
# The three inputs map positionally onto siem_ai(email, user_input, file), and
# the two outputs onto its (message, table) return value.
ui = gr.Interface(
    fn=siem_ai,
    inputs=[
        gr.Textbox(label="Your Email", placeholder="Enter your email"),
        gr.Textbox(label="Ask a question or command", placeholder="e.g., Explain anomalies"),
        gr.File(label="Optional: Upload SIEM Log CSV")
    ],
    outputs=[
        gr.Textbox(label="AI Response / Alerts"),
        gr.Dataframe(label="Analysis Results")
    ],
    title="🛡️ Free AI-Powered SIEM Assistant",
    description="Upload logs or ask questions about your SIEM. Uses your local AI and numeric/log analysis."
)

# NOTE(review): share=True publishes the app on a public gradio.live URL, so
# anyone with the link can upload files to this session. Confirm that is
# intended before using it with real SIEM logs.
ui.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://aa6a4ee9ddc9a8a842.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


