In [1]:
import pandas as pd
import numpy as np
from typing import List, Dict


In [3]:
# Read the CSV written by the neural detection stage into a DataFrame.
df = pd.read_csv(filepath_or_buffer="neural_detection_output.csv")

# Report the (rows, columns) shape, then preview the first rows.
print("Loaded neural detection output:", df.shape)
df.head()


Loaded neural detection output: (211033, 62)


Unnamed: 0,src_ip,src_port,dst_ip,dst_port,proto,service,duration,src_bytes,dst_bytes,conn_state,...,packet_count_log,traffic_rate_log,protocol_tcp,protocol_udp,protocol_http,sequence_anomaly_score,event_score_norm,sequence_score_norm,final_anomaly_score,is_anomalous
0,192.168.1.133,5353,224.0.0.251,5353,udp,dns,0.0,0,0,S0,...,0.693147,0.0,0,1,0,1.044067e-06,0.002603,1.044067e-06,0.001562,False
1,192.168.1.1,138,192.168.1.255,138,udp,-,0.0,0,0,S0,...,0.693147,0.0,0,1,0,3.104635e-06,0.002603,3.104635e-06,0.001563,False
2,192.168.1.32,61172,192.168.1.186,1687,tcp,-,0.0,0,0,S0,...,0.693147,0.0,1,0,0,4.276221e-06,0.002394,4.276221e-06,0.001438,False
3,192.168.1.32,36402,176.28.50.165,80,tcp,-,0.0,0,0,SH,...,0.693147,0.0,1,0,0,1.01929e-06,0.002394,1.01929e-06,0.001437,False
4,192.168.1.32,54774,192.168.1.133,8500,tcp,-,0.0,0,0,S0,...,0.693147,0.0,1,0,0,9.440002e-07,0.002394,9.440002e-07,0.001437,False


In [5]:
 
# Columns kept for the symbolic reasoning stage; every other column of the
# loaded frame is dropped by the selection below.
NEURAL_SIGNALS = [
    # anomaly scores
    "final_anomaly_score",
    "event_score_norm",
    "sequence_score_norm",
    # traffic features
    "traffic_rate_log",
    "packet_count_log",
    "flow_duration_log",
    # protocol indicator columns
    "protocol_tcp",
    "protocol_udp",
    "protocol_http",
]

# Work on an independent copy so later column assignments do not touch the
# frame that was read from disk.
df = df.loc[:, NEURAL_SIGNALS].copy()


In [6]:
# Confidence is the agreement between the two neural scores:
# 1 - |event_score_norm - sequence_score_norm|, limited to the range [0, 1].
score_gap = (df["event_score_norm"] - df["sequence_score_norm"]).abs()
df["confidence"] = (1 - score_gap).clip(lower=0, upper=1)

df[["confidence"]].head()


Unnamed: 0,confidence
0,0.997398
1,0.9974
2,0.997611
3,0.997607
4,0.997607


In [7]:
class RuleTemplate:
    """A symbolic rule that maps an event's feature values to a MITRE family.

    Attributes:
        rule_id: Identifier reported for the rule when it matches.
        mitre_family: MITRE technique family label attached to matches.
        description: Human-readable text describing the rule.
        required_conditions: Mapping of feature name -> condition. A callable
            condition is invoked with the feature value and must return a
            truthy result; any other condition is treated as a minimum
            threshold (the rule fails when ``value < condition``).
        weight: Multiplier applied to the score of a matching event.
    """

    def __init__(
        self,
        rule_id: str,
        mitre_family: str,
        description: str,
        required_conditions: Dict,
        weight: float = 1.0
    ):
        self.rule_id = rule_id
        self.mitre_family = mitre_family
        self.description = description
        self.required_conditions = required_conditions
        self.weight = weight

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True for ``None`` and for scalar NaN / NA values."""
        if value is None:
            return True
        # Guard with is_scalar so pd.isna never returns an array here.
        return bool(pd.api.types.is_scalar(value) and pd.isna(value))

    def evaluate(self, event: pd.Series) -> float:
        """Score ``event`` against this rule.

        Args:
            event: Row-like object supporting ``.get(name, default)`` and
                ``event["confidence"]``.

        Returns:
            ``weight * confidence`` when every required condition holds,
            otherwise ``0.0``. A feature that is absent, ``None`` or NaN/NA
            never satisfies a condition, and a NaN/NA confidence yields
            ``0.0``.

        Raises:
            KeyError: If ``event`` has no ``"confidence"`` entry and all
                conditions were satisfied.
        """
        score = 1.0

        for feature, condition in self.required_conditions.items():
            value = event.get(feature, None)
            # NaN must be rejected explicitly: ``NaN < threshold`` is False,
            # so a missing measurement would otherwise pass a threshold test.
            if self._is_missing(value):
                return 0.0

            if callable(condition):
                if not condition(value):
                    return 0.0
            elif value < condition:
                return 0.0

        confidence = event["confidence"]
        if self._is_missing(confidence):
            # An undefined confidence cannot support a match.
            return 0.0

        return score * self.weight * confidence


In [8]:
# Rule catalogue evaluated against every event. Arguments are passed
# positionally in the order:
#   rule_id, mitre_family, description, required_conditions, weight
# In required_conditions a bare number is a minimum threshold for the
# feature, while a callable is a custom predicate on the feature value.
RULE_TEMPLATES = [
    RuleTemplate(
        "APPLICATION_LAYER_ABUSE",
        "T1071",
        "Suspicious application-layer communication",
        {
            "protocol_http": lambda flag: flag == 1,
            "final_anomaly_score": 0.7,
            "flow_duration_log": 4,
        },
        1.0,
    ),
    RuleTemplate(
        "NETWORK_SCANNING",
        "T1046",
        "Scanning-like burst behavior",
        {
            "packet_count_log": 3.5,
            "flow_duration_log": lambda duration: duration < 2,
            "final_anomaly_score": 0.65,
        },
        0.9,
    ),
    RuleTemplate(
        "DENIAL_OF_SERVICE",
        "T1499",
        "High-rate traffic flood",
        {
            "traffic_rate_log": 4,
            "final_anomaly_score": 0.8,
        },
        1.2,
    ),
    RuleTemplate(
        "C2_LIKE_BEHAVIOR",
        "T1095",
        "Persistent anomalous communication pattern",
        {
            "sequence_score_norm": 0.6,
            "flow_duration_log": 5,
        },
        1.1,
    ),
]


In [9]:
def evaluate_rules(event, rules: List[RuleTemplate]):
    """Evaluate every rule against one event and collect the positive hits.

    Args:
        event: Row-like object passed unchanged to ``rule.evaluate``.
        rules: Rules to evaluate, in the order their matches are reported.

    Returns:
        One dict per rule whose score is strictly greater than zero, with
        keys ``rule_id``, ``mitre_family``, ``score`` (rounded to three
        decimals) and ``description``.
    """
    # Lazily pair each rule with its score so evaluate() runs once per rule.
    scored_rules = ((candidate, candidate.evaluate(event)) for candidate in rules)

    return [
        {
            "rule_id": candidate.rule_id,
            "mitre_family": candidate.mitre_family,
            "score": round(rule_score, 3),
            "description": candidate.description,
        }
        for candidate, rule_score in scored_rules
        if rule_score > 0
    ]


In [10]:
def _match_rules_for_row(row):
    """Return the rule matches for one DataFrame row using RULE_TEMPLATES."""
    return evaluate_rules(row, RULE_TEMPLATES)


# Row-wise evaluation: each cell of the new column holds that row's match list.
df["mitre_matches"] = df.apply(_match_rules_for_row, axis=1)


In [11]:
def rank_techniques(matches):
    """Aggregate rule matches into a ranking of MITRE families.

    Args:
        matches: List of match dicts, each carrying at least the keys
            ``mitre_family`` and ``score``.

    Returns:
        A list of ``{"mitre_family": ..., "score": ...}`` records, one per
        family, where ``score`` is the sum of that family's match scores and
        records are ordered by descending total. An empty (falsy) input
        yields an empty list.
    """
    if not matches:
        return []

    match_frame = pd.DataFrame(matches)
    # Total score per family, then highest total first.
    family_totals = match_frame.groupby("mitre_family")["score"].sum()
    ordered_totals = family_totals.sort_values(ascending=False).reset_index()

    return ordered_totals.to_dict("records")


# Rank the matched families for every row, then preview matches next to
# their ranking.
ranked_per_row = df["mitre_matches"].apply(rank_techniques)
df["ranked_mitre_techniques"] = ranked_per_row
df.loc[:, ["mitre_matches", "ranked_mitre_techniques"]].head()

Unnamed: 0,mitre_matches,ranked_mitre_techniques
0,[],[]
1,[],[]
2,[],[]
3,[],[]
4,[],[]


In [12]:
# rank_techniques is already defined in the cell above; the byte-identical
# second definition that used to live here only shadowed it and has been
# removed so the notebook has a single source of truth for the function.
# The recomputation is kept so this cell still refreshes the column when run.
df["ranked_mitre_techniques"] = df["mitre_matches"].apply(rank_techniques)


In [13]:
def build_explanation(row):
    """Summarise one row as an explanation dict.

    Args:
        row: Mapping-like row providing ``final_anomaly_score``,
            ``confidence`` and ``ranked_mitre_techniques``.

    Returns:
        Dict with the two scores rounded to three decimals followed by the
        row's ranked MITRE techniques, passed through unchanged.
    """
    explanation = {
        field: round(row[field], 3)
        for field in ("final_anomaly_score", "confidence")
    }
    explanation["ranked_mitre_techniques"] = row["ranked_mitre_techniques"]
    return explanation


# One explanation dict per row, built row-wise from the columns computed above.
explanation_per_row = df.apply(build_explanation, axis=1)
df["explanation"] = explanation_per_row


In [18]:
# -------------------------------
# Symbolic Gating (NOT detection)
# -------------------------------

# Quantile of final_anomaly_score used as the gate (0.90 keeps the top 10%).
SYMBOLIC_GATE_QUANTILE = 0.90

symbolic_gate = df["final_anomaly_score"].quantile(SYMBOLIC_GATE_QUANTILE)

# A row becomes an alert candidate only if it is at or above the gate AND at
# least one MITRE family was ranked for it.
passes_gate = df["final_anomaly_score"] >= symbolic_gate
has_ranked_technique = df["ranked_mitre_techniques"].map(len) > 0
df_alerts = df.loc[passes_gate & has_ranked_technique].copy()

print(
    f"Symbolic gating quantile: {SYMBOLIC_GATE_QUANTILE}\n"
    f"Symbolic gate value: {round(symbolic_gate, 4)}\n"
    f"Symbolic alert candidates: {df_alerts.shape}"
)


Symbolic gating quantile: 0.9
Symbolic gate value: 0.0144
Symbolic alert candidates: (13, 13)


In [14]:
def resolve_conflicts(ranked_techniques, top_k=3):
    """
    Keep the first ``top_k`` entries of an already-ranked technique list.

    The input is expected to be ordered by aggregated symbolic score, so
    truncating it selects the top-K MITRE families. A falsy input (empty
    list or None) yields an empty list.
    """
    return ranked_techniques[:top_k] if ranked_techniques else []


In [15]:

def build_rule_trace(matches):
    """Convert rule matches into trace entries.

    Each match dict is copied into a new dict with the same ``rule_id``,
    ``mitre_family`` and ``description``; the match's ``score`` is exposed
    under the key ``symbolic_score``. Input order is preserved.
    """
    return [
        {
            "rule_id": match["rule_id"],
            "mitre_family": match["mitre_family"],
            "symbolic_score": match["score"],
            "description": match["description"],
        }
        for match in matches
    ]


In [16]:
def build_reasoned_decision(row):
    """Assemble the final decision record for one row.

    Args:
        row: Mapping-like row providing ``ranked_mitre_techniques``,
            ``confidence`` and ``mitre_matches``.

    Returns:
        Dict with ``decision`` ("ANOMALOUS" when at least one technique
        survives conflict resolution, otherwise "SUSPICIOUS"),
        ``top_mitre_techniques``, ``confidence`` rounded to three decimals,
        and ``rule_trace`` built from the row's matches.
    """
    top_techniques = resolve_conflicts(row["ranked_mitre_techniques"])
    label = "ANOMALOUS" if top_techniques else "SUSPICIOUS"

    return dict(
        decision=label,
        top_mitre_techniques=top_techniques,
        confidence=round(row["confidence"], 3),
        rule_trace=build_rule_trace(row["mitre_matches"]),
    )


In [19]:
# Build one decision dict per alert row with an explicit loop instead of
# DataFrame.apply(axis=1). When no row passes the gate, apply() on the empty
# frame gives back an empty DataFrame rather than a Series, which cannot be
# assigned to a single column; the list below is simply empty in that case.
reasoned_decisions = [
    build_reasoned_decision(row) for _, row in df_alerts.iterrows()
]

# Align on the alert index and keep object dtype so each cell holds the dict.
df_alerts["reasoned_decision"] = pd.Series(
    reasoned_decisions,
    index=df_alerts.index,
    dtype=object,
)

df_alerts[["reasoned_decision"]].head()


Unnamed: 0,reasoned_decision
210977,"{'decision': 'ANOMALOUS', 'top_mitre_technique..."
211020,"{'decision': 'ANOMALOUS', 'top_mitre_technique..."
211022,"{'decision': 'ANOMALOUS', 'top_mitre_technique..."
211023,"{'decision': 'ANOMALOUS', 'top_mitre_technique..."
211024,"{'decision': 'ANOMALOUS', 'top_mitre_technique..."


In [21]:
# Persist the gated alerts. The row index is not written (index=False).
# NOTE(review): df was reduced to NEURAL_SIGNALS earlier, so without the
# index the saved rows carry no identifier linking them back to the source
# flows — confirm this is intended.
df_alerts.to_csv(path_or_buf="reasoned_symbolic_alerts.csv", index=False)

print("Reasoned symbolic decisions saved with dynamic gating")


Reasoned symbolic decisions saved with dynamic gating
