In [23]:
# NOTE(review): no package name is given on this line, while the recorded output
# below mentions `libyara` — confirm which package was intended before re-running.
# Prefer `%pip install <package>==<version>` so the install targets the kernel's environment.
!pip install 

ERROR: Could not find a version that satisfies the requirement libyara (from versions: none)
ERROR: No matching distribution found for libyara

In [1]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion, Pipeline

# Locate the repo root: the closest directory at or above the working directory
# that contains `core/`. If none is found, the filesystem anchor is used.
PROJECT_ROOT = next(
    (
        candidate
        for candidate in (Path.cwd(), *Path.cwd().parents)
        if (candidate / "core").exists()
    ),
    Path(Path.cwd().anchor),
)

# Make `import core...` resolvable from notebook cells (added once only).
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

# Dataset and artifact locations, kept as plain strings.
INPUT_PATH = str(PROJECT_ROOT.joinpath("datasets", "barrikada.csv"))
MODEL_PATH = str(PROJECT_ROOT.joinpath("core", "layer_c", "models", "tf_idf_logreg.joblib"))
VECTORIZER_PATH = str(PROJECT_ROOT.joinpath("core", "layer_c", "models", "tf_idf_vectorizer.joblib"))

# Shared random seed for splits and estimators.
SEED = 42

In [2]:
def would_reach_layer_c(layer_a_result, layer_b_result):
    """Decide whether a sample is routed on to Layer C.

    A sample does NOT reach Layer C when either:
    - Layer B's `verdict` is "block" (hard block), or
    - Layer B marks it `allowlisted` while Layer A does not mark it `suspicious`
      (safe early exit).

    Missing attributes are treated as verdict=None, suspicious=False,
    allowlisted=False.
    """
    hard_blocked = getattr(layer_b_result, "verdict", None) == "block"
    if hard_blocked:
        return False

    is_suspicious = getattr(layer_a_result, "suspicious", False)
    early_exit = (not is_suspicious) and getattr(layer_b_result, "allowlisted", False)
    return not early_exit

In [3]:
def load_data(csv_path):
    """Load the labelled CSV and keep only the samples that would reach Layer C.

    Each row's `text` is run through Layer A (`analyze_text`) and Layer B
    (`SignatureEngine.detect`); rows for which `would_reach_layer_c` is true are
    kept, using Layer A's `processed_text` as the model input.

    Parameters
    ----------
    csv_path : str or path-like
        CSV file with a `text` column and a `label` column castable to int.

    Returns
    -------
    X : pd.Series
        Processed texts of the retained samples (name "processed_text").
    y : pd.Series
        Matching labels (name "label").
    used_df : pd.DataFrame
        The two series combined, one row per retained sample.
    """
    # Project-local imports: PROJECT_ROOT must already be on sys.path
    # (done by the setup cell at the top of the notebook).
    from core.layer_a.pipeline import analyze_text
    from core.layer_b.signature_engine import SignatureEngine

    df = pd.read_csv(csv_path)
    labels = df["label"].astype(int)

    signature_engine = SignatureEngine()
    layer_c_rows = []

    allowlisted_allow = 0       # allow + allowlisted samples that exit early (never reach Layer C)
    non_allowlisted_allow = 0   # allow samples without allowlisting that go on to Layer C

    # Iterate values directly: positional pairing, no per-row .iloc / label lookups.
    for raw_text, label in zip(df["text"], labels):
        layer_a_result = analyze_text(raw_text)
        layer_b_result = signature_engine.detect(layer_a_result.processed_text)

        is_allow = getattr(layer_b_result, "verdict", None) == "allow"
        is_allowlisted = bool(getattr(layer_b_result, "allowlisted", False))

        if would_reach_layer_c(layer_a_result, layer_b_result):
            layer_c_rows.append((layer_a_result.processed_text, label))
            if is_allow and not is_allowlisted:
                non_allowlisted_allow += 1
        elif is_allow and is_allowlisted:
            # Counted here (outside the reach branch) so the "skipped" figure
            # really reflects samples that did not reach Layer C.
            allowlisted_allow += 1

    used_n = len(layer_c_rows)
    print(f"Filtered to {used_n} samples that would reach Layer C (orchestrator routing).")
    print(f"  allowlisted early-allow skipped: {allowlisted_allow}")
    print(f"  non-allowlisted allow (goes to Layer C): {non_allowlisted_allow}")

    X = pd.Series([text for (text, _) in layer_c_rows], name="processed_text")
    y = pd.Series([lab for (_, lab) in layer_c_rows], name="label")
    used_df = pd.DataFrame({"processed_text": X, "label": y})

    return X, y, used_df

In [4]:
def route_to_label(scores, low, high):
    """Convert probabilities into a routing verdict and a binary label.

    Parameters
    ----------
    scores : array-like of float
        Scores to route (list, NumPy array, or pandas Series).
    low, high : float
        Routing thresholds: `score < low` -> "allow",
        `low <= score < high` -> "flag", `score >= high` -> "block".

    Returns
    -------
    verdict : np.ndarray of str
        "allow" / "flag" / "block" per score, same shape as `scores`.
    predicted_label : np.ndarray of int
        1 where the verdict is "flag" or "block", 0 where it is "allow".
    """
    # Accept any array-like, not only objects that already expose `.shape`.
    s = np.asarray(scores, dtype=float)

    # Explicit width-5 unicode dtype: wide enough for "allow"/"block"/"flag".
    verdict = np.full(s.shape, "allow", dtype="<U5")
    verdict[(s >= low) & (s < high)] = "flag"
    verdict[s >= high] = "block"

    predicted_label = (verdict != "allow").astype(int)

    return verdict, predicted_label

In [5]:
def tune_routing_thresholds_prod(
    y_true,
    scores,
    target_block_precision = 0.99,
    max_malicious_allow_rate = 0.02,
    min_flag_band = 0.05,
    low_grid = None,
    high_grid = None,
):
    """Pick (low, high) using validation data only.

    Policy-style tuning:
    - enforce a high-precision BLOCK
    - cap malicious samples that slip through as ALLOW
    - choose thresholds to minimize FLAG (LM load), then BLOCKs, then maximize ALLOWs

    Parameters
    ----------
    y_true : array-like of int
        Ground-truth labels (1 = malicious, 0 = benign).
    scores : array-like of float
        Model scores for the same samples.
    target_block_precision : float
        Minimum precision required among samples with `score >= high`.
        When nothing is blocked the precision is treated as 1.0.
    max_malicious_allow_rate : float
        Maximum fraction of malicious samples allowed to fall below `low`.
    min_flag_band : float
        Minimum width of the flag band (`high - low`).
    low_grid, high_grid : iterable of float, optional
        Candidate thresholds. Default to `np.linspace(0.05, 0.60, 56)` and
        `np.linspace(0.40, 0.99, 60)` respectively.

    Returns
    -------
    dict
        Keys "low", "high" and the validation statistics "val_flag_rate",
        "val_block_rate", "val_allow_rate", "val_block_precision",
        "val_block_recall", "val_malicious_allow_rate". If no candidate pair
        satisfies the constraints, a fallback of low=0.25 / high=0.75 is
        returned with every statistic set to the placeholder 0.0.
    """

    y = np.asarray(y_true).astype(int)
    s = np.asarray(scores)

    if low_grid is None:
        low_grid = np.linspace(0.05, 0.60, 56)
    if high_grid is None:
        high_grid = np.linspace(0.40, 0.99, 60)

    # Loop-invariant quantities, computed once.
    is_malicious = y == 1
    is_benign = y == 0
    mal_total = max(1, int(np.sum(is_malicious)))  # avoid division by zero

    best_key = None
    best_summary = None
    for low in low_grid:
        # Everything on the ALLOW side depends on `low` only.
        pred_allow = s < low
        mal_allow_rate = float(int(np.sum(pred_allow & is_malicious)) / mal_total)
        if mal_allow_rate > max_malicious_allow_rate:
            continue  # no `high` can rescue this `low`
        allow_rate = float(np.mean(pred_allow))

        for high in high_grid:
            if low >= high or (high - low) < min_flag_band:
                continue

            pred_block = s >= high
            pred_flag = (~pred_allow) & (~pred_block)

            tp_block = int(np.sum(pred_block & is_malicious))
            fp_block = int(np.sum(pred_block & is_benign))
            block_precision = (tp_block / (tp_block + fp_block)) if (tp_block + fp_block) else 1.0
            if block_precision < target_block_precision:
                continue

            flag_rate = float(np.mean(pred_flag))
            block_rate = float(np.mean(pred_block))
            block_recall = float(tp_block / mal_total)

            # Lexicographic objective; strict "<" keeps the first candidate on ties.
            key = (flag_rate, block_rate, -allow_rate)
            if best_key is None or key < best_key:
                best_key = key
                best_summary = {
                    "low": float(low),
                    "high": float(high),
                    "val_flag_rate": float(flag_rate),
                    "val_block_rate": float(block_rate),
                    "val_allow_rate": float(allow_rate),
                    "val_block_precision": float(block_precision),
                    "val_block_recall": float(block_recall),
                    "val_malicious_allow_rate": float(mal_allow_rate),
                }

    if best_summary is None:
        # No feasible pair: fixed fallback thresholds, statistics are placeholders.
        return {
            "low": 0.25,
            "high": 0.75,
            "val_flag_rate": 0.0,
            "val_block_rate": 0.0,
            "val_allow_rate": 0.0,
            "val_block_precision": 0.0,
            "val_block_recall": 0.0,
            "val_malicious_allow_rate": 0.0,
        }

    return best_summary

In [6]:
def verdict_breakdown(y_true, verdict):
    """Count samples per (verdict, true label) combination.

    Returns a nested dict keyed first by verdict ("allow", "flag", "block")
    and then by the label as a string ("0", "1"); values are plain ints.
    """
    labels = np.asarray(y_true).astype(int)
    decisions = np.asarray(verdict)
    return {
        decision: {
            str(label): int(np.count_nonzero((labels == label) & (decisions == decision)))
            for label in (0, 1)
        }
        for decision in ("allow", "flag", "block")
    }

In [7]:
def binary_report(y_true, y_pred):
    """Return sklearn's text classification report (4 digits, zero_division=0)."""
    report_options = {"digits": 4, "zero_division": 0, "output_dict": False}
    return classification_report(y_true, y_pred, **report_options)


In [8]:
from sklearn.metrics import f1_score

def train_eval(X, y, low = None, high = None):
    """Train the TF-IDF + logistic-regression model and evaluate routing.

    The data is split 70/15/15 into train/validation/test (stratified, seeded
    with SEED). Word (1-2 gram) and char_wb (3-5 gram) TF-IDF features are
    combined with a FeatureUnion and fed to a class-balanced LogisticRegression.

    Parameters
    ----------
    X : pd.Series
        Input texts.
    y : pd.Series
        Binary labels (1 = malicious).
    low, high : float, optional
        Routing thresholds. Both must be given to skip tuning; if either is
        None, BOTH are replaced by thresholds tuned on the validation split
        via `tune_routing_thresholds_prod`.

    Returns
    -------
    dict
        "vectorizer", "model", "thresholds" (low, high, tuned_on_val, tuning)
        and "metrics" with identical "val" / "test" entries: roc_auc,
        report_0.5, report_routing, routing_verdict_counts,
        routing_verdict_by_label, routing_f1.
    """
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.30, stratify=y, random_state=SEED
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.50, stratify=y_temp, random_state=SEED
    )

    word = TfidfVectorizer(ngram_range=(1, 2), analyzer="word", min_df=2)
    char = TfidfVectorizer(ngram_range=(3, 5), analyzer="char_wb", min_df=2)
    vec = FeatureUnion([("word", word), ("char", char)])  # type: ignore

    # Fit the vectorizer on the training split only, to avoid leakage.
    X_train_vec = vec.fit_transform(X_train)
    X_val_vec = vec.transform(X_val)
    X_test_vec = vec.transform(X_test)

    model = LogisticRegression(
        solver="saga",
        max_iter=4000,
        class_weight="balanced",
        n_jobs=-1,
        random_state=SEED,
    )
    model.fit(X_train_vec, y_train)

    val_scores = model.predict_proba(X_val_vec)[:, 1]
    test_scores = model.predict_proba(X_test_vec)[:, 1]

    tuned = None
    if low is None or high is None:
        tuned = tune_routing_thresholds_prod(
            y_val,
            val_scores,
            target_block_precision=0.99,
            max_malicious_allow_rate=0.02,
            min_flag_band=0.05,
        )
        low = float(tuned["low"])
        high = float(tuned["high"])
        print(tuned)  # only printed when tuning actually ran

    def _split_metrics(y_split, scores):
        """Build the metrics dict for one evaluation split."""
        pred_05 = (scores >= 0.5).astype(int)  # plain 0.5 cut-off baseline
        verdict, pred_route = route_to_label(scores, low=low, high=high)
        y_arr = y_split.to_numpy()
        return {
            "roc_auc": float(roc_auc_score(y_split, scores)),
            "report_0.5": binary_report(y_split, pred_05),
            "report_routing": binary_report(y_split, pred_route),
            # Plain assignment: a trailing comma here would tuple-unpack the dict.
            "routing_verdict_counts": pd.Series(verdict).value_counts().to_dict(),
            "routing_verdict_by_label": verdict_breakdown(y_arr, verdict),
            "routing_f1": float(f1_score(y_arr, pred_route, zero_division=0)),
        }

    return {
        "vectorizer": vec,
        "model": model,
        "thresholds": {
            "low": float(low),
            "high": float(high),
            "tuned_on_val": tuned is not None,
            "tuning": tuned,
        },
        "metrics": {
            "val": _split_metrics(y_val, val_scores),
            "test": _split_metrics(y_test, test_scores),
        },
    }

In [9]:
# Build the Layer C dataset: load_data runs Layer A and Layer B over every CSV row
# and keeps only the samples that would reach Layer C.
X, y, used_df = load_data(INPUT_PATH)

# Train, tune thresholds on the validation split, and evaluate.
# NOTE(review): the returned dict (vectorizer, model, thresholds, metrics) is not
# assigned to a name here, so it is only available as this cell's output.
train_eval(X, y)

Failed to import '/Users/ishaan/.pyenv/versions/3.11.12/lib/libyara.so'
PATH = /Users/ishaan/.pyenv/versions/3.11.12/bin:/opt/homebrew/opt/postgresql@15/bin:/Users/ishaan/.pyenv/shims:/Users/ishaan/.pyenv/bin:/opt/homebrew/bin:/opt/homebrew/sbin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/local/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/appleinternal/bin:/opt/pmk/env/global/bin:/usr/local/share/dotnet:~/.dotnet/tools:/Users/ishaan/.lmstudio/bin;/Users/ishaan/.pyenv/versions/3.11.12/lib


OSError: dlopen(/Users/ishaan/.pyenv/versions/3.11.12/lib/libyara.so, 0x0006): tried: '/Users/ishaan/.pyenv/versions/3.11.12/lib/libyara.so' (no such file), '/System/Volumes/Preboot/Cryptexes/OS/Users/ishaan/.pyenv/versions/3.11.12/lib/libyara.so' (no such file), '/Users/ishaan/.pyenv/versions/3.11.12/lib/libyara.so' (no such file)

In [None]:
def compare_advanced_models(csv_path=None):
    """Compare advanced models: XGBoost, LightGBM, and SVM

    Each model is trained in a pipeline behind an identically configured word
    TF-IDF vectorizer on an 80/20 stratified split, then evaluated on the test
    split (classification report, ROC AUC, confusion matrix).

    Parameters
    ----------
    csv_path : str or path-like, optional
        Dataset to load via `load_data`. Defaults to the notebook-level
        INPUT_PATH so the result does not depend on the working directory.

    Returns
    -------
    results : dict
        Keyed by model name; each value holds 'pipeline', 'y_pred', 'y_proba',
        'roc_auc' and 'confusion_matrix'. For Linear SVM, 'y_proba' contains
        decision-function scores rather than probabilities.
    y_test : pd.Series
        True labels of the test split.
    """
    from sklearn.pipeline import Pipeline
    from sklearn.svm import LinearSVC
    from xgboost import XGBClassifier
    from lightgbm import LGBMClassifier

    if csv_path is None:
        csv_path = INPUT_PATH

    X, y, _ = load_data(csv_path)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=SEED
    )

    def make_tfidf():
        """Fresh vectorizer per pipeline (same settings for every model)."""
        return TfidfVectorizer(
            analyzer="word", ngram_range=(1, 2), max_features=5000, stop_words="english"
        )

    # Class ratio from the TRAINING labels only (no test-set information),
    # guarded against a split with no positive samples.
    n_pos = int((y_train == 1).sum())
    n_neg = int((y_train == 0).sum())
    scale_pos_weight = (n_neg / n_pos) if n_pos else 1.0

    # Define pipelines for advanced models
    pipelines = {
        'Linear SVM': Pipeline([
            ('tfidf', make_tfidf()),
            ('clf', LinearSVC(class_weight='balanced', dual=False, max_iter=2000, random_state=SEED)),
        ]),
        'XGBoost': Pipeline([
            ('tfidf', make_tfidf()),
            ('clf', XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1,
                                  scale_pos_weight=scale_pos_weight,  # handles imbalance
                                  random_state=SEED, n_jobs=-1, eval_metric='logloss')),
        ]),
        'LightGBM': Pipeline([
            ('tfidf', make_tfidf()),
            ('clf', LGBMClassifier(n_estimators=100, max_depth=6, learning_rate=0.1,
                                   class_weight='balanced', random_state=SEED, n_jobs=-1, verbose=-1)),
        ]),
    }

    # Train and evaluate each pipeline
    results = {}
    for name, pipeline in pipelines.items():
        print(f"\n{'='*60}")
        print(f"Training {name}...")
        print('='*60)

        # Fit the pipeline
        pipeline.fit(X_train, y_train)

        # Predict (handle SVM separately as it doesn't have predict_proba by default)
        y_pred = pipeline.predict(X_test)
        if hasattr(pipeline.named_steps['clf'], 'predict_proba'):
            y_proba = pipeline.predict_proba(X_test)[:, 1]
        else:
            # For LinearSVC, use decision_function (a ranking score is enough for ROC AUC)
            y_proba = pipeline.decision_function(X_test)

        # Calculate metrics
        print(classification_report(y_test, y_pred))
        roc_auc = roc_auc_score(y_test, y_proba)
        print(f"ROC AUC: {roc_auc:.4f}")

        # Display confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        print("\nConfusion Matrix:")
        print(f"{'':>15} Predicted Safe  Predicted Malicious")
        print(f"Actual Safe     {cm[0][0]:>8}        {cm[0][1]:>8}")
        print(f"Actual Malicious{cm[1][0]:>8}        {cm[1][1]:>8}")

        # Store results
        results[name] = {
            'pipeline': pipeline,
            'y_pred': y_pred,
            'y_proba': y_proba,
            'roc_auc': roc_auc,
            'confusion_matrix': cm
        }

    return results, y_test

# Run advanced comparison.
# This call reloads the dataset through load_data and fits three pipelines;
# `advanced_results` is keyed by model name and `y_test_adv` holds the test labels.
advanced_results, y_test_adv = compare_advanced_models()


Training Linear SVM...
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1217
           1       0.97      0.95      0.96       540

    accuracy                           0.97      1757
   macro avg       0.97      0.97      0.97      1757
weighted avg       0.97      0.97      0.97      1757

ROC AUC: 0.9931

Confusion Matrix:
                Predicted Safe  Predicted Malicious
Actual Safe         1201              16
Actual Malicious      29             511

Training XGBoost...
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1217
           1       0.97      0.95      0.96       540

    accuracy                           0.97      1757
   macro avg       0.97      0.97      0.97      1757
weighted avg       0.97      0.97      0.97      1757

ROC AUC: 0.9931

Confusion Matrix:
                Predicted Safe  Predicted Malicious
Actual Safe         1201              16
Actu

