<a href="https://colab.research.google.com/github/Tier1Security/roberta_classifier/blob/main/Noise_Gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import subprocess
import time
import random
import os

# ==========================================
# BENIGN 1: PURE NOISE (Daily Activity)
# Based on your generate_pure_noise() logic
#
# Everyday, low-risk command lines. run_emulation() draws from this list
# with random.choice() (uniformly, one entry per event) and passes the
# chosen string verbatim to `powershell -Command`.
# ==========================================
Benign_Pure_Noise = [
    "explorer.exe",
    "tasklist /v",
    "systeminfo",
    "whoami /groups",
    "ipconfig /all",
    "ping 192.168.1.1",
    "dir c:\\users",
    "powershell get-process",
    "notepad.exe",
    "hostname",
    "get-date"
]

# ==========================================
# BENIGN 2: HARD NEGATIVES (Toxin Removal Compliant)
# Based on your generate_hard_negative() logic
# Eliminates 'reg save/export' to prevent conflict with T1003
#
# NOTE: the percentage labels on the groups below describe the intended
# weighting of the upstream generator. They are NOT enforced here:
# run_emulation() selects from this list with a uniform random.choice(),
# so each group's effective share is its item count out of 12
# (5/12, 4/12 and 3/12 respectively).
# NOTE(review): some entries (e.g. "backup-utility ...",
# "get-registry-value ...") look like placeholder verbs rather than real
# commands -- confirm they still produce the log events you expect.
# ==========================================
Benign_Hard_Negatives = [
    # Intended 35% weight: T1134/WMIC/Service Hard Negatives
    "wmic product get name, version",
    "get-help adjusttokenprivileges -full",
    "sc query windefend",
    "start cmd.exe /c whoami",
    "get-registry-value -path hklm\\sam\\domains",

    # Intended 35% weight: Placeholder Verbs & Safe Backups
    "netsh dump > netconfig.txt",
    "backup-utility save registry config",
    "backup-utility write hklm\\software to c:\\backups\\config.dat",
    "reg query hkcu\\console", # Read-only query; deliberately not 'reg save'/'reg export'

    # Intended 30% weight: Dev Servers & System Processes
    # (entries containing "server" or "svchost" are launched non-blocking
    # by run_emulation() and terminated after a short delay)
    "python -m http.server 8080",
    "node server.js --port 3000",
    "c:\\windows\\system32\\svchost.exe -k networkservice"
]

# ==========================================
# MALICIOUS TEST CASES (For Detection Verification)
# NOT used for baseline training, used to test the Z-Score later
#
# Contains 'reg save' / 'reg export' of the SAM and SECURITY hives and a
# comsvcs.dll minidump written to lsass.dmp -- exactly the command shapes
# the benign lists exclude. Nothing in the visible part of this file
# references this list; run_emulation() only draws from the two benign
# lists, so these commands are never executed by it.
# ==========================================
Malicious_TTP_Samples = [
    "reg save hklm\\sam c:\\temp\\s.dat",
    "reg export hklm\\security c:\\temp\\sec.reg",
    "rundll32.exe c:\\windows\\system32\\comsvcs.dll, minidump 1234 c:\\temp\\lsass.dmp full"
]

def _delay_range_for(profile):
    """Return the inclusive (min, max) wait in seconds between events."""
    if profile == "pure_noise":
        return (5, 20)
    if profile == "hard_negatives":
        return (30, 120)
    # "balanced" and any unrecognised profile name share this range.
    return (15, 60)


def _pick_command(profile):
    """Choose the next benign command line to run for *profile*."""
    if profile == "pure_noise":
        return random.choice(Benign_Pure_Noise)
    if profile == "hard_negatives":
        return random.choice(Benign_Hard_Negatives)
    # Balanced: 70% Pure Noise, 30% Hard Negatives
    if random.random() < 0.7:
        return random.choice(Benign_Pure_Noise)
    return random.choice(Benign_Hard_Negatives)


def _execute(cmd):
    """Run *cmd* via ``powershell -Command``; report failures, never raise them.

    Commands containing "server" or "svchost" are treated as long-running:
    they are started in the background, given two seconds, then stopped.
    Everything else runs to completion with a 10 second timeout.
    """
    argv = ["powershell", "-Command", cmd]
    try:
        if "server" in cmd or "svchost" in cmd:
            proc = subprocess.Popen(argv,
                                    stdout=subprocess.DEVNULL,
                                    stderr=subprocess.DEVNULL)
            try:
                time.sleep(2)
            finally:
                # Always stop and reap the child, even if Ctrl-C arrives
                # during the sleep; otherwise the process handle leaks.
                proc.terminate()
                try:
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
                # NOTE(review): terminate() targets the PowerShell process
                # only; a server it spawned may outlive it -- verify.
        else:
            subprocess.run(argv, capture_output=True, timeout=10)
    except subprocess.TimeoutExpired:
        print(f"[!] Timed out after 10s: {cmd}")
    except (OSError, subprocess.SubprocessError) as exc:
        # Typically raised when the "powershell" executable is missing.
        # Emulation is best-effort, so report the failure and carry on.
        print(f"[!] Could not execute {cmd!r}: {exc}")


def run_emulation(profile="balanced", *, max_events=None):
    """
    Simulates user noise by executing benign commands.
    Ensures the Anomaly Engine learns the 'Toxin-Free' baseline.

    Args:
        profile: "pure_noise" (only Benign_Pure_Noise, 5-20s between
            events), "hard_negatives" (only Benign_Hard_Negatives,
            30-120s), or "balanced" (70/30 mix, 15-60s). Any other value
            behaves like "balanced".
        max_events: Optional cap on the number of commands to run. ``None``
            (the default) loops until interrupted with Ctrl-C; zero or a
            negative value runs no commands.

    Returns:
        None. Progress is reported on stdout.
    """
    print(f"[*] Starting {profile} noise emulation...")
    print("[*] TOXIN REMOVAL ACTIVE: 'reg save/export' is strictly excluded from this baseline.")

    delay_range = _delay_range_for(profile)
    events_run = 0

    try:
        while max_events is None or events_run < max_events:
            cmd = _pick_command(profile)
            print(f"[>] Executing: {cmd}")

            # Executed through PowerShell; the original intent is to generate
            # process-creation events (Event ID 4688) for the baseline.
            _execute(cmd)
            events_run += 1

            # Do not sleep after the final event of a bounded run.
            if max_events is not None and events_run >= max_events:
                break

            wait_time = random.randint(*delay_range)
            print(f"[*] Next event in {wait_time}s...")
            time.sleep(wait_time)
    except KeyboardInterrupt:
        print("\n[!] Emulation stopped. Baseline collection complete.")
    else:
        # Only reachable for bounded runs; the unbounded loop exits via Ctrl-C.
        print(f"[*] Emulation finished after {events_run} event(s).")

if __name__ == "__main__":
    # Script entry point. The "balanced" profile is the recommended default:
    # it interleaves frequent everyday noise with rarer admin-style commands.
    run_emulation(profile="balanced")